diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-03-02 02:36:47 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-03-02 02:36:47 +0000 |
commit | 8624512aa908741ba2795200133eae0d7f4557ea (patch) | |
tree | d5d3036fccf2604f4c98dedc11e8adb929d6b52e /arch | |
parent | 7b8f5d6f1d45d9f9de1d26e7d3c32aa5af11b488 (diff) |
Merge with 2.3.48.
Diffstat (limited to 'arch')
99 files changed, 3454 insertions, 2377 deletions
diff --git a/arch/alpha/config.in b/arch/alpha/config.in index 8e44bb0e2..1686fefbc 100644 --- a/arch/alpha/config.in +++ b/arch/alpha/config.in @@ -60,6 +60,7 @@ unset CONFIG_ALPHA_LCA CONFIG_ALPHA_APECS CONFIG_ALPHA_CIA unset CONFIG_ALPHA_T2 CONFIG_ALPHA_PYXIS CONFIG_ALPHA_POLARIS unset CONFIG_ALPHA_TSUNAMI CONFIG_ALPHA_MCPCIA unset CONFIG_ALPHA_IRONGATE +unset CONFIG_ALPHA_BROKEN_IRQ_MASK # Most of these machines have ISA slots; not exactly sure which don't, # and this doesn't activate hordes of code, so do it always. @@ -178,6 +179,10 @@ if [ "$CONFIG_ALPHA_XL" = "y" ] then define_bool CONFIG_ALPHA_AVANTI y fi +if [ "$CONFIG_ALPHA_GENERIC" = "y" -o "$CONFIG_ALPHA_PC164" = "y" ] +then + define_bool CONFIG_ALPHA_BROKEN_IRQ_MASK y +fi if [ "$CONFIG_ALPHA_SABLE" = "y" -o "$CONFIG_ALPHA_RAWHIDE" = "y" \ -o "$CONFIG_ALPHA_DP264" = "y" -o "$CONFIG_ALPHA_GENERIC" = "y" ] diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c index 725dd4f51..25d9583dd 100644 --- a/arch/alpha/kernel/alpha_ksyms.c +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -98,6 +98,8 @@ EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memsetw); EXPORT_SYMBOL(__constant_c_memset); +EXPORT_SYMBOL(__direct_map_base); +EXPORT_SYMBOL(__direct_map_size); EXPORT_SYMBOL(pci_alloc_consistent); EXPORT_SYMBOL(pci_free_consistent); EXPORT_SYMBOL(pci_map_single); @@ -144,6 +146,10 @@ EXPORT_SYMBOL(alpha_fp_emul_imprecise); EXPORT_SYMBOL(alpha_fp_emul); #endif +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +EXPORT_SYMBOL(__min_ipl); +#endif + /* * The following are specially called from the uaccess assembly stubs. 
*/ diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index 5fa112173..1452b6336 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -24,7 +24,6 @@ #include "proto.h" #include "pci_impl.h" -int TSUNAMI_bootcpu; static struct { @@ -210,17 +209,23 @@ void tsunami_pci_tbi(struct pci_controler *hose, dma_addr_t start, dma_addr_t end) { tsunami_pchip *pchip = hose->index ? TSUNAMI_pchip1 : TSUNAMI_pchip0; - - wmb(); + volatile unsigned long *csr; + unsigned long value; /* We can invalidate up to 8 tlb entries in a go. The flush matches against <31:16> in the pci address. */ + csr = &pchip->tlbia.csr; if (((start ^ end) & 0xffff0000) == 0) - pchip->tlbiv.csr = (start & 0xffff0000) >> 12; - else - pchip->tlbia.csr = 0; + csr = &pchip->tlbiv.csr; + /* For TBIA, it doesn't matter what value we write. For TBI, + it's the shifted tag bits. */ + value = (start & 0xffff0000) >> 12; + + wmb(); + *csr = value; mb(); + *csr; } #ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI @@ -229,7 +234,7 @@ tsunami_probe_read(volatile unsigned long *vaddr) { long dont_care, probe_result; int cpu = smp_processor_id(); - int s = swpipl(6); /* Block everything but machine checks. */ + int s = swpipl(IPL_MCHECK - 1); mcheck_taken(cpu) = 0; mcheck_expected(cpu) = 1; @@ -338,9 +343,13 @@ tsunami_init_one_pchip(tsunami_pchip *pchip, int index) * because of an idiot-syncrasy of the CYPRESS chip. It may * respond to a PCI bus address in the last 1MB of the 4GB * address range. + * + * Note that the TLB lookup logic uses bitwise concatenation, + * not addition, so the required arena alignment is based on + * the size of the window. 
*/ - hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, PAGE_SIZE); - hose->sg_pci = iommu_arena_new(0xc0000000, 0x08000000, PAGE_SIZE); + hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, 0x00800000>>10); + hose->sg_pci = iommu_arena_new(0xc0000000, 0x08000000, 0x08000000>>10); __direct_map_base = 0x40000000; __direct_map_size = 0x80000000; @@ -399,8 +408,6 @@ tsunami_init_arch(void) printk("%s: CSR_STR 0x%lx\n", FN, TSUNAMI_dchip->str.csr); printk("%s: CSR_DREV 0x%lx\n", FN, TSUNAMI_dchip->drev.csr); #endif - TSUNAMI_bootcpu = __hard_smp_processor_id(); - /* With multiple PCI busses, we play with I/O as physical addrs. */ ioport_resource.end = ~0UL; iomem_resource.end = ~0UL; @@ -444,12 +451,10 @@ tsunami_kill_arch(int mode) static inline void tsunami_pci_clr_err_1(tsunami_pchip *pchip) { - unsigned int jd; - - jd = pchip->perror.csr; + pchip->perror.csr; pchip->perror.csr = 0x040; mb(); - jd = pchip->perror.csr; + pchip->perror.csr; } static inline void diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 3d593acf3..613a633ba 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -48,6 +48,12 @@ unsigned long __irq_attempt[NR_IRQS]; #define ACTUAL_NR_IRQS NR_IRQS #endif +/* Hack minimum IPL during interupt processing for broken hardware. */ + +#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK +int __min_ipl; +#endif + /* * Performance counter hook. A module can override this to * do something useful. @@ -283,30 +289,32 @@ handle_IRQ_event(unsigned int irq, struct pt_regs *regs, struct irqaction *action) { int status, cpu = smp_processor_id(); - unsigned long ipl; + int old_ipl, ipl; kstat.irqs[cpu][irq]++; irq_enter(cpu, irq); status = 1; /* Force the "do bottom halves" bit */ - ipl = rdps() & 7; + old_ipl = ipl = getipl(); do { - unsigned long newipl = (action->flags & SA_INTERRUPT ? 
7 : 0); - if (newipl != ipl) { - swpipl(newipl); - ipl = newipl; + int new_ipl = IPL_MIN; + if (action->flags & SA_INTERRUPT) + new_ipl = IPL_MAX; + if (new_ipl != ipl) { + setipl(new_ipl); + ipl = new_ipl; } status |= action->flags; action->handler(irq, action->dev_id, regs); action = action->next; } while (action); + if (ipl != old_ipl) + setipl(old_ipl); + if (status & SA_SAMPLE_RANDOM) add_interrupt_randomness(irq); - if (ipl == 0) - __cli(); - irq_exit(cpu, irq); return status; @@ -325,7 +333,7 @@ disable_irq_nosync(unsigned int irq) spin_lock_irqsave(&irq_controller_lock, flags); if (!irq_desc[irq].depth++) { - irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].status |= IRQ_DISABLED | IRQ_MASKED; irq_desc[irq].handler->disable(irq); } spin_unlock_irqrestore(&irq_controller_lock, flags); @@ -356,14 +364,15 @@ enable_irq(unsigned int irq) switch (irq_desc[irq].depth) { case 1: { - unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED; - irq_desc[irq].status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - irq_desc[irq].status = status | IRQ_REPLAY; + unsigned int status = irq_desc[irq].status; + status &= ~(IRQ_DISABLED | IRQ_MASKED); + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + status |= IRQ_REPLAY; /* ??? We can't re-send on (most?) alpha hw. hw_resend_irq(irq_desc[irq].handler,irq); */ } + irq_desc[irq].status = status; irq_desc[irq].handler->enable(irq); /* fall-through */ } @@ -425,7 +434,7 @@ setup_irq(unsigned int irq, struct irqaction * new) if (!shared) { irq_desc[irq].depth = 0; - irq_desc[irq].status &= ~IRQ_DISABLED; + irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_MASKED); irq_desc[irq].handler->startup(irq); } spin_unlock_irqrestore(&irq_controller_lock,flags); @@ -500,7 +509,7 @@ free_irq(unsigned int irq, void *dev_id) /* Found - now remove it from the list of entries. 
*/ *pp = action->next; if (!irq_desc[irq].action) { - irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].status |= IRQ_DISABLED|IRQ_MASKED; irq_desc[irq].handler->shutdown(irq); } spin_unlock_irqrestore(&irq_controller_lock,flags); @@ -669,7 +678,7 @@ __global_cli(void) * Maximize ipl. If ipl was previously 0 and if this thread * is not in an irq, then take global_irq_lock. */ - if (swpipl(7) == 0 && !local_irq_count(cpu)) + if (swpipl(IPL_MAX) == IPL_MIN && !local_irq_count(cpu)) get_irqlock(cpu, where); } @@ -841,13 +850,25 @@ handle_irq(int irq, struct pt_regs * regs) desc = irq_desc + irq; spin_lock_irq(&irq_controller_lock); /* mask also the RTC */ desc->handler->ack(irq); + status = desc->status; + +#ifndef CONFIG_SMP + /* Look for broken irq masking. */ + if (status & IRQ_MASKED) { + static unsigned long last_printed; + if (time_after(jiffies, last_printed+HZ)) { + printk(KERN_CRIT "Mask didn't work for irq %d!\n", irq); + last_printed = jiffies; + } + } +#endif /* * REPLAY is when Linux resends an IRQ that was dropped earlier. * WAITING is used by probe to mark irqs that are being tested. 
*/ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ + status &= ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING | IRQ_MASKED; /* we _want_ to handle it */ /* * If the IRQ is disabled for whatever reason, we cannot @@ -890,9 +911,12 @@ handle_irq(int irq, struct pt_regs * regs) desc->status &= ~IRQ_PENDING; spin_unlock(&irq_controller_lock); } - desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED)) + status = desc->status & ~IRQ_INPROGRESS; + if (!(status & IRQ_DISABLED)) { + status &= ~IRQ_MASKED; desc->handler->end(irq); + } + desc->status = status; spin_unlock(&irq_controller_lock); } @@ -1056,7 +1080,7 @@ do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr, #ifdef CONFIG_SMP cpu_data[smp_processor_id()].smp_local_irq_count++; smp_percpu_timer_interrupt(&regs); - if (smp_processor_id() == smp_boot_cpuid) + if (smp_processor_id() == boot_cpuid) #endif handle_irq(RTC_IRQ, &regs); return; diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 72ce8bcb6..f5a9bd990 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -133,6 +133,9 @@ pci_map_single(struct pci_dev *pdev, void *cpu_addr, long size, int direction) unsigned long paddr; dma_addr_t ret; + if (direction == PCI_DMA_NONE) + BUG(); + paddr = virt_to_phys(cpu_addr); /* First check to see if we can use the direct map window. */ @@ -186,12 +189,15 @@ pci_map_single(struct pci_dev *pdev, void *cpu_addr, long size, int direction) wrote there. */ void -pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, long size, int direction) +pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, long size, + int direction) { struct pci_controler *hose = pdev ?
pdev->sysdata : pci_isa_hose; struct pci_iommu_arena *arena; long dma_ofs, npages; + if (direction == PCI_DMA_NONE) + BUG(); if (dma_addr >= __direct_map_base && dma_addr < __direct_map_base + __direct_map_size) { @@ -247,7 +253,8 @@ pci_alloc_consistent(struct pci_dev *pdev, long size, dma_addr_t *dma_addrp) } memset(cpu_addr, 0, size); - *dma_addrp = pci_map_single(pdev, cpu_addr, size, PCI_DMA_BIDIRECTIONAL); + *dma_addrp = pci_map_single(pdev, cpu_addr, size, + PCI_DMA_BIDIRECTIONAL); if (*dma_addrp == 0) { free_pages((unsigned long)cpu_addr, order); return NULL; @@ -424,13 +431,17 @@ sg_fill(struct scatterlist *leader, struct scatterlist *end, } int -pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direction) +pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, + int direction) { struct scatterlist *start, *end, *out; struct pci_controler *hose; struct pci_iommu_arena *arena; dma_addr_t max_dma; + if (direction == PCI_DMA_NONE) + BUG(); + /* Fast path single entry scatterlists. */ if (nents == 1) { sg->dma_length = sg->length; @@ -499,7 +510,8 @@ error: above. */ void -pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direction) +pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, + int direction) { struct pci_controler *hose; struct pci_iommu_arena *arena; @@ -507,6 +519,9 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direct dma_addr_t max_dma; dma_addr_t fstart, fend; + if (direction == PCI_DMA_NONE) + BUG(); + if (! alpha_mv.mv_pci_tbi) return; @@ -555,3 +570,33 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direct DBGA("pci_unmap_sg: %d entries\n", nents - (end - sg)); } + +/* Return whether the given PCI device DMA address mask can be + supported properly. 
*/ + +int +pci_dma_supported(struct pci_dev *pdev, dma_addr_t mask) +{ + struct pci_controler *hose; + struct pci_iommu_arena *arena; + + /* If there exists a direct map, and the mask fits either + MAX_DMA_ADDRESS defined such that GFP_DMA does something + useful, or the total system memory as shifted by the + map base. */ + if (__direct_map_size != 0 + && (__direct_map_base + MAX_DMA_ADDRESS-IDENT_ADDR-1 <= mask + || __direct_map_base + (max_low_pfn<<PAGE_SHIFT)-1 <= mask)) + return 1; + + /* Check that we have a scatter-gather arena that fits. */ + hose = pdev ? pdev->sysdata : pci_isa_hose; + arena = hose->sg_isa; + if (arena && arena->dma_base + arena->size <= mask) + return 1; + arena = hose->sg_pci; + if (arena && arena->dma_base + arena->size <= mask) + return 1; + + return 0; +} diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 31a818209..2e462550f 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -90,55 +90,82 @@ cpu_idle(void) } } + +struct halt_info { + int mode; + char *restart_cmd; +}; + static void -common_shutdown(int mode, char *restart_cmd) +common_shutdown_1(void *generic_ptr) { - /* The following currently only has any effect on SRM. We should - fix MILO to understand it. Should be pretty easy. Also we can - support RESTART2 via the ipc_buffer machinations pictured below, - which SRM ignores. */ + struct halt_info *how = (struct halt_info *)generic_ptr; + struct percpu_struct *cpup; + unsigned long *pflags, flags; + int cpuid = smp_processor_id(); - if (alpha_using_srm) { - struct percpu_struct *cpup; - unsigned long flags; - - cpup = (struct percpu_struct *) - ((unsigned long)hwrpb + hwrpb->processor_offset); - - flags = cpup->flags; - - /* Clear reason to "default"; clear "bootstrap in progress". 
*/ - flags &= ~0x00ff0001UL; - - if (mode == LINUX_REBOOT_CMD_RESTART) { - if (!restart_cmd) { - flags |= 0x00020000UL; /* "cold bootstrap" */ - cpup->ipc_buffer[0] = 0; - } else { - flags |= 0x00030000UL; /* "warm bootstrap" */ - strncpy((char *)cpup->ipc_buffer, restart_cmd, - sizeof(cpup->ipc_buffer)); - } + /* No point in taking interrupts anymore. */ + __cli(); + + cpup = (struct percpu_struct *) + ((unsigned long)hwrpb + hwrpb->processor_offset + + hwrpb->processor_size * cpuid); + pflags = &cpup->flags; + flags = *pflags; + + /* Clear reason to "default"; clear "bootstrap in progress". */ + flags &= ~0x00ff0001UL; + +#ifdef __SMP__ + /* Secondaries halt here. */ + if (cpuid != boot_cpuid) { + flags |= 0x00040000UL; /* "remain halted" */ + *pflags = flags; + clear_bit(cpuid, &cpu_present_mask); + halt(); + } +#endif + + if (how->mode == LINUX_REBOOT_CMD_RESTART) { + if (!how->restart_cmd) { + flags |= 0x00020000UL; /* "cold bootstrap" */ } else { - flags |= 0x00040000UL; /* "remain halted" */ + /* For SRM, we could probably set environment + variables to get this to work. We'd have to + delay this until after srm_paging_stop unless + we ever got srm_fixup working. + + At the moment, SRM will use the last boot device, + but the file and flags will be the defaults, when + doing a "warm" bootstrap. */ + flags |= 0x00030000UL; /* "warm bootstrap" */ } - - cpup->flags = flags; - mb(); + } else { + flags |= 0x00040000UL; /* "remain halted" */ + } + *pflags = flags; - /* reset_for_srm(); */ - set_hae(srm_hae); +#ifdef __SMP__ + /* Wait for the secondaries to halt. */ + clear_bit(boot_cpuid, &cpu_present_mask); + while (cpu_present_mask) + barrier(); +#endif + /* If booted from SRM, reset some of the original environment. */ + if (alpha_using_srm) { #ifdef CONFIG_DUMMY_CONSOLE - /* This has the effect of reseting the VGA video origin. */ + /* This has the effect of resetting the VGA video origin. 
*/ take_over_console(&dummy_con, 0, MAX_NR_CONSOLES-1, 1); #endif + /* reset_for_srm(); */ + set_hae(srm_hae); } if (alpha_mv.kill_arch) - alpha_mv.kill_arch(mode); + alpha_mv.kill_arch(how->mode); - if (!alpha_using_srm && mode != LINUX_REBOOT_CMD_RESTART) { + if (! alpha_using_srm && how->mode != LINUX_REBOOT_CMD_RESTART) { /* Unfortunately, since MILO doesn't currently understand the hwrpb bits above, we can't reliably halt the processor and keep it halted. So just loop. */ @@ -151,6 +178,18 @@ common_shutdown(int mode, char *restart_cmd) halt(); } +static void +common_shutdown(int mode, char *restart_cmd) +{ + struct halt_info args; + args.mode = mode; + args.restart_cmd = restart_cmd; +#ifdef __SMP__ + smp_call_function(common_shutdown_1, &args, 1, 0); +#endif + common_shutdown_1(&args); +} + void machine_restart(char *restart_cmd) { diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index dd63de4d2..a8859059b 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -74,13 +74,14 @@ extern void tsunami_pci_tbi(struct pci_controler *, dma_addr_t, dma_addr_t); /* setup.c */ extern unsigned long srm_hae; +extern int boot_cpuid; /* smp.c */ extern void setup_smp(void); extern int smp_info(char *buffer); extern void handle_ipi(struct pt_regs *); extern void smp_percpu_timer_interrupt(struct pt_regs *); -extern int smp_boot_cpuid; +extern unsigned long cpu_present_mask; /* bios32.c */ /* extern void reset_for_srm(void); */ diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c index d4793ecb4..dc5209531 100644 --- a/arch/alpha/kernel/semaphore.c +++ b/arch/alpha/kernel/semaphore.c @@ -173,7 +173,7 @@ __down_read(struct rw_semaphore *sem, int count) " subl %0,1,%0\n" " stl_c %2,%1\n" " bne %2,2f\n" - ".section .text2,\"ax\"\n" + ".subsection 2\n" "2: br 1b\n" ".previous" : "=r"(count), "=m"(sem->count), "=r"(tmp) @@ -226,7 +226,7 @@ __down_write(struct rw_semaphore *sem, int count) " ldah %0,%3(%0)\n" " stl_c 
%2,%1\n" " bne %2,2f\n" - ".section .text2,\"ax\"\n" + ".subsection 2\n" "2: br 1b\n" ".previous" : "=r"(count), "=m"(sem->count), "=r"(tmp) diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 112976bcb..1311d939b 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -50,6 +50,9 @@ struct hwrpb_struct *hwrpb; unsigned long srm_hae; +/* Which processor we booted from. */ +int boot_cpuid; + #ifdef CONFIG_ALPHA_GENERIC struct alpha_machine_vector alpha_mv; int alpha_using_srm; @@ -351,6 +354,7 @@ setup_arch(char **cmdline_p) char *type_name, *var_name, *p; hwrpb = (struct hwrpb_struct*) __va(INIT_HWRPB->phys_addr); + boot_cpuid = hard_smp_processor_id(); /* * Locate the command line. diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index e3ae30973..be1a6440e 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -62,11 +62,13 @@ spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; /* Set to a secondary's cpuid when it comes online. */ static unsigned long smp_secondary_alive; -unsigned long cpu_present_mask; /* Which cpus ids came online. */ -static unsigned long __cpu_present_mask __initdata = 0; /* cpu reported in the hwrpb */ +/* Which cpus ids came online. */ +unsigned long cpu_present_mask; + +/* cpus reported in the hwrpb */ +static unsigned long hwrpb_cpu_present_mask __initdata = 0; static int max_cpus = -1; /* Command-line limitation. */ -int smp_boot_cpuid; /* Which processor we booted from. */ int smp_num_probed; /* Internal processor count */ int smp_num_cpus = 1; /* Number that came online. */ int smp_threads_ready; /* True once the per process idle is forked. 
*/ @@ -486,10 +488,9 @@ setup_smp(void) struct percpu_struct *cpubase, *cpu; int i; - smp_boot_cpuid = hard_smp_processor_id(); - if (smp_boot_cpuid != 0) { + if (boot_cpuid != 0) { printk(KERN_WARNING "SMP: Booting off cpu %d instead of 0?\n", - smp_boot_cpuid); + boot_cpuid); } if (hwrpb->nr_processors > 1) { @@ -508,7 +509,7 @@ setup_smp(void) if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; /* Assume here that "whami" == index */ - __cpu_present_mask |= (1L << i); + hwrpb_cpu_present_mask |= (1L << i); cpu->pal_revision = boot_cpu_palrev; } @@ -519,12 +520,12 @@ setup_smp(void) } } else { smp_num_probed = 1; - __cpu_present_mask = (1L << smp_boot_cpuid); + hwrpb_cpu_present_mask = (1L << boot_cpuid); } - cpu_present_mask = 1L << smp_boot_cpuid; + cpu_present_mask = 1L << boot_cpuid; printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n", - smp_num_probed, __cpu_present_mask); + smp_num_probed, hwrpb_cpu_present_mask); } /* @@ -541,13 +542,13 @@ smp_boot_cpus(void) memset(__cpu_logical_map, -1, sizeof(__cpu_logical_map)); memset(ipi_data, 0, sizeof(ipi_data)); - __cpu_number_map[smp_boot_cpuid] = 0; - __cpu_logical_map[0] = smp_boot_cpuid; - current->processor = smp_boot_cpuid; + __cpu_number_map[boot_cpuid] = 0; + __cpu_logical_map[0] = boot_cpuid; + current->processor = boot_cpuid; - smp_store_cpu_info(smp_boot_cpuid); + smp_store_cpu_info(boot_cpuid); smp_tune_scheduling(); - smp_setup_percpu_timer(smp_boot_cpuid); + smp_setup_percpu_timer(boot_cpuid); init_idle(); @@ -565,10 +566,10 @@ smp_boot_cpus(void) cpu_count = 1; for (i = 0; i < NR_CPUS; i++) { - if (i == smp_boot_cpuid) + if (i == boot_cpuid) continue; - if (((__cpu_present_mask >> i) & 1) == 0) + if (((hwrpb_cpu_present_mask >> i) & 1) == 0) continue; if (smp_boot_one_cpu(i, cpu_count)) @@ -1023,7 +1024,7 @@ debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no) " stl_c %0,%1\n" " beq %0,3f\n" "4: mb\n" - ".section .text2,\"ax\"\n" + ".subsection 2\n" "2: ldl 
%0,%1\n" " subq %2,1,%2\n" "3: blt %2,4b\n" @@ -1097,7 +1098,7 @@ void write_lock(rwlock_t * lock) " stl_c %1,%0\n" " beq %1,6f\n" "4: mb\n" - ".section .text2,\"ax\"\n" + ".subsection 2\n" "6: blt %3,4b # debug\n" " subl %3,1,%3 # debug\n" " ldl %1,%0\n" @@ -1140,7 +1141,7 @@ void read_lock(rwlock_t * lock) " stl_c %1,%0;" " beq %1,6f;" "4: mb\n" - ".section .text2,\"ax\"\n" + ".subsection 2\n" "6: ldl %1,%0;" " blt %2,4b # debug\n" " subl %2,1,%2 # debug\n" diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index 1432496d8..acea58d1e 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -126,6 +126,30 @@ cabriolet_init_irq(void) setup_irq(16+4, &isa_cascade_irqaction); } +#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PC164) +static void +pc164_device_interrupt(unsigned long v, struct pt_regs *r) +{ + /* In theory, the PC164 has the same interrupt hardware as + the other Cabriolet based systems. However, something + got screwed up late in the development cycle which broke + the interrupt masking hardware. Repeat, it is not + possible to mask and ack interrupts. At all. + + In an attempt to work around this, while processing + interrupts, we do not allow the IPL to drop below what + it is currently. This prevents the possibility of + recursion. + + ??? Another option might be to force all PCI devices + to use edge triggered rather than level triggered + interrupts. That might be too invasive though. 
*/ + + __min_ipl = getipl(); + cabriolet_device_interrupt(v, r); + __min_ipl = 0; +} +#endif /* * The EB66+ is very similar to the EB66 except that it does not have @@ -379,7 +403,7 @@ struct alpha_machine_vector pc164_mv __initmv = { min_mem_address: CIA_DEFAULT_MEM_BASE, nr_irqs: 35, - device_interrupt: cabriolet_device_interrupt, + device_interrupt: pc164_device_interrupt, init_arch: cia_init_arch, init_irq: cabriolet_init_irq, diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index fbebdd5a5..7414b8cc2 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -33,94 +33,80 @@ #include "machvec_impl.h" +/* Note mask bit is true for ENABLED irqs. */ static unsigned long cached_irq_mask; - -#define TSUNAMI_SET_IRQ_MASK(cpu, value) \ -do { \ - volatile unsigned long *csr; \ - csr = &TSUNAMI_cchip->dim##cpu##.csr; \ - *csr = (value); \ - mb(); \ - *csr; \ -} while(0) - -static inline void -do_flush_irq_mask(unsigned long value) -{ - switch (TSUNAMI_bootcpu) { - case 0: - TSUNAMI_SET_IRQ_MASK(0, value); - break; - case 1: - TSUNAMI_SET_IRQ_MASK(1, value); - break; - case 2: - TSUNAMI_SET_IRQ_MASK(2, value); - break; - case 3: - TSUNAMI_SET_IRQ_MASK(3, value); - break; - } -} - -#ifdef CONFIG_SMP -static inline void -do_flush_smp_irq_mask(unsigned long value) -{ - extern unsigned long cpu_present_mask; - unsigned long other_cpus = cpu_present_mask & ~(1L << TSUNAMI_bootcpu); - - if (other_cpus & 1) - TSUNAMI_SET_IRQ_MASK(0, value); - if (other_cpus & 2) - TSUNAMI_SET_IRQ_MASK(1, value); - if (other_cpus & 4) - TSUNAMI_SET_IRQ_MASK(2, value); - if (other_cpus & 8) - TSUNAMI_SET_IRQ_MASK(3, value); -} -#endif - static void -dp264_flush_irq_mask(unsigned long mask) +tsunami_update_irq_hw(unsigned long mask, unsigned long isa_enable) { - unsigned long value; + register tsunami_cchip *cchip = TSUNAMI_cchip; + register int bcpu = boot_cpuid; #ifdef CONFIG_SMP - do_flush_smp_irq_mask(mask); + register unsigned long cpm = 
cpu_present_mask; + volatile unsigned long *dim0, *dim1, *dim2, *dim3; + unsigned long mask0, mask1, mask2, mask3, maskB, dummy; + + mask0 = mask1 = mask2 = mask3 = mask; + maskB = mask | isa_enable; + if (bcpu == 0) mask0 = maskB; + if (bcpu == 1) mask1 = maskB; + if (bcpu == 2) mask2 = maskB; + if (bcpu == 3) mask3 = maskB; + + dim0 = &cchip->dim0.csr; + dim1 = &cchip->dim1.csr; + dim2 = &cchip->dim2.csr; + dim3 = &cchip->dim3.csr; + if ((cpm & 1) == 0) dim0 = &dummy; + if ((cpm & 2) == 0) dim1 = &dummy; + if ((cpm & 4) == 0) dim2 = &dummy; + if ((cpm & 8) == 0) dim3 = &dummy; + + *dim0 = mask0; + *dim1 = mask1; + *dim2 = mask2; + *dim3 = mask3; + mb(); + *dim0; + *dim1; + *dim2; + *dim3; +#else + volatile unsigned long *dimB = &cchip->dim1.csr; + if (bcpu == 0) dimB = &cchip->dim0.csr; + if (bcpu == 2) dimB = &cchip->dim2.csr; + if (bcpu == 3) dimB = &cchip->dim3.csr; + *dimB = mask | isa_enable; + mb(); + *dimB; #endif - - value = mask | (1UL << 55) | 0xffff; /* isa irqs always enabled */ - do_flush_irq_mask(value); } -static void -clipper_flush_irq_mask(unsigned long mask) +static inline void +dp264_update_irq_hw(unsigned long mask) { - unsigned long value; - - value = mask >> 16; -#ifdef CONFIG_SMP - do_flush_smp_irq_mask(value); -#endif + tsunami_update_irq_hw(mask, (1UL << 55) | 0xffff); +} - value = value | (1UL << 55); /* master ISA enable */ - do_flush_irq_mask(value); +static inline void +clipper_update_irq_hw(unsigned long mask) +{ + tsunami_update_irq_hw(mask, 1UL << 55); } static inline void dp264_enable_irq(unsigned int irq) { cached_irq_mask |= 1UL << irq; - dp264_flush_irq_mask(cached_irq_mask); + dp264_update_irq_hw(cached_irq_mask); } static void dp264_disable_irq(unsigned int irq) { cached_irq_mask &= ~(1UL << irq); - dp264_flush_irq_mask(cached_irq_mask); + dp264_update_irq_hw(cached_irq_mask); } static unsigned int @@ -134,14 +120,14 @@ static inline void clipper_enable_irq(unsigned int irq) { cached_irq_mask |= 1UL << irq; - 
clipper_flush_irq_mask(cached_irq_mask); + clipper_update_irq_hw(cached_irq_mask); } static void clipper_disable_irq(unsigned int irq) { cached_irq_mask &= ~(1UL << irq); - clipper_flush_irq_mask(cached_irq_mask); + clipper_update_irq_hw(cached_irq_mask); } static unsigned int @@ -271,7 +257,7 @@ dp264_init_irq(void) if (alpha_using_srm) alpha_mv.device_interrupt = dp264_srm_device_interrupt; - dp264_flush_irq_mask(0UL); + dp264_update_irq_hw(0UL); init_i8259a_irqs(); init_rtc_irq(); @@ -289,7 +275,7 @@ clipper_init_irq(void) if (alpha_using_srm) alpha_mv.device_interrupt = clipper_srm_device_interrupt; - clipper_flush_irq_mask(0UL); + clipper_update_irq_hw(0UL); init_i8259a_irqs(); init_rtc_irq(); diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index ccdcf3bdb..0230ec6d9 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -391,7 +391,7 @@ struct alpha_machine_vector xl_mv __initmv = { nr_irqs: 16, device_interrupt: isa_device_interrupt, - init_arch: lca_init_arch, + init_arch: apecs_init_arch, init_irq: sio_init_irq, init_rtc: common_init_rtc, init_pci: noname_init_pci, diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 8211045e8..d7b5cee8c 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -22,7 +22,6 @@ * fixed algorithm in do_gettimeofday() for calculating the precise time * from processor cycle counter (now taking lost_ticks into account) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 36b0cc43a..828044b24 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -215,8 +215,10 @@ do_entIF(unsigned long type, unsigned long a1, /* EV4 does not implement anything except normal rounding. Everything else will come here as an illegal instruction. Emulate them. 
*/ - if (alpha_fp_emul(regs.pc - 4)) + if (alpha_fp_emul(regs.pc)) { + regs.pc += 4; return; + } } send_sig(SIGILL, current, 1); break; diff --git a/arch/alpha/vmlinux.lds b/arch/alpha/vmlinux.lds index 4b49a5369..4eaac4e42 100644 --- a/arch/alpha/vmlinux.lds +++ b/arch/alpha/vmlinux.lds @@ -5,7 +5,6 @@ SECTIONS . = 0xfffffc0000310000; _text = .; .text : { *(.text) } - .text2 : { *(.text2) } _etext = .; /* Exception table */ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 63d4631a4..c445daeee 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -165,7 +165,6 @@ EXPORT_SYMBOL_NOVERS(strncmp); EXPORT_SYMBOL_NOVERS(strchr); EXPORT_SYMBOL_NOVERS(strlen); EXPORT_SYMBOL_NOVERS(strnlen); -EXPORT_SYMBOL_NOVERS(strspn); EXPORT_SYMBOL_NOVERS(strpbrk); EXPORT_SYMBOL_NOVERS(strtok); EXPORT_SYMBOL_NOVERS(strrchr); diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index 8673d6c1d..7101b2936 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -23,7 +23,6 @@ void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle) { int order; unsigned long page; - struct vm_struct *area; void *ret; if (in_interrupt()) @@ -40,15 +39,10 @@ void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle) *dma_handle = virt_to_bus((void *)page); - area = get_vm_area(size, VM_IOREMAP); /* maybe new type? 
*/ - if (!area) - goto no_area; - ret = __ioremap(virt_to_phys((void *)page), PAGE_SIZE << order, 0); if (ret) return ret; -no_area: free_pages(page, order); no_page: BUG(); diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 4ca545255..34b453c2c 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -109,10 +109,12 @@ CONFIG_BLK_DEV_CMD640=y # CONFIG_BLK_DEV_ISAPNP is not set CONFIG_BLK_DEV_RZ1000=y CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y # CONFIG_BLK_DEV_IDEDMA_PCI is not set # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_AEC6210 is not set # CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_CS5530 is not set # CONFIG_IDE_CHIPSETS is not set # CONFIG_BLK_CPQ_DA is not set @@ -393,6 +395,7 @@ CONFIG_PSMOUSE=y # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set # CONFIG_RTC is not set +# CONFIG_EFI_RTC is not set # # Video For Linux diff --git a/arch/i386/kernel/acpi.c b/arch/i386/kernel/acpi.c index 9bdd111d1..6228805db 100644 --- a/arch/i386/kernel/acpi.c +++ b/arch/i386/kernel/acpi.c @@ -34,6 +34,7 @@ #include <linux/spinlock.h> #include <linux/ioport.h> #include <linux/slab.h> +#include <linux/mm.h> #include <linux/pci.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -379,13 +380,14 @@ static struct acpi_table *__init acpi_map_table(u32 addr) ioremap((unsigned long) addr, table_size); } - if (!table) { - /* ioremap is a pain, it returns NULL if the - * table starts within mapped physical memory. - * Hopefully, no table straddles a mapped/unmapped - * physical memory boundary, ugh + if (!table && addr < virt_to_phys(high_memory)) { + /* sometimes we see ACPI tables in low memory + * and not reserved by the memory map (E820) code, + * who is at fault for this? BIOS? 
*/ - table = (struct acpi_table*) phys_to_virt(addr); + printk(KERN_ERR + "ACPI: unreserved table memory @ 0x%p!\n", + (void*) addr); } } return table; @@ -933,9 +935,9 @@ static int acpi_enter_dx(acpi_dstate_t state) int status = 0; if (state == ACPI_D0) - status = pm_send_request(PM_RESUME, (void*) state); + status = pm_send_all(PM_RESUME, (void*) state); else - status = pm_send_request(PM_SUSPEND, (void*) state); + status = pm_send_all(PM_SUSPEND, (void*) state); return status; } @@ -1333,10 +1335,7 @@ static int __init acpi_init(void) if (acpi_claim_ioports(acpi_facp)) { printk(KERN_ERR "ACPI: I/O port allocation failed\n"); - if (pci_driver_registered) - pci_unregister_driver(&acpi_driver); - acpi_destroy_tables(); - return -ENODEV; + goto err_out; } if (acpi_facp->sci_int @@ -1347,12 +1346,7 @@ static int __init acpi_init(void) acpi_facp)) { printk(KERN_ERR "ACPI: SCI (IRQ%d) allocation failed\n", acpi_facp->sci_int); - - if (pci_driver_registered) - pci_unregister_driver(&acpi_driver); - acpi_destroy_tables(); - - return -ENODEV; + goto err_out; } acpi_sysctl = register_sysctl_table(acpi_dir_table, 1); @@ -1379,6 +1373,13 @@ static int __init acpi_init(void) pm_idle = acpi_idle; return 0; + +err_out: + if (pci_driver_registered) + pci_unregister_driver(&acpi_driver); + acpi_destroy_tables(); + + return -ENODEV; } /* diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 4ec5e7993..3d403b93c 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -333,7 +333,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user * user_list = NULL; -static char driver_version[] = "1.12"; /* no spaces */ +static char driver_version[] = "1.13"; /* no spaces */ static char * apm_event_name[] = { "system standby", @@ -590,7 +590,11 @@ static void apm_cpu_idle(void) continue; if (hlt_counter) continue; - asm volatile("sti ; hlt" : : : "memory"); + asm volatile("cli" : : : "memory"); 
+ if (!current->need_resched) + asm volatile("sti ; hlt" : : : "memory"); + else + asm volatile("sti" : : : "memory"); continue; } @@ -635,7 +639,7 @@ static void apm_power_off(void) */ #ifdef CONFIG_SMP /* Some bioses don't like being called from CPU != 0 */ - while (cpu_number_map[smp_processor_id()] != 0) { + while (cpu_number_map(smp_processor_id()) != 0) { kernel_thread(apm_magic, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD); schedule(); @@ -916,7 +920,7 @@ static int send_event(apm_event_t event, struct apm_user *sender) case APM_CRITICAL_SUSPEND: case APM_USER_SUSPEND: /* map all suspends to ACPI D3 */ - if (pm_send_request(PM_SUSPEND, (void *)3)) { + if (pm_send_all(PM_SUSPEND, (void *)3)) { if (apm_bios_info.version > 0x100) apm_set_power_state(APM_STATE_REJECT); return 0; @@ -925,7 +929,7 @@ static int send_event(apm_event_t event, struct apm_user *sender) case APM_NORMAL_RESUME: case APM_CRITICAL_RESUME: /* map all resumes to ACPI D0 */ - (void) pm_send_request(PM_RESUME, (void *)0); + (void) pm_send_all(PM_RESUME, (void *)0); break; } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index bcca244c1..0c3cae5d9 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -181,6 +181,8 @@ ret_from_fork: call SYMBOL_NAME(schedule_tail) addl $4, %esp GET_CURRENT(%ebx) + testb $0x20,flags(%ebx) # PF_TRACESYS + jne tracesys_exit jmp ret_from_sys_call /* diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index cad6ceb17..a3389c5f0 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -144,6 +144,4 @@ EXPORT_SYMBOL(screen_info); EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(local_bh_count); -EXPORT_SYMBOL(local_irq_count); +EXPORT_SYMBOL(irq_stat); diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d54f9b503..ec33f2269 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -127,11 +127,14 @@ void (*interrupt[NR_IRQS])(void) = { * moves to 
arch independent land */ -void enable_8259A_irq(unsigned int irq); -void disable_8259A_irq(unsigned int irq); +static spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; + +static void end_8259A_irq (unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_DISABLED)) + enable_8259A_irq(irq); +} -/* shutdown is same as "disable" */ -#define end_8259A_irq enable_8259A_irq #define shutdown_8259A_irq disable_8259A_irq void mask_and_ack_8259A(unsigned int); @@ -149,7 +152,8 @@ static struct hw_interrupt_type i8259A_irq_type = { enable_8259A_irq, disable_8259A_irq, mask_and_ack_8259A, - end_8259A_irq + end_8259A_irq, + NULL }; /* @@ -183,30 +187,45 @@ unsigned long io_apic_irqs = 0; void disable_8259A_irq(unsigned int irq) { unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_A1,0xA1); else outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) { unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_A1,0xA1); else outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) { unsigned int mask = 1<<irq; + unsigned long flags; + int ret; + spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) - return (inb(0x20) & mask); - return (inb(0xA0) & (mask >> 8)); + ret = inb(0x20) & mask; + else + ret = inb(0xA0) & (mask >> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; } void make_8259A_irq(unsigned int irq) @@ -247,7 +266,9 @@ static inline int i8259A_irq_real(unsigned int irq) void mask_and_ack_8259A(unsigned int irq) { unsigned int irqmask = 1 << irq; + unsigned long flags; + spin_lock_irqsave(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. 
We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -278,6 +299,7 @@ handle_real_irq: outb(cached_21,0x21); outb(0x20,0x20); /* 'generic EOI' to master */ } + spin_unlock_irqrestore(&i8259A_lock, flags); return; spurious_8259A_irq: diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 75b2bfb9f..129a587f0 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -28,6 +28,8 @@ #include <asm/smp.h> #include <asm/desc.h> +static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; + /* * # of IO-APICs and # of IRQ routing registers */ @@ -87,9 +89,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } -#define DO_ACTION(name,R,ACTION, FINAL) \ +#define __DO_ACTION(name,R,ACTION, FINAL) \ \ -static void name##_IO_APIC_irq(unsigned int irq) \ { \ int pin; \ struct irq_pin_list *entry = irq_2_pin + irq; \ @@ -109,8 +110,31 @@ static void name##_IO_APIC_irq(unsigned int irq) \ FINAL; \ } -DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ -DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ +static void name##_IO_APIC_irq(unsigned int irq) \ +__DO_ACTION(name,R,ACTION, FINAL) + +DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { @@ -537,7 +561,7 @@ void __init setup_IO_APIC_irqs(void) entry.delivery_mode = dest_LowestPrio; entry.dest_mode = 1; /* logical delivery */ entry.mask 
= 0; /* enable IRQ */ - entry.dest.logical.logical_dest = APIC_ALL_CPUS; /* all CPUs */ + entry.dest.logical.logical_dest = APIC_ALL_CPUS; idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -1026,16 +1050,16 @@ extern atomic_t nmi_counter[NR_CPUS]; static int __init nmi_irq_works(void) { - atomic_t tmp[NR_CPUS]; + irq_cpustat_t tmp[NR_CPUS]; int j, cpu; - memcpy(tmp, nmi_counter, sizeof(tmp)); + memcpy(tmp, irq_stat, sizeof(tmp)); sti(); mdelay(50); for (j = 0; j < smp_num_cpus; j++) { cpu = cpu_logical_map(j); - if (atomic_read(nmi_counter+cpu) - atomic_read(tmp+cpu) <= 3) { + if (atomic_read(&nmi_counter(cpu)) - atomic_read(&tmp[cpu].__nmi_counter) <= 3) { printk("CPU#%d NMI appears to be stuck.\n", cpu); return 0; } @@ -1055,14 +1079,9 @@ static int __init nmi_irq_works(void) * that was delayed but this is now handled in the device * independent code. */ -static void enable_edge_ioapic_irq(unsigned int irq) -{ - unmask_IO_APIC_irq(irq); -} +#define enable_edge_ioapic_irq unmask_IO_APIC_irq -static void disable_edge_ioapic_irq(unsigned int irq) -{ -} +static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } /* * Starting up a edge-triggered IO-APIC interrupt is @@ -1077,12 +1096,17 @@ static void disable_edge_ioapic_irq(unsigned int irq) static unsigned int startup_edge_ioapic_irq(unsigned int irq) { int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); if (irq < 16) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } - enable_edge_ioapic_irq(irq); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + return was_pending; } @@ -1093,14 +1117,15 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) * interrupt for real. This prevents IRQ storms from unhandled * devices. 
*/ -void static ack_edge_ioapic_irq(unsigned int irq) +static void ack_edge_ioapic_irq(unsigned int irq) { if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); ack_APIC_irq(); } -void static end_edge_ioapic_irq(unsigned int i){} + +static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } /* @@ -1108,23 +1133,46 @@ void static end_edge_ioapic_irq(unsigned int i){} * and shutting down and starting up the interrupt * is the same as enabling and disabling them -- except * with a startup need to return a "was pending" value. + * + * Level triggered interrupts are special because we + * do not touch any IO-APIC register while handling + * them. We ack the APIC in the end-IRQ handler, not + * in the start-IRQ-handler. Protection against reentrance + * from the same interrupt is still provided, both by the + * generic IRQ layer and by the fact that an unacked local + * APIC does not accept IRQs. */ -static unsigned int startup_level_ioapic_irq(unsigned int irq) +static unsigned int startup_level_ioapic_irq (unsigned int irq) { unmask_IO_APIC_irq(irq); + return 0; /* don't check for pending */ } #define shutdown_level_ioapic_irq mask_IO_APIC_irq #define enable_level_ioapic_irq unmask_IO_APIC_irq #define disable_level_ioapic_irq mask_IO_APIC_irq -#define end_level_ioapic_irq unmask_IO_APIC_irq -void static mask_and_ack_level_ioapic_irq(unsigned int i) + +static void end_level_ioapic_irq (unsigned int i) { - mask_IO_APIC_irq(i); ack_APIC_irq(); } +static void mask_and_ack_level_ioapic_irq (unsigned int i) { /* nothing */ } + +static void set_ioapic_affinity (unsigned int irq, unsigned int mask) +{ + unsigned long flags; + /* + * Only the first 8 bits are valid. 
+ */ + mask = mask << 24; + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION( target, 1, = mask, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + /* * Level and edge triggered IO-APIC interrupts need different handling, * so we use two separate IRQ descriptors. Edge triggered IRQs can be @@ -1141,7 +1189,8 @@ static struct hw_interrupt_type ioapic_edge_irq_type = { enable_edge_ioapic_irq, disable_edge_ioapic_irq, ack_edge_ioapic_irq, - end_edge_ioapic_irq + end_edge_ioapic_irq, + set_ioapic_affinity, }; static struct hw_interrupt_type ioapic_level_irq_type = { @@ -1151,7 +1200,8 @@ static struct hw_interrupt_type ioapic_level_irq_type = { enable_level_ioapic_irq, disable_level_ioapic_irq, mask_and_ack_level_ioapic_irq, - end_level_ioapic_irq + end_level_ioapic_irq, + set_ioapic_affinity, }; static inline void init_IO_APIC_traps(void) @@ -1185,12 +1235,12 @@ static inline void init_IO_APIC_traps(void) } } -void static ack_lapic_irq (unsigned int irq) +static void ack_lapic_irq (unsigned int irq) { ack_APIC_irq(); } -void static end_lapic_irq (unsigned int i) { /* nothing */ } +static void end_lapic_irq (unsigned int i) { /* nothing */ } static struct hw_interrupt_type lapic_irq_type = { "local-APIC-edge", diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 9d4a81041..7054249e6 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -31,21 +31,20 @@ #include <linux/init.h> #include <linux/kernel_stat.h> #include <linux/irq.h> +#include <linux/proc_fs.h> +#include <linux/irq.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/bitops.h> +#include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/delay.h> #include <asm/desc.h> #include <asm/irq.h> -unsigned int local_bh_count[NR_CPUS]; -unsigned int local_irq_count[NR_CPUS]; - -extern atomic_t nmi_counter[NR_CPUS]; /* * Linux has a controller-independent x86 interrupt architecture. 
@@ -63,17 +62,15 @@ extern atomic_t nmi_counter[NR_CPUS]; * interrupt controllers, without having to do assembly magic. */ -/* - * Micro-access to controllers is serialized over the whole - * system. We never hold this lock when we call the actual - * IRQ handler. - */ -spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; +irq_cpustat_t irq_stat [NR_CPUS]; + /* * Controller mappings for all interrupt sources: */ irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = - { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }}; + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +static void register_irq_proc (unsigned int irq); /* * Special irq handlers. @@ -164,7 +161,7 @@ int get_irq_list(char *buf) p += sprintf(p, "NMI: "); for (j = 0; j < smp_num_cpus; j++) p += sprintf(p, "%10u ", - atomic_read(nmi_counter+cpu_logical_map(j))); + atomic_read(&nmi_counter(cpu_logical_map(j)))); p += sprintf(p, "\n"); #if CONFIG_SMP p += sprintf(p, "LOC: "); @@ -186,7 +183,6 @@ int get_irq_list(char *buf) #ifdef CONFIG_SMP unsigned char global_irq_holder = NO_PROC_ID; unsigned volatile int global_irq_lock; -atomic_t global_irq_count; static void show(char * str) { @@ -196,9 +192,9 @@ static void show(char * str) printk("\n%s, CPU %d:\n", str, cpu); printk("irq: %d [%d %d]\n", - atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); + irqs_running(), local_irq_count(0), local_irq_count(1)); printk("bh: %d [%d %d]\n", - spin_is_locked(&global_bh_lock) ? 1 : 0, local_bh_count[0], local_bh_count[1]); + spin_is_locked(&global_bh_lock) ? 1 : 0, local_bh_count(0), local_bh_count(1)); stack = (unsigned long *) &stack; for (i = 40; i ; i--) { unsigned long x = *++stack; @@ -248,10 +244,9 @@ static inline void wait_on_irq(int cpu) * for bottom half handlers unless we're * already executing in one.. 
*/ - if (!atomic_read(&global_irq_count)) { - if (local_bh_count[cpu] || !spin_is_locked(&global_bh_lock)) + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) break; - } /* Duh, we have to loop. Release the lock to avoid deadlocks */ clear_bit(0,&global_irq_lock); @@ -264,11 +259,11 @@ static inline void wait_on_irq(int cpu) __sti(); SYNC_OTHER_CORES(cpu); __cli(); - if (atomic_read(&global_irq_count)) + if (irqs_running()) continue; if (global_irq_lock) continue; - if (!local_bh_count[cpu] && spin_is_locked(&global_bh_lock)) + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) continue; if (!test_and_set_bit(0,&global_irq_lock)) break; @@ -285,7 +280,7 @@ static inline void wait_on_irq(int cpu) */ void synchronize_irq(void) { - if (atomic_read(&global_irq_count)) { + if (irqs_running()) { /* Stupid approach */ cli(); sti(); @@ -338,7 +333,7 @@ void __global_cli(void) if (flags & (1 << EFLAGS_IF_SHIFT)) { int cpu = smp_processor_id(); __cli(); - if (!local_irq_count[cpu]) + if (!local_irq_count(cpu)) get_irqlock(cpu); } } @@ -347,7 +342,7 @@ void __global_sti(void) { int cpu = smp_processor_id(); - if (!local_irq_count[cpu]) + if (!local_irq_count(cpu)) release_irqlock(cpu); __sti(); } @@ -364,6 +359,7 @@ unsigned long __global_save_flags(void) int retval; int local_enabled; unsigned long flags; + int cpu = smp_processor_id(); __save_flags(flags); local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; @@ -371,10 +367,10 @@ unsigned long __global_save_flags(void) retval = 2 + local_enabled; /* check for global flags if we're not in an interrupt */ - if (!local_irq_count[smp_processor_id()]) { + if (!local_irq_count(cpu)) { if (local_enabled) retval = 1; - if (global_irq_holder == (unsigned char) smp_processor_id()) + if (global_irq_holder == cpu) retval = 0; } return retval; @@ -442,16 +438,17 @@ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * * hardware disable after having gotten the irq * 
controller lock. */ -void disable_irq_nosync(unsigned int irq) +void inline disable_irq_nosync(unsigned int irq) { + irq_desc_t *desc = irq_desc + irq; unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); - if (!irq_desc[irq].depth++) { - irq_desc[irq].status |= IRQ_DISABLED; - irq_desc[irq].handler->disable(irq); + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); } - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } /* @@ -462,7 +459,7 @@ void disable_irq(unsigned int irq) { disable_irq_nosync(irq); - if (!local_irq_count[smp_processor_id()]) { + if (!local_irq_count(smp_processor_id())) { do { barrier(); } while (irq_desc[irq].status & IRQ_INPROGRESS); @@ -471,28 +468,29 @@ void disable_irq(unsigned int irq) void enable_irq(unsigned int irq) { + irq_desc_t *desc = irq_desc + irq; unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); - switch (irq_desc[irq].depth) { + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { case 1: { - unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED; - irq_desc[irq].status = status; + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - irq_desc[irq].status = status | IRQ_REPLAY; - hw_resend_irq(irq_desc[irq].handler,irq); + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); } - irq_desc[irq].handler->enable(irq); + desc->handler->enable(irq); /* fall-through */ } default: - irq_desc[irq].depth--; + desc->depth--; break; case 0: printk("enable_irq() unbalanced from %p\n", __builtin_return_address(0)); } - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } /* @@ -514,13 +512,12 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs) */ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ 
code */ int cpu = smp_processor_id(); - irq_desc_t *desc; + irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; kstat.irqs[cpu][irq]++; - desc = irq_desc + irq; - spin_lock(&irq_controller_lock); + spin_lock(&desc->lock); desc->handler->ack(irq); /* REPLAY is when Linux resends an IRQ that was dropped earlier @@ -540,7 +537,6 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs) status |= IRQ_INPROGRESS; /* we are handling it */ } desc->status = status; - spin_unlock(&irq_controller_lock); /* * If there is no IRQ handler or it was disabled, exit early. @@ -549,7 +545,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs) will take care of it. */ if (!action) - return 1; + goto out; /* * Edge triggered interrupts need to remember @@ -562,20 +558,24 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs) * SMP environment. */ for (;;) { + spin_unlock(&desc->lock); handle_IRQ_event(irq, ®s, action); - spin_lock(&irq_controller_lock); + spin_lock(&desc->lock); if (!(desc->status & IRQ_PENDING)) break; desc->status &= ~IRQ_PENDING; - spin_unlock(&irq_controller_lock); } desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED)) - desc->handler->end(irq); - spin_unlock(&irq_controller_lock); +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. 
+ */ + desc->handler->end(irq); + spin_unlock(&desc->lock); - if (softirq_state[cpu].active&softirq_state[cpu].mask) + if (softirq_state[cpu].active & softirq_state[cpu].mask) do_softirq(); return 1; } @@ -627,14 +627,16 @@ int request_irq(unsigned int irq, void free_irq(unsigned int irq, void *dev_id) { + irq_desc_t *desc; struct irqaction **p; unsigned long flags; if (irq >= NR_IRQS) return; - spin_lock_irqsave(&irq_controller_lock,flags); - p = &irq_desc[irq].action; + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; for (;;) { struct irqaction * action = *p; if (action) { @@ -645,22 +647,22 @@ void free_irq(unsigned int irq, void *dev_id) /* Found it - now remove it from the list of entries */ *pp = action->next; - if (!irq_desc[irq].action) { - irq_desc[irq].status |= IRQ_DISABLED; - irq_desc[irq].handler->shutdown(irq); + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); } - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); #ifdef CONFIG_SMP /* Wait to make sure it's not being used on another CPU */ - while (irq_desc[irq].status & IRQ_INPROGRESS) + while (desc->status & IRQ_INPROGRESS) barrier(); #endif kfree(action); return; } printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); return; } } @@ -676,21 +678,43 @@ void free_irq(unsigned int irq, void *dev_id) unsigned long probe_irq_on(void) { unsigned int i; - unsigned long delay; + irq_desc_t *desc; unsigned long val; + unsigned long delay; + + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. 
+ */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); /* - * first, enable any unassigned irqs + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) */ - spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { - if (!irq_desc[i].action) { - irq_desc[i].status |= IRQ_AUTODETECT | IRQ_WAITING; - if(irq_desc[i].handler->startup(i)) - irq_desc[i].status |= IRQ_PENDING; + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; } + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); /* * Wait for spurious interrupts to trigger @@ -702,24 +726,24 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ val = 0; - spin_lock_irq(&irq_controller_lock); - for (i=0; i<NR_IRQS; i++) { - unsigned int status = irq_desc[i].status; - - if (!(status & IRQ_AUTODETECT)) - continue; - - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - irq_desc[i].status = status & ~IRQ_AUTODETECT; - irq_desc[i].handler->shutdown(i); - continue; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. 
*/ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; } - - if (i < 32) - val |= 1 << i; + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); return val; } @@ -734,20 +758,22 @@ unsigned int probe_irq_mask(unsigned long val) unsigned int mask; mask = 0; - spin_lock_irq(&irq_controller_lock); for (i = 0; i < 16; i++) { - unsigned int status = irq_desc[i].status; + irq_desc_t *desc = irq_desc + i; + unsigned int status; - if (!(status & IRQ_AUTODETECT)) - continue; + spin_lock_irq(&desc->lock); + status = desc->status; - if (!(status & IRQ_WAITING)) - mask |= 1 << i; + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) + mask |= 1 << i; - irq_desc[i].status = status & ~IRQ_AUTODETECT; - irq_desc[i].handler->shutdown(i); + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); return mask & val; } @@ -762,22 +788,24 @@ int probe_irq_off(unsigned long val) nr_irqs = 0; irq_found = 0; - spin_lock_irq(&irq_controller_lock); - for (i=0; i<NR_IRQS; i++) { - unsigned int status = irq_desc[i].status; - - if (!(status & IRQ_AUTODETECT)) - continue; - - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); } - irq_desc[i].status = status & ~IRQ_AUTODETECT; - irq_desc[i].handler->shutdown(i); + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); if (nr_irqs > 1) irq_found = -irq_found; @@ -788,8 +816,9 @@ int probe_irq_off(unsigned long val) int setup_irq(unsigned int irq, struct 
irqaction * new) { int shared = 0; - struct irqaction *old, **p; unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; /* * Some drivers like serial.c use request_irq() heavily, @@ -811,12 +840,12 @@ int setup_irq(unsigned int irq, struct irqaction * new) /* * The following block of code has to be executed atomically */ - spin_lock_irqsave(&irq_controller_lock,flags); - p = &irq_desc[irq].action; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); return -EBUSY; } @@ -831,11 +860,171 @@ int setup_irq(unsigned int irq, struct irqaction * new) *p = new; if (!shared) { - irq_desc[irq].depth = 0; - irq_desc[irq].status &= ~IRQ_DISABLED; - irq_desc[irq].handler->startup(irq); + desc->depth = 0; + desc->status &= ~IRQ_DISABLED; + desc->handler->startup(irq); } - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); return 0; } +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +unsigned int irq_affinity [NR_IRQS] = { [0 ... 
NR_IRQS-1] = 0xffffffff}; + +#define HEX_DIGITS 8 + +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08x\n", irq_affinity[(int)data]); +} + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. + */ + value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 'F': c -= 'A'-10; break; + default: + goto out; + } + value = (value << 4) | c; + } +out: + *ret = value; + return 0; +} + +static int irq_affinity_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int irq = (int) data, full_count = count, err; + unsigned long new_value; + + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + + err = parse_hex_value(buffer, count, &new_value); + +#if CONFIG_SMP + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. 
+ */ + if (!(new_value & cpu_online_map)) + return -EINVAL; +#endif + + irq_affinity[irq] = new_value; + irq_desc[irq].handler->set_affinity(irq, new_value); + + return full_count; +} + +static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long *mask = (unsigned long *) data; + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", *mask); +} + +static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + unsigned long *mask = (unsigned long *) data, full_count = count, err; + unsigned long new_value; + + err = parse_hex_value(buffer, count, &new_value); + if (err) + return err; + + *mask = new_value; + return full_count; +} + +#define MAX_NAMELEN 10 + +static void register_irq_proc (unsigned int irq) +{ + struct proc_dir_entry *entry; + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type)) + return; + + memset(name, 0, MAX_NAMELEN); + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + irq_dir[irq] = proc_mkdir(name, root_irq_dir); + + /* create /proc/irq/1234/smp_affinity */ + entry = create_proc_entry("smp_affinity", 0700, irq_dir[irq]); + + entry->nlink = 1; + entry->data = (void *)irq; + entry->read_proc = irq_affinity_read_proc; + entry->write_proc = irq_affinity_write_proc; + + smp_affinity_entry[irq] = entry; +} + +unsigned long prof_cpu_mask = -1; + +void init_irq_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", 0); + + /* create /proc/irq/prof_cpu_mask */ + entry = create_proc_entry("prof_cpu_mask", 0700, root_irq_dir); + + entry->nlink = 1; + entry->data = (void *)&prof_cpu_mask; + entry->read_proc = prof_cpu_mask_read_proc; + entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. 
+ */ + for (i = 0; i < NR_IRQS; i++) { + if (irq_desc[i].handler == &no_irq_type) + continue; + register_irq_proc(i); + } +} + diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 26b6525d8..84490b40b 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -20,6 +20,9 @@ * Initial release. * 1.01 18 February 2000, Tigran Aivazian <tigran@sco.com> * Added read() support + cleanups. + * 1.02 21 February 2000, Tigran Aivazian <tigran@sco.com> + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. */ #include <linux/init.h> @@ -33,7 +36,7 @@ #include <asm/uaccess.h> #include <asm/processor.h> -#define MICROCODE_VERSION "1.01" +#define MICROCODE_VERSION "1.02" MODULE_DESCRIPTION("CPU (P6) microcode update driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@ocston.org>"); @@ -53,7 +56,7 @@ static void do_update_one(void *); /* * Bits in microcode_status. (31 bits of room for future expansion) */ -#define MICROCODE_IS_OPEN 0 /* set if /dev/microcode is in use */ +#define MICROCODE_IS_OPEN 0 /* set if device is in use */ static unsigned long microcode_status = 0; /* the actual array of microcode blocks, each 2048 bytes */ @@ -68,31 +71,16 @@ static struct file_operations microcode_fops = { release: microcode_release, }; -static struct inode_operations microcode_inops = { - default_file_ops: µcode_fops, -}; - static struct proc_dir_entry *proc_microcode; static int __init microcode_init(void) { - int size; - proc_microcode = create_proc_entry("microcode", S_IWUSR|S_IRUSR, proc_root_driver); if (!proc_microcode) { printk(KERN_ERR "microcode: can't create /proc/driver/microcode\n"); return -ENOMEM; } - proc_microcode->ops = µcode_inops; - size = smp_num_cpus * sizeof(struct microcode); - mc_applied = kmalloc(size, GFP_KERNEL); - if (!mc_applied) { - remove_proc_entry("microcode", proc_root_driver); - printk(KERN_ERR "microcode: can't allocate memory for saved microcode\n"); - 
return -ENOMEM; - } - memset(mc_applied, 0, size); /* so that reading from offsets corresponding to failed - update makes this obvious */ + proc_microcode->proc_fops = &microcode_fops; printk(KERN_INFO "P6 Microcode Update Driver v%s registered\n", MICROCODE_VERSION); return 0; } @@ -100,7 +88,8 @@ static int __init microcode_init(void) static void __exit microcode_exit(void) { remove_proc_entry("microcode", proc_root_driver); - kfree(mc_applied); + if (mc_applied) + kfree(mc_applied); printk(KERN_INFO "P6 Microcode Update Driver v%s unregistered\n", MICROCODE_VERSION); } @@ -119,6 +108,15 @@ static int microcode_open(struct inode *inode, struct file *file) if (test_and_set_bit(MICROCODE_IS_OPEN, &microcode_status)) return -EBUSY; + if ((file->f_flags & O_ACCMODE) == O_WRONLY) { + proc_microcode->size = 0; + if (mc_applied) { + memset(mc_applied, 0, smp_num_cpus * sizeof(struct microcode)); + kfree(mc_applied); + mc_applied = NULL; + } + } + MOD_INC_USE_COUNT; return 0; @@ -243,6 +241,16 @@ static ssize_t microcode_write(struct file *file, const char *buf, size_t len, l sizeof(struct microcode)); return -EINVAL; } + if (!mc_applied) { + int size = smp_num_cpus * sizeof(struct microcode); + mc_applied = kmalloc(size, GFP_KERNEL); + if (!mc_applied) { + printk(KERN_ERR "microcode: can't allocate memory for saved microcode\n"); + return -ENOMEM; + } + memset(mc_applied, 0, size); + } + lock_kernel(); microcode_num = len/sizeof(struct microcode); microcode = vmalloc(len); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 81685d2f5..030b31647 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -316,11 +316,14 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) return num_processors; } +static struct intel_mp_floating *mpf_found; + /* * Scan the memory blocks for an SMP configuration block. 
*/ -static int __init smp_get_mpf(struct intel_mp_floating *mpf) +void __init get_smp_config (void) { + struct intel_mp_floating *mpf = mpf_found; printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); if (mpf->mpf_feature2 & (1<<7)) { printk(" IMCR and PIC compatibility mode.\n"); @@ -329,7 +332,6 @@ static int __init smp_get_mpf(struct intel_mp_floating *mpf) printk(" Virtual Wire compatibility mode.\n"); pic_mode = 0; } - smp_found_config = 1; /* * default CPU id - if it's different in the mptable * then we change it before first using it. @@ -388,7 +390,7 @@ static int __init smp_get_mpf(struct intel_mp_floating *mpf) default: printk("???\nUnknown standard configuration %d\n", mpf->mpf_feature1); - return 1; + return; } if (mpf->mpf_feature1 > 4) { printk("Bus #1 is PCI\n"); @@ -412,10 +414,9 @@ static int __init smp_get_mpf(struct intel_mp_floating *mpf) /* * Only use the first configuration found. */ - return 1; } -static int __init smp_scan_config(unsigned long base, unsigned long length) +static int __init smp_scan_config (unsigned long base, unsigned long length) { unsigned long *bp = phys_to_virt(base); struct intel_mp_floating *mpf; @@ -432,9 +433,13 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) ((mpf->mpf_specification == 1) || (mpf->mpf_specification == 4)) ) { - printk("found SMP MP-table at %08ld\n", + smp_found_config = 1; + printk("found SMP MP-table at %08lx\n", virt_to_phys(mpf)); - smp_get_mpf(mpf); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); + if (mpf->mpf_physptr) + reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); + mpf_found = mpf; return 1; } bp += 4; @@ -443,7 +448,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) return 0; } -void __init init_intel_smp (void) +void __init find_intel_smp (void) { unsigned int address; @@ -488,7 +493,7 @@ void __init init_intel_smp (void) * sense, but it doesnt have a BIOS(-configuration table). * No problem for Linux. 
*/ -void __init init_visws_smp(void) +void __init find_visws_smp(void) { smp_found_config = 1; @@ -505,13 +510,13 @@ void __init init_visws_smp(void) * - Intel MP Configuration Table * - or SGI Visual Workstation configuration */ -void __init init_smp_config (void) +void __init find_smp_config (void) { #ifdef CONFIG_X86_IO_APIC - init_intel_smp(); + find_intel_smp(); #endif #ifdef CONFIG_VISWS - init_visws_smp(); + find_visws_smp(); #endif } diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c index a0a4ab851..cc9c7eafe 100644 --- a/arch/i386/kernel/mtrr.c +++ b/arch/i386/kernel/mtrr.c @@ -1507,11 +1507,6 @@ static struct file_operations mtrr_fops = # ifdef CONFIG_PROC_FS -static struct inode_operations proc_mtrr_inode_operations = -{ - &mtrr_fops, /* default property file-ops */ -}; - static struct proc_dir_entry *proc_root_mtrr; # endif /* CONFIG_PROC_FS */ @@ -1836,9 +1831,9 @@ int __init mtrr_init(void) #ifdef CONFIG_PROC_FS proc_root_mtrr = create_proc_entry ("mtrr", S_IWUSR | S_IRUGO, &proc_root); - proc_root_mtrr->ops = &proc_mtrr_inode_operations; + proc_root_mtrr->proc_fops = &mtrr_fops; #endif -#ifdev CONFIG_DEVFS_FS +#ifdef CONFIG_DEVFS_FS devfs_handle = devfs_register (NULL, "cpu/mtrr", 0, DEVFS_FL_DEFAULT, 0, 0, S_IFREG | S_IRUGO | S_IWUSR, 0, 0, &mtrr_fops, NULL); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 0f61ca543..19f7022a4 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -74,8 +74,13 @@ void enable_hlt(void) */ static void default_idle(void) { - if (current_cpu_data.hlt_works_ok && !hlt_counter) - asm volatile("sti ; hlt" : : : "memory"); + if (current_cpu_data.hlt_works_ok && !hlt_counter) { + asm volatile("cli" : : : "memory"); + if (!current->need_resched) + asm volatile("sti ; hlt" : : : "memory"); + else + asm volatile("sti" : : : "memory"); + } } /* diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index c530eece0..febc592ae 100644 --- 
a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -150,8 +150,9 @@ int __down_interruptible(struct semaphore * sem) int __down_trylock(struct semaphore * sem) { int sleepers; + unsigned long flags; - spin_lock_irq(&semaphore_lock); + spin_lock_irqsave(&semaphore_lock, flags); sleepers = sem->sleepers + 1; sem->sleepers = 0; @@ -162,7 +163,7 @@ int __down_trylock(struct semaphore * sem) if (!atomic_add_negative(sleepers, &sem->count)) wake_up(&sem->wait); - spin_unlock_irq(&semaphore_lock); + spin_unlock_irqrestore(&semaphore_lock, flags); return 1; } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index cd2a3d8af..b5602ebec 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -119,7 +119,7 @@ extern int rd_image_start; /* starting block # of image */ #endif extern int root_mountflags; -extern int _text, _etext, _edata, _end; +extern char _text, _etext, _edata, _end; extern unsigned long cpu_hz; /* @@ -709,9 +709,20 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_IO_APIC /* - * Save possible boot-time SMP configuration: + * Find and reserve possible boot-time SMP configuration: */ - init_smp_config(); + find_smp_config(); +#endif + paging_init(); +#ifdef CONFIG_X86_IO_APIC + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + get_smp_config(); +#endif +#ifdef CONFIG_X86_LOCAL_APIC + init_apic_mappings(); #endif #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 07797e760..7400b628b 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -360,8 +360,6 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) printk("Do you have a strange power saving mode enabled?\n"); } -atomic_t nmi_counter[NR_CPUS]; - #if CONFIG_X86_IO_APIC int nmi_watchdog = 1; @@ -437,7 +435,8 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) { unsigned char reason = inb(0x61); - 
atomic_inc(nmi_counter+smp_processor_id()); + + atomic_inc(&nmi_counter(smp_processor_id())); if (!(reason & 0xc0)) { #if CONFIG_X86_IO_APIC /* diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index c3991b056..8b1324520 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -438,10 +438,6 @@ void __init paging_init(void) __flush_tlb_all(); -#ifdef CONFIG_X86_LOCAL_APIC - init_apic_mappings(); -#endif - #ifdef CONFIG_HIGHMEM kmap_init(); #endif diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 685d85b20..af51038e5 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -82,6 +82,8 @@ void ia64_elf32_init(struct pt_regs *regs) /* Do all the IA-32 setup here */ + current->thread.map_base = 0x40000000; + /* CS descriptor */ __asm__("mov ar.csd = %0" : /* no outputs */ : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0xBL, 1L, diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 82ba58129..bd7b0517b 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -75,7 +75,7 @@ ia32_syscall_table: data8 sys_unlink /* 10 */ data8 sys32_execve data8 sys_chdir - data8 sys_ni_syscall /* sys_time is not supported on ia64 */ + data8 sys32_time data8 sys_mknod data8 sys_chmod /* 15 */ data8 sys_lchown diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index d61f1cfe5..8d4e4a8fd 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -278,7 +278,7 @@ do_mmap_fake(struct file *file, unsigned long addr, unsigned long len, if (!file) return -EINVAL; inode = file->f_dentry->d_inode; - if (!inode->i_op || !inode->i_op->default_file_ops) + if (!inode->i_fop) return -EINVAL; if (!file->f_op->read) return -EINVAL; @@ -1930,6 +1930,25 @@ out: return err; } +/* + * sys_time() can be implemented in user-level using + * sys_gettimeofday(). IA64 did this but i386 Linux did not + * so we have to implement this system call here. 
+ */ +asmlinkage long sys32_time(int * tloc) +{ + int i; + + /* SMP: This is fairly trivial. We grab CURRENT_TIME and + stuff it to user space. No side effects */ + i = CURRENT_TIME; + if (tloc) { + if (put_user(i,tloc)) + i = -EFAULT; + } + return i; +} + #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ /* In order to reduce some races, while at the same time doing additional diff --git a/arch/ia64/kdb/kdbsupport.c b/arch/ia64/kdb/kdbsupport.c index 0b574ae6e..d074a01a3 100644 --- a/arch/ia64/kdb/kdbsupport.c +++ b/arch/ia64/kdb/kdbsupport.c @@ -28,9 +28,10 @@ #include <linux/stddef.h> #include <linux/vmalloc.h> -#include <asm/uaccess.h> +#include <asm/delay.h> #include <asm/kdbsupport.h> #include <asm/rse.h> +#include <asm/uaccess.h> extern kdb_state_t kdb_state ; k_machreg_t dbregs[KDB_DBREGS]; @@ -45,6 +46,21 @@ kdb_setup (char *str) __setup("kdb", kdb_setup); static int +kdb_ia64_itm (int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + int diag; + unsigned long val; + + diag = kdbgetularg(argv[1], &val); + if (diag) + return diag; + kdb_printf("new itm=%0xlx\n", val); + + ia64_set_itm(val); + return 0; +} + +static int kdb_ia64_sir (int argc, const char **argv, const char **envp, struct pt_regs *regs) { u64 lid, tpr, lrr0, lrr1, itv, pmv, cmcv; @@ -53,15 +69,17 @@ kdb_ia64_sir (int argc, const char **argv, const char **envp, struct pt_regs *re asm ("mov %0=cr.tpr" : "=r"(tpr)); asm ("mov %0=cr.lrr0" : "=r"(lrr0)); asm ("mov %0=cr.lrr1" : "=r"(lrr1)); - printk ("lid=0x%lx, tpr=0x%lx, lrr0=0x%lx, llr1=0x%lx\n", lid, tpr, lrr0, lrr1); + printk("lid=0x%lx, tpr=0x%lx, lrr0=0x%lx, llr1=0x%lx\n", lid, tpr, lrr0, lrr1); asm ("mov %0=cr.itv" : "=r"(itv)); asm ("mov %0=cr.pmv" : "=r"(pmv)); asm ("mov %0=cr.cmcv" : "=r"(cmcv)); - printk ("itv=0x%lx, pmv=0x%lx, cmcv=0x%lx\n", itv, pmv, cmcv); + printk("itv=0x%lx, pmv=0x%lx, cmcv=0x%lx\n", itv, pmv, cmcv); - printk ("irr=0x%016lx,0x%016lx,0x%016lx,0x%016lx\n", + 
printk("irr=0x%016lx,0x%016lx,0x%016lx,0x%016lx\n", ia64_get_irr0(), ia64_get_irr1(), ia64_get_irr2(), ia64_get_irr3()); + + printk("itc=0x%016lx, itm=0x%016lx\n", ia64_get_itc(), ia64_get_itm()); return 0; } @@ -90,6 +108,7 @@ kdb_init (void) kdb_state.bkpt_handling_state = BKPTSTATE_NOT_HANDLED ; kdb_register("irr", kdb_ia64_sir, "", "Show interrupt registers", 0); + kdb_register("itm", kdb_ia64_itm, "", "Set new ITM value", 0); } /* diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 5efe50164..6059e41c6 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -158,6 +158,9 @@ ia64_handle_irq (unsigned long irq, struct pt_regs *regs) unsigned long eoi_ptr; # ifdef CONFIG_USB + extern void reenable_usb (void); + extern void disable_usb (void); + if (usbfix) disable_usb(); # endif diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c index 1ae904fe8..cc59e0c72 100644 --- a/arch/ia64/kernel/irq_internal.c +++ b/arch/ia64/kernel/irq_internal.c @@ -60,7 +60,7 @@ internal_noop (unsigned int irq) } struct hw_interrupt_type irq_type_ia64_internal = { - "IA64 internal", + "IA64-internal", (void (*)(unsigned long)) internal_noop, /* init */ internal_noop, /* startup */ internal_noop, /* shutdown */ diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 4c3ac242a..b4592999f 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -1026,7 +1026,7 @@ dispatch_to_fault_handler: // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) mov r16=cr.ifa rsm psr.dt -#if 0 +#if 1 // If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h mov r17=_PAGE_SIZE_4K<<2 ;; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 5b6deb5f5..cc26b8760 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -97,6 +97,14 @@ cpu_idle (void *unused) check_pgt_cache(); if (pm_idle) (*pm_idle)(); +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + if (ia64_get_itm() < 
ia64_get_itc()) { + extern void ia64_reset_itm(); + + printk("cpu_idle: ITM in past, resetting it...\n"); + ia64_reset_itm(); + } +#endif } } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 7c5ace740..cfcff3063 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -21,6 +21,10 @@ #include <asm/ptrace.h> #include <asm/sal.h> #include <asm/system.h> +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + extern rwlock_t xtime_lock; extern volatile unsigned long lost_ticks; @@ -61,7 +65,7 @@ do_profile (unsigned long ip) * update to jiffy. The xtime_lock must be at least read-locked when * calling this routine. */ -static inline unsigned long +static /*inline*/ unsigned long gettimeoffset (void) { unsigned long now = ia64_get_itc(); @@ -186,6 +190,20 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_unlock(&xtime_lock); } +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + +void +ia64_reset_itm (void) +{ + unsigned long flags; + + local_irq_save(flags); + timer_interrupt(0, 0, current); + local_irq_restore(flags); +} + +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + /* * Encapsulate access to the itm structure for SMP. */ diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index c242622ec..1f5106036 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -110,15 +110,75 @@ void ia64_bad_break (unsigned long break_num, struct pt_regs *regs) { siginfo_t siginfo; + int sig, code; - /* gdb uses a break number of 0xccccc for debug breakpoints: */ - if (break_num != 0xccccc) - die_if_kernel("Bad break", regs, break_num); + /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + siginfo.si_imm = break_num; - siginfo.si_signo = SIGTRAP; - siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? 
*/ - siginfo.si_code = TRAP_BRKPT; - send_sig_info(SIGTRAP, &siginfo, current); + switch (break_num) { + case 0: /* unknown error */ + sig = SIGILL; code = ILL_ILLOPC; + break; + + case 1: /* integer divide by zero */ + sig = SIGFPE; code = FPE_INTDIV; + break; + + case 2: /* integer overflow */ + sig = SIGFPE; code = FPE_INTOVF; + break; + + case 3: /* range check/bounds check */ + sig = SIGFPE; code = FPE_FLTSUB; + break; + + case 4: /* null pointer dereference */ + sig = SIGSEGV; code = SEGV_MAPERR; + break; + + case 5: /* misaligned data */ + sig = SIGSEGV; code = BUS_ADRALN; + break; + + case 6: /* decimal overflow */ + sig = SIGFPE; code = __FPE_DECOVF; + break; + + case 7: /* decimal divide by zero */ + sig = SIGFPE; code = __FPE_DECDIV; + break; + + case 8: /* packed decimal error */ + sig = SIGFPE; code = __FPE_DECERR; + break; + + case 9: /* invalid ASCII digit */ + sig = SIGFPE; code = __FPE_INVASC; + break; + + case 10: /* invalid decimal digit */ + sig = SIGFPE; code = __FPE_INVDEC; + break; + + case 11: /* paragraph stack overflow */ + sig = SIGSEGV; code = __SEGV_PSTKOVF; + break; + + default: + if (break_num < 0x40000 || break_num > 0x100000) + die_if_kernel("Bad break", regs, break_num); + + if (break_num < 0x80000) { + sig = SIGILL; code = __ILL_BREAK; + } else { + sig = SIGTRAP; code = TRAP_BRKPT; + } + } + siginfo.si_signo = sig; + siginfo.si_errno = 0; + siginfo.si_code = code; + send_sig_info(sig, &siginfo, current); } /* @@ -240,6 +300,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) { long exception, bundle[2]; unsigned long fault_ip; + struct siginfo siginfo; static int fpu_swa_count = 0; static unsigned long last_time; @@ -265,21 +326,41 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) ia64_increment_ip(regs); } else if (exception == -1) { printk("handle_fpu_swa: fp_emulate() returned -1\n"); - return -2; + return -1; } else { /* is next instruction a trap? 
*/ if (exception & 2) { ia64_increment_ip(regs); } - return -1; + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + if (isr & 0x11) { + siginfo.si_code = FPE_FLTINV; + } else if (isr & 0x44) { + siginfo.si_code = FPE_FLTDIV; + } + send_sig_info(SIGFPE, &siginfo, current); } } else { if (exception == -1) { printk("handle_fpu_swa: fp_emulate() returned -1\n"); - return -2; + return -1; } else if (exception != 0) { /* raise exception */ - return -1; + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + if (isr & 0x880) { + siginfo.si_code = FPE_FLTOVF; + } else if (isr & 0x1100) { + siginfo.si_code = FPE_FLTUND; + } else if (isr & 0x2200) { + siginfo.si_code = FPE_FLTRES; + } + send_sig_info(SIGFPE, &siginfo, current); } } return 0; @@ -369,22 +450,19 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, return; case 30: /* Unaligned fault */ - sprintf(buf, "Unaligned access in kernel mode---don't do this!"); + sprintf(buf, "Kernel unaligned trap accessing %016lx (ip=%016lx)!", + ifa, regs->cr_iip + ia64_psr(regs)->ri); break; case 32: /* fp fault */ case 33: /* fp trap */ - result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); + result = handle_fpu_swa((vector == 32) ? 
1 : 0, regs, &isr); if (result < 0) { siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = 0; /* XXX fix me */ + siginfo.si_code = FPE_FLTINV; siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - send_sig_info(SIGFPE, &siginfo, current); - if (result == -1) - send_sig_info(SIGFPE, &siginfo, current); - else - force_sig(SIGFPE, current); + force_sig(SIGFPE, current); } return; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 0bd213f6b..014adcf35 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1384,30 +1384,33 @@ ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs) load_store_t *insn; int ret = -1; - /* - * We flag unaligned references while in kernel as - * errors: the kernel must be fixed. The switch code - * is in ivt.S at entry 30. - * - * So here we keep a simple sanity check. - */ - if ( !user_mode(regs) ) { - die_if_kernel("Unaligned reference while in kernel\n", regs, 30); - /* NOT_REACHED */ + if (current->thread.flags & IA64_THREAD_UAC_SIGBUS) { + struct siginfo si; + + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void *) ifa; + send_sig_info (SIGBUS, &si, current); + return; } - /* - * Make sure we log the unaligned access, so that user/sysadmin can notice it - * and eventually fix the program. - * - * We don't want to do that for every access so we pace it with jiffies. - */ - if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0; - if ( ++unalign_count < 5 ) { - last_time = jiffies; - printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n", - current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri); - + if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)) { + /* + * Make sure we log the unaligned access, so that + * user/sysadmin can notice it and eventually fix the + * program. + * + * We don't want to do that for every access so we + * pace it with jiffies. 
+ */ + if (unalign_count > 5 && jiffies - last_time > 5*HZ) + unalign_count = 0; + if (++unalign_count < 5) { + last_time = jiffies; + printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n", + current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri); + } } DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr)); diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S index 03a540a80..58c92876f 100644 --- a/arch/ia64/lib/copy_user.S +++ b/arch/ia64/lib/copy_user.S @@ -1,71 +1,375 @@ -/* - * This routine copies a linear memory buffer across the user/kernel boundary. When - * reading a byte from the source causes a fault, the remainder of the destination - * buffer is zeroed out. Note that this can happen only when copying from user - * to kernel memory and we do this to absolutely guarantee that the - * kernel doesn't operate on random data. - * - * This file is derived from arch/alpha/lib/copy_user.S. - * - * Inputs: - * in0: address of destination buffer - * in1: address of source buffer - * in2: length of buffer in bytes - * Outputs: - * r8: number of bytes that didn't get copied due to a fault - * - * Copyright (C) 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> - */ - -#define EXI(x...) \ -99: x; \ +// The label comes first because our store instruction contains a comma +// and confuse the preprocessor otherwise +// +#undef DEBUG +#ifdef DEBUG +#define EX(y,x...) \ +99: x +#else +#define EX(y,x...) \ .section __ex_table,"a"; \ - data4 @gprel(99b); \ - data4 .Lexit_in-99b; \ - .previous + data4 @gprel(99f); \ + data4 y-99f; \ + .previous; \ +99: x +#endif -#define EXO(x...) 
\ -99: x; \ - .section __ex_table,"a"; \ - data4 @gprel(99b); \ - data4 .Lexit_out-99b; \ - .previous - - .text - .psr abi64 - .psr lsb - .lsb - - .align 32 - .global __copy_user - .proc __copy_user +// +// Tuneable parameters +// +#define COPY_BREAK 16 // we do byte copy below (must be >=16) +#define PIPE_DEPTH 4 // pipe depth + +#define EPI p[PIPE_DEPTH-1] // PASTE(p,16+PIPE_DEPTH-1) + +// +// arguments +// +#define dst in0 +#define src in1 +#define len in2 + +// +// local registers +// +#define cnt r18 +#define len2 r19 +#define saved_lc r20 +#define saved_pr r21 +#define tmp r22 +#define val r23 +#define src1 r24 +#define dst1 r25 +#define src2 r26 +#define dst2 r27 +#define len1 r28 +#define enddst r29 +#define endsrc r30 +#define saved_pfs r31 + .text + .psr abi64 + .psr lsb + + .align 16 + .global __copy_user + .proc __copy_user __copy_user: - alloc r10=ar.pfs,3,0,0,0 - mov r9=ar.lc // save ar.lc - mov ar.lc=in2 // set ar.lc to length of buffer - br.sptk.few .Lentr - - // XXX braindead copy loop---this needs to be optimized -.Loop1: - EXI(ld1 r8=[in1],1) - ;; - EXO(st1 [in0]=r8,1) -.Lentr: br.cloop.dptk.few .Loop1 // repeat unless ar.lc--==0 - ;; // avoid RAW on ar.lc -.Lexit_out: - mov r8=ar.lc // return how many bytes we _didn't_ copy - mov ar.lc=r9 - br.ret.sptk.few rp - -.Lexit_in: - // clear the remainder of the buffer: - mov r8=ar.lc // return how many bytes we _didn't_ copy -.Loop2: - st1 [in0]=r0,1 // this cannot fault because we get here only on user->kernel copies - br.cloop.dptk.few .Loop2 - ;; // avoid RAW on ar.lc - mov ar.lc=r9 - br.ret.sptk.few rp - - .endp __copy_user + alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) + + .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + adds len2=-1,len // br.ctop is repeat/until + mov ret0=r0 + + ;; // RAW of cfm when len=0 + cmp.eq p8,p0=r0,len // check for zero length + mov saved_lc=ar.lc // preserve ar.lc (slow) +(p8) br.ret.spnt.few rp // empty mempcy() + ;; + add 
enddst=dst,len // first byte after end of source + add endsrc=src,len // first byte after end of destination + mov saved_pr=pr // preserve predicates + + mov dst1=dst // copy because of rotation + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + + mov src1=src // copy because of rotation + mov ar.lc=len2 // initialize lc for small count + cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy + + xor tmp=src,dst // same alignment test prepare +(p10) br.cond.dptk.few long_memcpy + ;; // RAW pr.rot/p16 ? + // + // Now we do the byte by byte loop with software pipeline + // + // p7 is necessarily false by now +1: + EX(failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + + EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs // restore ar.ec + br.ret.sptk.few rp // end of short memcpy + + // + // Beginning of long mempcy (i.e. > 16 bytes) + // +long_memcpy: + tbit.nz p6,p7=src1,0 // odd alignement + and tmp=7,tmp + ;; + cmp.eq p10,p8=r0,tmp + mov len1=len // copy because of rotation +(p8) br.cond.dpnt.few 1b // XXX Fixme. memcpy_diff_align + ;; + // At this point we know we have more than 16 bytes to copy + // and also that both src and dest have the same alignment + // which may not be the one we want. So for now we must move + // forward slowly until we reach 16byte alignment: no need to + // worry about reaching the end of buffer. + // + EX(failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned +(p6) adds len1=-1,len1;; + tbit.nz p7,p0=src1,1 + ;; + EX(failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned +(p7) adds len1=-2,len1;; + tbit.nz p8,p0=src1,2 + ;; + // + // Stop bit not required after ld4 because if we fail on ld4 + // we have never executed the ld1, therefore st1 is not executed. 
+ // + EX(failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned + EX(failure_out,(p6) st1 [dst1]=val1[0],1) + tbit.nz p9,p0=src1,3 + ;; + // + // Stop bit not required after ld8 because if we fail on ld8 + // we have never executed the ld2, therefore st2 is not executed. + // + EX(failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned + EX(failure_out,(p7) st2 [dst1]=val1[1],2) +(p8) adds len1=-4,len1 + ;; + EX(failure_out, (p8) st4 [dst1]=val2[0],4) +(p9) adds len1=-8,len1;; + shr.u cnt=len1,4 // number of 128-bit (2x64bit) words + ;; + EX(failure_out, (p9) st8 [dst1]=val2[1],8) + tbit.nz p6,p0=len1,3 + cmp.eq p7,p0=r0,cnt + adds tmp=-1,cnt // br.ctop is repeat/until +(p7) br.cond.dpnt.few .dotail // we have less than 16 bytes left + ;; + adds src2=8,src1 + adds dst2=8,dst1 + mov ar.lc=tmp + ;; + // + // 16bytes/iteration + // +2: + EX(failure_in3,(p16) ld8 val1[0]=[src1],16) +(p16) ld8 val2[0]=[src2],16 + + EX(failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk.few 2b + ;; // RAW on src1 when fall through from loop + // + // Tail correction based on len only + // + // No matter where we come from (loop or test) the src1 pointer + // is 16 byte aligned AND we have less than 16 bytes to copy. 
+ // +.dotail: + EX(failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes + tbit.nz p7,p0=len1,2 + ;; + EX(failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes + tbit.nz p8,p0=len1,1 + ;; + EX(failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes + tbit.nz p9,p0=len1,0 + ;; + EX(failure_out, (p6) st8 [dst1]=val1[0],8) + ;; + EX(failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left + mov ar.lc=saved_lc + ;; + EX(failure_out,(p7) st4 [dst1]=val1[1],4) + mov pr=saved_pr,0xffffffffffff0000 + ;; + EX(failure_out, (p8) st2 [dst1]=val2[0],2) + mov ar.pfs=saved_pfs + ;; + EX(failure_out, (p9) st1 [dst1]=val2[1]) + br.ret.dptk.few rp + + + + // + // Here we handle the case where the byte by byte copy fails + // on the load. + // Several factors make the zeroing of the rest of the buffer kind of + // tricky: + // - the pipeline: loads/stores are not in sync (pipeline) + // + // In the same loop iteration, the dst1 pointer does not directly + // reflect where the faulty load was. + // + // - pipeline effect + // When you get a fault on load, you may have valid data from + // previous loads not yet store in transit. Such data must be + // store normally before moving onto zeroing the rest. + // + // - single/multi dispersal independence. + // + // solution: + // - we don't disrupt the pipeline, i.e. data in transit in + // the software pipeline will be eventually move to memory. + // We simply replace the load with a simple mov and keep the + // pipeline going. We can't really do this inline because + // p16 is always reset to 1 when lc > 0. + // +failure_in_pipe1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +1: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk.few 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.dptk.few rp + + + // + // Here we handle the head & tail part when we check for alignment. + // The following code handles only the load failures. 
The + // main diffculty comes from the fact that loads/stores are + // scheduled. So when you fail on a load, the stores corresponding + // to previous successful loads must be executed. + // + // However some simplifications are possible given the way + // things work. + // + // 1) HEAD + // Theory of operation: + // + // Page A | Page B + // ---------|----- + // 1|8 x + // 1 2|8 x + // 4|8 x + // 1 4|8 x + // 2 4|8 x + // 1 2 4|8 x + // |1 + // |2 x + // |4 x + // + // page_size >= 4k (2^12). (x means 4, 2, 1) + // Here we suppose Page A exists and Page B does not. + // + // As we move towards eight byte alignment we may encounter faults. + // The numbers on each page show the size of the load (current alignment). + // + // Key point: + // - if you fail on 1, 2, 4 then you have never executed any smaller + // size loads, e.g. failing ld4 means no ld1 nor ld2 executed + // before. + // + // This allows us to simplify the cleanup code, because basically you + // only have to worry about "pending" stores in the case of a failing + // ld8(). Given the way the code is written today, this means only + // worry about st2, st4. There we can use the information encapsulated + // into the predicates. + // + // Other key point: + // - if you fail on the ld8 in the head, it means you went straight + // to it, i.e. 8byte alignment within an unexisting page. + // Again this comes from the fact that if you crossed just for the the ld8 then + // you are 8byte aligned but also 16byte align, therefore you would + // either go for the 16byte copy loop OR the ld8 in the tail part. + // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible + // because it would mean you had 15bytes to copy in which case you + // would have defaulted to the byte by byte copy. + // + // + // 2) TAIL + // Here we now we have less than 16 bytes AND we are either 8 or 16 byte + // aligned. 
+ // + // Key point: + // This means that we either: + // - are right on a page boundary + // OR + // - are at more than 16 bytes from a page boundary with + // at most 15 bytes to copy: no chance of crossing. + // + // This allows us to assume that if we fail on a load we haven't possibly + // executed any of the previous (tail) ones, so we don't need to do + // any stores. For instance, if we fail on ld2, this means we had + // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. + // + // This means that we are in a situation similar the a fault in the + // head part. That's nice! + // +failure_in1: +// sub ret0=enddst,dst1 // number of bytes to zero, i.e. not copied +// sub len=enddst,dst1,1 + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + sub len=endsrc,src1,1 + // + // we know that ret0 can never be zero at this point + // because we failed why trying to do a load, i.e. there is still + // some work to do. + // The failure_in1bis and length problem is taken care of at the + // calling side. + // + ;; +failure_in1bis: // from (failure_in3) + mov ar.lc=len // Continue with a stupid byte store. + ;; +5: + st1 [dst1]=r0,1 + br.cloop.dptk.few 5b + ;; +skip_loop: + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.dptk.few rp + + // + // Here we simply restart the loop but instead + // of doing loads we fill the pipeline with zeroes + // We can't simply store r0 because we may have valid + // data in transit in the pipeline. + // ar.lc and ar.ec are setup correctly at this point + // + // we MUST use src1/endsrc here and not dst1/enddst because + // of the pipeline effect. + // +failure_in3: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + ;; +2: +(p16) mov val1[0]=r0 +(p16) mov val2[0]=r0 +(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk.few 2b + ;; + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? 
+ sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk.few failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.dptk.few rp + + // + // handling of failures on stores: that's the easy part + // +failure_out: + sub ret0=enddst,dst1 + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + + mov ar.pfs=saved_pfs + br.ret.dptk.few rp + + + .endp __copy_user + diff --git a/arch/mips/defconfig b/arch/mips/defconfig index 5516700c1..bec96e794 100644 --- a/arch/mips/defconfig +++ b/arch/mips/defconfig @@ -176,6 +176,8 @@ CONFIG_SCSI_CONSTANTS=y # SCSI low-level drivers # CONFIG_SCSI_SGIWD93=y +CONFIG_SCSI_SGIWD93=y +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_7000FASST is not set # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AHA152X is not set @@ -210,7 +212,6 @@ CONFIG_SCSI_SGIWD93=y # CONFIG_SCSI_U14_34F is not set # CONFIG_SCSI_ULTRASTOR is not set # CONFIG_SCSI_DEBUG is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set # # I2O device support @@ -269,6 +270,7 @@ CONFIG_PSMOUSE=y # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set # CONFIG_RTC is not set +# CONFIG_EFI_RTC is not set # # Video For Linux diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index b0225f750..41ea11b98 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -1,4 +1,4 @@ -/* $Id: irixelf.c,v 1.23 2000/01/29 01:41:59 ralf Exp $ +/* $Id: irixelf.c,v 1.24 2000/02/04 07:40:23 ralf Exp $ * * irixelf.c: Code to load IRIX ELF executables which conform to * the MIPS ABI. 
@@ -257,11 +257,11 @@ static unsigned int load_irix_interp(struct elfhdr * interp_elf_ex, #endif /* First of all, some simple consistency checks */ - if((interp_elf_ex->e_type != ET_EXEC && - interp_elf_ex->e_type != ET_DYN) || - !elf_check_arch(interp_elf_ex->e_machine) || - (!interpreter_dentry->d_inode->i_op || - !interpreter_dentry->d_inode->i_op->default_file_ops->mmap)) { + if ((interp_elf_ex->e_type != ET_EXEC && + interp_elf_ex->e_type != ET_DYN) || + !elf_check_arch(interp_elf_ex->e_machine) || + (!interpreter_dentry->d_inode->i_fop || + !interpreter_dentry->d_inode->i_fop->mmap)) { printk("IRIX interp has bad e_type %d\n", interp_elf_ex->e_type); return 0xffffffff; } @@ -410,9 +410,8 @@ static int verify_binary(struct elfhdr *ehp, struct linux_binprm *bprm) /* First of all, some simple consistency checks */ if((ehp->e_type != ET_EXEC && ehp->e_type != ET_DYN) || !elf_check_arch(ehp->e_machine) || - (!bprm->dentry->d_inode->i_op || - !bprm->dentry->d_inode->i_op->default_file_ops || - !bprm->dentry->d_inode->i_op->default_file_ops->mmap)) { + (!bprm->dentry->d_inode->i_fop || + !bprm->dentry->d_inode->i_fop->mmap)) { return -ENOEXEC; } @@ -877,8 +876,8 @@ static inline int do_load_irix_library(struct file *file) /* First of all, some simple consistency checks. */ if(elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || !elf_check_arch(elf_ex.e_machine) || - (!dentry->d_inode->i_op || - !dentry->d_inode->i_op->default_file_ops->mmap)) + (!dentry->d_inode->i_fop || + !dentry->d_inode->i_fop->mmap)) return -ENOEXEC; /* Now read in all of the header information. */ diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 4e3ba5ad9..239576e4e 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -1,4 +1,4 @@ -/* $Id: irq.c,v 1.19 2000/02/04 07:40:23 ralf Exp $ +/* $Id: irq.c,v 1.20 2000/02/23 00:41:00 ralf Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. 
See the file "COPYING" in the main directory of this archive @@ -32,6 +32,24 @@ #include <asm/nile4.h> /* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the appropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +irq_cpustat_t irq_stat [NR_CPUS]; + +/* * This contains the irq mask for both 8259A irq controllers, it's an * int so we can deal with the third PIC in some systems like the RM300. * (XXX This is broken for big endian.) diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c index f1c65805f..b8e1e6ec2 100644 --- a/arch/mips/kernel/mips_ksyms.c +++ b/arch/mips/kernel/mips_ksyms.c @@ -1,4 +1,4 @@ -/* $Id: mips_ksyms.c,v 1.24 2000/02/04 07:40:23 ralf Exp $ +/* $Id: mips_ksyms.c,v 1.25 2000/02/24 00:12:40 ralf Exp $ * * Export MIPS-specific functions needed for loadable modules. * @@ -56,8 +56,6 @@ EXPORT_SYMBOL_NOVERS(strtok); EXPORT_SYMBOL_NOVERS(strpbrk); EXPORT_SYMBOL(_clear_page); -EXPORT_SYMBOL(local_bh_count); -EXPORT_SYMBOL(local_irq_count); EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); EXPORT_SYMBOL(kernel_thread); diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index c2ee57b86..9bffcdc96 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -83,3 +83,8 @@ int get_cpuinfo(char *buffer) return len; } + +void init_irq_proc(void) +{ + /* Nothing, for now.
*/ +} diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 43cf5ad74..f6209461f 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.21 2000/01/26 00:07:44 ralf Exp $ +/* $Id: setup.c,v 1.22 2000/01/27 01:05:23 ralf Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -220,4 +220,6 @@ void __init setup_arch(char **cmdline_p) *memory_start_p = initrd_end; } #endif + + paging_init(); } diff --git a/arch/mips/sgi/kernel/indy_int.c b/arch/mips/sgi/kernel/indy_int.c index 916b0873a..cab112c29 100644 --- a/arch/mips/sgi/kernel/indy_int.c +++ b/arch/mips/sgi/kernel/indy_int.c @@ -1,4 +1,4 @@ -/* $Id: indy_int.c,v 1.16 1999/12/04 03:59:00 ralf Exp $ +/* $Id: indy_int.c,v 1.17 2000/02/04 07:40:23 ralf Exp $ * * indy_int.c: Routines for generic manipulation of the INT[23] ASIC * found on INDY workstations.. @@ -40,6 +40,24 @@ #include <asm/sgialib.h> #include <asm/gdb-stub.h> +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the appropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic.
+ */ + +irq_cpustat_t irq_stat [NR_CPUS]; + /* #define DEBUG_SGINT */ struct sgi_int2_regs *sgi_i2regs; diff --git a/arch/mips64/defconfig b/arch/mips64/defconfig index 039ffd84f..c4f4ba27a 100644 --- a/arch/mips64/defconfig +++ b/arch/mips64/defconfig @@ -305,6 +305,7 @@ CONFIG_SERIAL_CONSOLE=y # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set # CONFIG_RTC is not set +# CONFIG_EFI_RTC is not set # # Video For Linux diff --git a/arch/mips64/defconfig-ip22 b/arch/mips64/defconfig-ip22 index 9c6687735..b40469cfc 100644 --- a/arch/mips64/defconfig-ip22 +++ b/arch/mips64/defconfig-ip22 @@ -224,6 +224,7 @@ CONFIG_VT_CONSOLE=y # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set # CONFIG_RTC is not set +# CONFIG_EFI_RTC is not set # # Video For Linux diff --git a/arch/mips64/defconfig-ip27 b/arch/mips64/defconfig-ip27 index 039ffd84f..c4f4ba27a 100644 --- a/arch/mips64/defconfig-ip27 +++ b/arch/mips64/defconfig-ip27 @@ -305,6 +305,7 @@ CONFIG_SERIAL_CONSOLE=y # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set # CONFIG_RTC is not set +# CONFIG_EFI_RTC is not set # # Video For Linux diff --git a/arch/mips64/kernel/proc.c b/arch/mips64/kernel/proc.c index 6fba1b756..063ac5d88 100644 --- a/arch/mips64/kernel/proc.c +++ b/arch/mips64/kernel/proc.c @@ -1,4 +1,4 @@ -/* $Id: proc.c,v 1.1 1999/09/27 16:01:37 ralf Exp $ +/* $Id: proc.c,v 1.1 1999/09/28 22:25:51 ralf Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -69,3 +69,8 @@ int get_cpuinfo(char *buffer) return len; } + +void init_irq_proc(void) +{ + /* Nothing, for now. 
*/ +} diff --git a/arch/mips64/kernel/setup.c b/arch/mips64/kernel/setup.c index b42271b47..2a7d8a894 100644 --- a/arch/mips64/kernel/setup.c +++ b/arch/mips64/kernel/setup.c @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.6 2000/01/27 01:05:24 ralf Exp $ +/* $Id: setup.c,v 1.7 2000/02/04 07:40:24 ralf Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -183,4 +183,6 @@ void __init setup_arch(char **cmdline_p) *memory_start_p = initrd_end; } #endif + + paging_init(); } diff --git a/arch/mips64/sgi-ip22/ip22-int.c b/arch/mips64/sgi-ip22/ip22-int.c index 420e47fc7..a26ad631d 100644 --- a/arch/mips64/sgi-ip22/ip22-int.c +++ b/arch/mips64/sgi-ip22/ip22-int.c @@ -1,4 +1,4 @@ -/* $Id: ip22-int.c,v 1.3 1999/12/04 03:59:01 ralf Exp $ +/* $Id: ip22-int.c,v 1.4 2000/02/04 07:40:24 ralf Exp $ * * indy_int.c: Routines for generic manipulation of the INT[23] ASIC * found on INDY workstations.. @@ -37,6 +37,24 @@ #include <asm/sgi/sgint23.h> #include <asm/sgialib.h> +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the appropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic.
+ */ + +irq_cpustat_t irq_stat [NR_CPUS]; + struct sgi_int2_regs *sgi_i2regs; struct sgi_int3_regs *sgi_i3regs; struct sgi_ioc_ints *ioc_icontrol; diff --git a/arch/mips64/sgi-ip27/ip27-irq.c b/arch/mips64/sgi-ip27/ip27-irq.c index 7f5a36f97..c9e6fe150 100644 --- a/arch/mips64/sgi-ip27/ip27-irq.c +++ b/arch/mips64/sgi-ip27/ip27-irq.c @@ -1,4 +1,4 @@ -/* $Id: ip27-irq.c,v 1.5 2000/02/04 07:40:24 ralf Exp $ +/* $Id: ip27-irq.c,v 1.6 2000/02/10 05:58:56 dagum Exp $ * * ip27-irq.c: Highlevel interrupt handling for IP27 architecture. * @@ -35,6 +35,24 @@ #include <asm/sn/sn0/ip27.h> #include <asm/sn/arch.h> +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the appropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic.
+ */ + +irq_cpustat_t irq_stat [NR_CPUS]; + extern asmlinkage void ip27_irq(void); int (*irq_cannonicalize)(int irq); @@ -255,7 +273,7 @@ void irq_debug(void) printk("PI_INT_MASK0_A = 0x%x\n", LOCAL_HUB_L(PI_INT_MASK0_A)); } -int setup_irq(int irq, struct irqaction *new) +int setup_irq(unsigned int irq, struct irqaction *new) { int shared = 0; struct irqaction *old, **p; diff --git a/arch/ppc/chrpboot/main.c b/arch/ppc/chrpboot/main.c index d54a429a9..91bf4d8c4 100644 --- a/arch/ppc/chrpboot/main.c +++ b/arch/ppc/chrpboot/main.c @@ -10,7 +10,6 @@ #include "../coffboot/zlib.h" #include <asm/bootinfo.h> #include <asm/processor.h> -#define __KERNEL__ #include <asm/page.h> extern void *finddevice(const char *); @@ -49,17 +48,8 @@ chrpboot(int a1, int a2, void *prom) printf("chrpboot starting: loaded at 0x%x\n\r", &_start); - if (initrd_len) { - initrd_size = initrd_len; - initrd_start = (RAM_END - initrd_size) & ~0xFFF; - a1 = initrd_start; - a2 = initrd_size; - printf("initial ramdisk moving 0x%x <- 0x%x (%x bytes)\n\r", initrd_start, - initrd_data,initrd_size); - memcpy((char *)initrd_start, initrd_data, initrd_size); - end_avail = (char *)initrd_start; - } else - end_avail = (char *) RAM_END; + end_avail = (char *) RAM_END; + im = image_data; len = image_len; dst = (void *) PROG_START; @@ -98,7 +88,7 @@ chrpboot(int a1, int a2, void *prom) rec = (struct bi_record *)((unsigned long)rec + rec->size); rec->tag = BI_SYSMAP; - rec->data[0] = sysmap_data; + rec->data[0] = (unsigned long)sysmap_data; rec->data[1] = sysmap_len; rec->size = sizeof(struct bi_record) + sizeof(unsigned long); rec = (struct bi_record *)((unsigned long)rec + rec->size); @@ -129,6 +119,10 @@ void *zalloc(void *x, unsigned items, unsigned size) void zfree(void *x, void *addr, unsigned nb) { + nb = (nb + 7) & -8; + if (addr == (avail_ram - nb)) { + avail_ram -= nb; + } } #define HEAD_CRC 2 diff --git a/arch/ppc/chrpboot/piggyback.c b/arch/ppc/chrpboot/piggyback.c index 172025802..304bc8f11 100644 
--- a/arch/ppc/chrpboot/piggyback.c +++ b/arch/ppc/chrpboot/piggyback.c @@ -1,8 +1,9 @@ #include <stdio.h> +#include <unistd.h> extern long ce_exec_config[]; -main(int argc, char *argv[]) +int main(int argc, char *argv[]) { int i, cnt, pos, len; unsigned int cksum, val; diff --git a/arch/ppc/coffboot/piggyback.c b/arch/ppc/coffboot/piggyback.c index 172025802..304bc8f11 100644 --- a/arch/ppc/coffboot/piggyback.c +++ b/arch/ppc/coffboot/piggyback.c @@ -1,8 +1,9 @@ #include <stdio.h> +#include <unistd.h> extern long ce_exec_config[]; -main(int argc, char *argv[]) +int main(int argc, char *argv[]) { int i, cnt, pos, len; unsigned int cksum, val; diff --git a/arch/ppc/config.in b/arch/ppc/config.in index 8bb23afa2..7d0ab5fa0 100644 --- a/arch/ppc/config.in +++ b/arch/ppc/config.in @@ -47,12 +47,9 @@ if [ "$CONFIG_8xx" = "y" ]; then fi if [ "$CONFIG_6xx" = "y" ]; then choice 'Machine Type' \ - "PowerMac CONFIG_PMAC \ - PReP/MTX CONFIG_PREP \ - CHRP CONFIG_CHRP \ - PowerMac/PReP/CHRP CONFIG_ALL_PPC \ + "PowerMac/PReP/MTX/CHRP CONFIG_ALL_PPC \ Gemini CONFIG_GEMINI \ - APUS CONFIG_APUS" PowerMac + APUS CONFIG_APUS" PowerMac/PReP/MTX/CHRP fi if [ "$CONFIG_PPC64" = "y" ]; then diff --git a/arch/ppc/configs/common_defconfig b/arch/ppc/configs/common_defconfig index 17217702f..4ba96bde9 100644 --- a/arch/ppc/configs/common_defconfig +++ b/arch/ppc/configs/common_defconfig @@ -17,9 +17,6 @@ CONFIG_6xx=y # CONFIG_PPC64 is not set # CONFIG_82xx is not set # CONFIG_8xx is not set -# CONFIG_PMAC is not set -# CONFIG_PREP is not set -# CONFIG_CHRP is not set CONFIG_ALL_PPC=y # CONFIG_GEMINI is not set # CONFIG_APUS is not set @@ -286,30 +283,29 @@ CONFIG_GMAC=y # CONFIG_LANCE is not set # CONFIG_NET_VENDOR_SMC is not set # CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_RTL8139 is not set -# CONFIG_DM9102 is not set # CONFIG_AT1700 is not set # CONFIG_DEPCA is not set # CONFIG_NET_ISA is not set -CONFIG_NET_EISA=y +CONFIG_NET_PCI=y CONFIG_PCNET32=y # CONFIG_ADAPTEC_STARFIRE is not set 
# CONFIG_AC3200 is not set # CONFIG_APRICOT is not set -# CONFIG_CS89x0 is not set CONFIG_DE4X5=y -# CONFIG_DEC_ELCP is not set +# CONFIG_TULIP is not set # CONFIG_DGRS is not set -# CONFIG_EEXPRESS_PRO100 is not set +# CONFIG_DM9102 is not set +# CONFIG_EEPRO100 is not set # CONFIG_LNE390 is not set # CONFIG_NE3210 is not set # CONFIG_NE2K_PCI is not set +# CONFIG_RTL8129 is not set +# CONFIG_8139TOO is not set # CONFIG_SIS900 is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_ES3210 is not set # CONFIG_EPIC100 is not set -# CONFIG_ZNET is not set # CONFIG_NET_POCKET is not set # @@ -499,6 +495,7 @@ CONFIG_USB_OHCI=y # CONFIG_USB_DC2XX is not set # CONFIG_USB_STORAGE is not set # CONFIG_USB_DABUSB is not set +# CONFIG_USB_PLUSB is not set # # USB HID @@ -508,13 +505,15 @@ CONFIG_USB_KBD=y CONFIG_USB_MOUSE=y # CONFIG_USB_GRAPHIRE is not set # CONFIG_USB_WMFORCE is not set -# CONFIG_INPUT_KEYBDEV is not set -# CONFIG_INPUT_MOUSEDEV is not set +CONFIG_INPUT_KEYBDEV=y +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_MIX is not set +# CONFIG_INPUT_MOUSEDEV_DIGITIZER is not set # CONFIG_INPUT_JOYDEV is not set # CONFIG_INPUT_EVDEV is not set # -# Filesystems +# File systems # # CONFIG_QUOTA is not set CONFIG_AUTOFS_FS=y @@ -535,6 +534,7 @@ CONFIG_ISO9660_FS=y # CONFIG_NTFS_FS is not set # CONFIG_HPFS_FS is not set CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set CONFIG_DEVPTS_FS=y # CONFIG_QNX4FS_FS is not set # CONFIG_ROMFS_FS is not set diff --git a/arch/ppc/defconfig b/arch/ppc/defconfig index 17217702f..4ba96bde9 100644 --- a/arch/ppc/defconfig +++ b/arch/ppc/defconfig @@ -17,9 +17,6 @@ CONFIG_6xx=y # CONFIG_PPC64 is not set # CONFIG_82xx is not set # CONFIG_8xx is not set -# CONFIG_PMAC is not set -# CONFIG_PREP is not set -# CONFIG_CHRP is not set CONFIG_ALL_PPC=y # CONFIG_GEMINI is not set # CONFIG_APUS is not set @@ -286,30 +283,29 @@ CONFIG_GMAC=y # CONFIG_LANCE is not set # CONFIG_NET_VENDOR_SMC is not set # CONFIG_NET_VENDOR_RACAL is 
not set -# CONFIG_RTL8139 is not set -# CONFIG_DM9102 is not set # CONFIG_AT1700 is not set # CONFIG_DEPCA is not set # CONFIG_NET_ISA is not set -CONFIG_NET_EISA=y +CONFIG_NET_PCI=y CONFIG_PCNET32=y # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_AC3200 is not set # CONFIG_APRICOT is not set -# CONFIG_CS89x0 is not set CONFIG_DE4X5=y -# CONFIG_DEC_ELCP is not set +# CONFIG_TULIP is not set # CONFIG_DGRS is not set -# CONFIG_EEXPRESS_PRO100 is not set +# CONFIG_DM9102 is not set +# CONFIG_EEPRO100 is not set # CONFIG_LNE390 is not set # CONFIG_NE3210 is not set # CONFIG_NE2K_PCI is not set +# CONFIG_RTL8129 is not set +# CONFIG_8139TOO is not set # CONFIG_SIS900 is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_ES3210 is not set # CONFIG_EPIC100 is not set -# CONFIG_ZNET is not set # CONFIG_NET_POCKET is not set # @@ -499,6 +495,7 @@ CONFIG_USB_OHCI=y # CONFIG_USB_DC2XX is not set # CONFIG_USB_STORAGE is not set # CONFIG_USB_DABUSB is not set +# CONFIG_USB_PLUSB is not set # # USB HID @@ -508,13 +505,15 @@ CONFIG_USB_KBD=y CONFIG_USB_MOUSE=y # CONFIG_USB_GRAPHIRE is not set # CONFIG_USB_WMFORCE is not set -# CONFIG_INPUT_KEYBDEV is not set -# CONFIG_INPUT_MOUSEDEV is not set +CONFIG_INPUT_KEYBDEV=y +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_MIX is not set +# CONFIG_INPUT_MOUSEDEV_DIGITIZER is not set # CONFIG_INPUT_JOYDEV is not set # CONFIG_INPUT_EVDEV is not set # -# Filesystems +# File systems # # CONFIG_QUOTA is not set CONFIG_AUTOFS_FS=y @@ -535,6 +534,7 @@ CONFIG_ISO9660_FS=y # CONFIG_NTFS_FS is not set # CONFIG_HPFS_FS is not set CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set CONFIG_DEVPTS_FS=y # CONFIG_QNX4FS_FS is not set # CONFIG_ROMFS_FS is not set diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index ea7c7c6e7..0f7167622 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -88,24 +88,14 @@ endif ifeq ($(CONFIG_NVRAM),y) O_OBJS += pmac_nvram.o endif -ifeq ($(CONFIG_6xx),y) - O_OBJS += 
open_pic.o indirect_pci.o -endif -ifeq ($(CONFIG_PPC64),y) - O_OBJS += open_pic.o indirect_pci.o -endif ifeq ($(CONFIG_APUS),y) O_OBJS += apus_setup.o endif -ifeq ($(CONFIG_PMAC),y) - O_OBJS += pmac_pic.o pmac_setup.o pmac_time.o feature.o pmac_pci.o prom.o -endif -ifeq ($(CONFIG_CHRP),y) - O_OBJS += chrp_pci.o pmac_pci.o chrp_setup.o i8259.o \ - chrp_time.o pmac_time.o prom.o -endif -ifeq ($(CONFIG_PREP),y) - O_OBJS += prep_pci.o i8259.o prep_setup.o prep_nvram.o prep_time.o residual.o +ifeq ($(CONFIG_ALL_PPC),y) + O_OBJS += pmac_pic.o pmac_setup.o pmac_time.o feature.o pmac_pci.o prom.o \ + chrp_setup.o chrp_time.o chrp_pci.o open_pic.o indirect_pci.o \ + prep_pci.o i8259.o prep_nvram.o prep_time.o residual.o + OX_OBJS += prep_setup.o endif ifeq ($(CONFIG_GEMINI),y) O_OBJS += gemini_prom.o gemini_pci.o gemini_setup.o diff --git a/arch/ppc/kernel/apus_setup.c b/arch/ppc/kernel/apus_setup.c index 5f0c4b06e..a54efc6fd 100644 --- a/arch/ppc/kernel/apus_setup.c +++ b/arch/ppc/kernel/apus_setup.c @@ -10,7 +10,7 @@ * TODO: * This file needs a *really* good cleanup. Restructure and optimize. * Make sure it can be compiled for non-APUS configs. Begin to move - * Amiga specific stuff into linux/machine/amiga. + * Amiga specific stuff into mach/amiga. */ #include <linux/config.h> @@ -27,6 +27,10 @@ #include <asm/logging.h> #endif +/* Needs INITSERIAL call in head.S! 
*/ +#undef APUS_DEBUG + + #include <linux/ide.h> #define T_CHAR (0x0000) /* char: don't touch */ #define T_SHORT (0x4000) /* short: 12 -> 21 */ @@ -60,37 +64,6 @@ static u_short driveid_types[] = { #define num_driveid_types (sizeof(driveid_types)/sizeof(*driveid_types)) -#if 0 /* Get rid of this crud */ -/* Get the IDE stuff from the 68k file */ -#define ide_init_hwif_ports m68k_ide_init_hwif_ports -#define ide_default_irq m68k_ide_default_irq -#undef ide_request_irq -#define ide_request_irq m68k_ide_request_irq -#undef ide_free_irq -#define ide_free_irq m68k_ide_free_irq -#define ide_default_io_base m68k_ide_default_io_base -#define ide_check_region m68k_ide_check_region -#define ide_request_region m68k_ide_request_region -#define ide_release_region m68k_ide_release_region -#define ide_fix_driveid m68k_ide_fix_driveid -#define ide_init_default_hwifs m68k_ide_init_default_hwifs -#define select_t m68k_select_t -//#include <asm/hdreg.h> -#include <asm-m68k/ide.h> -#undef ide_free_irq -#undef select_t -#undef ide_request_irq -#undef ide_init_default_hwifs -#undef ide_init_hwif_ports -#undef ide_default_irq -#undef ide_default_io_base -#undef ide_check_region -#undef ide_request_region -#undef ide_release_region -#undef ide_fix_driveid -/*-------------------------------------------*/ -#endif - #include <asm/bootinfo.h> #include <asm/setup.h> #include <asm/amigahw.h> @@ -764,6 +737,12 @@ void apus_ide_init_hwif_ports (hw_regs_t *hw, ide_ioreg_t data_port, /****************************************************** IRQ stuff */ __apus +static unsigned int apus_irq_cannonicalize(unsigned int irq) +{ + return irq; +} + +__apus int apus_get_irq_list(char *buf) { #ifdef CONFIG_APUS @@ -922,6 +901,114 @@ static void apus_kbd_init_hw(void) } +/****************************************************** debugging */ + +/* some serial hardware definitions */ +#define SDR_OVRUN (1<<15) +#define SDR_RBF (1<<14) +#define SDR_TBE (1<<13) +#define SDR_TSRE (1<<12) + +#define AC_SETCLR (1<<15) 
+#define AC_UARTBRK (1<<11) + +#define SER_DTR (1<<7) +#define SER_RTS (1<<6) +#define SER_DCD (1<<5) +#define SER_CTS (1<<4) +#define SER_DSR (1<<3) + +static __inline__ void ser_RTSon(void) +{ + ciab.pra &= ~SER_RTS; /* active low */ +} + +__apus +int __debug_ser_out( unsigned char c ) +{ + custom.serdat = c | 0x100; + mb(); + while (!(custom.serdatr & 0x2000)) + barrier(); + return 1; +} + +__apus +unsigned char __debug_ser_in( void ) +{ + unsigned char c; + + /* XXX: is that ok?? derived from amiga_ser.c... */ + while( !(custom.intreqr & IF_RBF) ) + barrier(); + c = custom.serdatr; + /* clear the interrupt, so that another character can be read */ + custom.intreq = IF_RBF; + return c; +} + +__apus +int __debug_serinit( void ) +{ + unsigned long flags; + + save_flags (flags); + cli(); + + /* turn off Rx and Tx interrupts */ + custom.intena = IF_RBF | IF_TBE; + + /* clear any pending interrupt */ + custom.intreq = IF_RBF | IF_TBE; + + restore_flags (flags); + + /* + * set the appropriate directions for the modem control flags, + * and clear RTS and DTR + */ + ciab.ddra |= (SER_DTR | SER_RTS); /* outputs */ + ciab.ddra &= ~(SER_DCD | SER_CTS | SER_DSR); /* inputs */ + +#ifdef CONFIG_KGDB + /* turn Rx interrupts on for GDB */ + custom.intena = IF_SETCLR | IF_RBF; + ser_RTSon(); +#endif + + return 0; +} + +__apus +void __debug_print_hex(unsigned long x) +{ + int i; + char hexchars[] = "0123456789ABCDEF"; + + for (i = 0; i < 8; i++) { + __debug_ser_out(hexchars[(x >> 28) & 15]); + x <<= 4; + } + __debug_ser_out('\n'); + __debug_ser_out('\r'); +} + +__apus +void __debug_print_string(char* s) +{ + unsigned char c; + while((c = *s++)) + __debug_ser_out(c); + __debug_ser_out('\n'); + __debug_ser_out('\r'); +} + +__apus +static void apus_progress(char *s, unsigned short value) +{ + __debug_print_string(s); +} + /****************************************************** init */ /* The number of spurious interrupts */ @@ -970,7 +1057,7 @@ void apus_init_IRQ(void) int i; for ( 
i = 0 ; i < NR_IRQS ; i++ ) - irq_desc[i].ctl = &amiga_irqctrl; + irq_desc[i].handler = &amiga_irqctrl; for (i = 0; i < NUM_IRQ_NODES; i++) nodes[i].handler = NULL; @@ -1015,7 +1102,7 @@ void apus_init(unsigned long r3, unsigned long r4, unsigned long r5, ppc_md.setup_arch = apus_setup_arch; ppc_md.setup_residual = NULL; ppc_md.get_cpuinfo = apus_get_cpuinfo; - ppc_md.irq_cannonicalize = NULL; + ppc_md.irq_cannonicalize = apus_irq_cannonicalize; ppc_md.init_IRQ = apus_init_IRQ; ppc_md.get_irq = apus_get_irq; ppc_md.post_irq = apus_post_irq; @@ -1023,6 +1110,10 @@ void apus_init(unsigned long r3, unsigned long r4, unsigned long r5, ppc_md.heartbeat = apus_heartbeat; ppc_md.heartbeat_count = 1; #endif +#ifdef APUS_DEBUG + __debug_serinit(); + ppc_md.progress = apus_progress; +#endif ppc_md.init = NULL; ppc_md.restart = apus_restart; diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index 013812afc..cc647a58b 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -435,7 +435,7 @@ _GLOBAL(fake_interrupt) * here so it's easy to add arch-specific sections later. * -- Cort */ -#if defined(CONFIG_CHRP) || defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) /* * On CHRP, the Run-Time Abstraction Services (RTAS) have to be * called with the MMU off. 
@@ -475,4 +475,4 @@ enter_rtas: mtspr SRR0,r8 mtspr SRR1,r9 rfi /* return to caller */ -#endif /* CONFIG_CHRP || CONFIG_PMAC || CONFIG_ALL_PPC */ +#endif /* CONFIG_ALL_PPC */ diff --git a/arch/ppc/kernel/hashtable.S b/arch/ppc/kernel/hashtable.S index c87385c53..5593ebe18 100644 --- a/arch/ppc/kernel/hashtable.S +++ b/arch/ppc/kernel/hashtable.S @@ -115,11 +115,6 @@ hash_page: stw r6,0(r2) /* update PTE (accessed/dirty bits) */ /* Convert linux-style PTE to low word of PPC-style PTE */ -#ifdef CONFIG_PPC64 - /* clear the high 32 bits just in case */ - clrldi r6,r6,32 - clrldi r4,r4,32 -#endif /* CONFIG_PPC64 */ rlwinm r4,r6,32-9,31,31 /* _PAGE_HWWRITE -> PP lsb */ rlwimi r6,r6,32-1,31,31 /* _PAGE_USER -> PP (both bits now) */ ori r4,r4,0xe04 /* clear out reserved bits */ @@ -151,10 +146,6 @@ hash_page: .globl hash_page_patch_A hash_page_patch_A: lis r4,Hash_base@h /* base address of hash table */ -#ifdef CONFIG_PPC64 - /* just in case */ - clrldi r4,r4,32 -#endif rlwimi r4,r5,32-1,26-Hash_bits,25 /* (VSID & hash_mask) << 6 */ rlwinm r0,r3,32-6,26-Hash_bits,25 /* (PI & hash_mask) << 6 */ xor r4,r4,r0 /* make primary hash */ @@ -169,43 +160,89 @@ hash_page_patch_A: /* Search the primary PTEG for a PTE whose 1st word matches r5 */ mtctr r2 addi r3,r4,-8 -1: lwzu r0,8(r3) /* get next PTE */ +1: +#ifdef CONFIG_PPC64 + lwzu r0,16(r3) /* get next PTE */ +#else + lwzu r0,8(r3) /* get next PTE */ +#endif cmp 0,r0,r5 bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ found_slot /* Search the secondary PTEG for a matching PTE */ +#ifdef CONFIG_PPC64 + ori r5,r5,0x2 /* set H (secondary hash) bit */ +#else ori r5,r5,0x40 /* set H (secondary hash) bit */ +#endif .globl hash_page_patch_B hash_page_patch_B: xoris r3,r4,Hash_msk>>16 /* compute secondary hash */ xori r3,r3,0xffc0 +#ifdef CONFIG_PPC64 + addi r3,r3,-16 +#else addi r3,r3,-8 +#endif mtctr r2 -2: lwzu r0,8(r3) +2: +#ifdef CONFIG_PPC64 + lwzu r0,16(r3) +#else + lwzu r0,8(r3) +#endif cmp 0,r0,r5 bdnzf 2,2b beq+ 
found_slot +#ifdef CONFIG_PPC64 + xori r5,r5,0x2 /* clear H bit again */ +#else xori r5,r5,0x40 /* clear H bit again */ +#endif /* Search the primary PTEG for an empty slot */ 10: mtctr r2 +#ifdef CONFIG_PPC64 + addi r3,r4,-16 /* search primary PTEG */ +#else addi r3,r4,-8 /* search primary PTEG */ -1: lwzu r0,8(r3) /* get next PTE */ +#endif +1: +#ifdef CONFIG_PPC64 + lwzu r0,16(r3) /* get next PTE */ + andi. r0,r0,1 +#else + lwzu r0,8(r3) /* get next PTE */ rlwinm. r0,r0,0,0,0 /* only want to check valid bit */ +#endif bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ beq+ found_empty /* Search the secondary PTEG for an empty slot */ +#ifdef CONFIG_PPC64 + ori r5,r5,0x2 /* set H (secondary hash) bit */ +#else ori r5,r5,0x40 /* set H (secondary hash) bit */ +#endif .globl hash_page_patch_C hash_page_patch_C: xoris r3,r4,Hash_msk>>16 /* compute secondary hash */ xori r3,r3,0xffc0 +#ifdef CONFIG_PPC64 + addi r3,r3,-16 +#else addi r3,r3,-8 +#endif mtctr r2 -2: lwzu r0,8(r3) +2: +#ifdef CONFIG_PPC64 + lwzu r0,16(r3) + andi. r0,r0,1 +#else + lwzu r0,8(r3) rlwinm. r0,r0,0,0,0 /* only want to check valid bit */ +#endif bdnzf 2,2b beq+ found_empty @@ -218,12 +255,21 @@ hash_page_patch_C: * advantage to putting the PTE in the primary PTEG, we always * put the PTE in the primary PTEG. */ +#ifdef CONFIG_PPC64 + xori r5,r5,0x2 /* clear H bit again */ +#else xori r5,r5,0x40 /* clear H bit again */ +#endif lis r3,next_slot@ha tophys(r3,r3) lwz r2,next_slot@l(r3) +#ifdef CONFIG_PPC64 + addi r2,r2,16 + andi. r2,r2,0x78 +#else addi r2,r2,8 andi. 
r2,r2,0x38 +#endif stw r2,next_slot@l(r3) add r3,r4,r2 11: @@ -237,9 +283,17 @@ hash_page_patch_C: #ifndef __SMP__ /* Store PTE in PTEG */ found_empty: +#ifdef CONFIG_PPC64 + std r5,0(r3) +#else stw r5,0(r3) +#endif found_slot: +#ifdef CONFIG_PPC64 + std r6,8(r3) +#else stw r6,4(r3) +#endif sync #else /* __SMP__ */ diff --git a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S index dd16b8c27..b6d44ecb3 100644 --- a/arch/ppc/kernel/head.S +++ b/arch/ppc/kernel/head.S @@ -1328,18 +1328,15 @@ load_up_mmu: /* Load the SDR1 register (hash table base & size) */ lis r6,_SDR1@ha tophys(r6,r6) -#ifdef CONFIG_PPC64 - ld r6,_SDR1@l(r6) + lwz r6,_SDR1@l(r6) mtspr SDR1,r6 +#ifdef CONFIG_PPC64 /* clear the v bit in the ASR so we can * behave as if we have segment registers * -- Cort */ clrldi r6,r6,63 mtasr r6 -#else - lwz r6,_SDR1@l(r6) - mtspr SDR1,r6 #endif /* CONFIG_PPC64 */ li r0,16 /* load up segment register values */ mtctr r0 /* for context 0 */ diff --git a/arch/ppc/kernel/irq.c b/arch/ppc/kernel/irq.c index fd77fbc36..ffac1871a 100644 --- a/arch/ppc/kernel/irq.c +++ b/arch/ppc/kernel/irq.c @@ -70,8 +70,6 @@ volatile unsigned char *chrp_int_ack_special; #define MAXCOUNT 10000000 -#define NR_MASK_WORDS ((NR_IRQS + 31) / 32) - irq_desc_t irq_desc[NR_IRQS]; int ppc_spurious_interrupts = 0; unsigned int local_bh_count[NR_CPUS]; @@ -81,7 +79,6 @@ unsigned int ppc_cached_irq_mask[NR_MASK_WORDS]; unsigned int ppc_lost_interrupts[NR_MASK_WORDS]; atomic_t ppc_n_lost_interrupts; - /* nasty hack for shared irq's since we need to do kmalloc calls but * can't very early in the boot when we need to do a request irq. * this needs to be removed. 
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 50f63eeb4..8444bb4a0 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -241,12 +241,21 @@ _GLOBAL(__flush_page_to_ram) rlwinm r5,r5,16,16,31 cmpi 0,r5,1 beqlr /* for 601, do nothing */ + li r4,0x0FFF + andc r3,r3,r4 /* Get page base address */ li r4,4096/CACHE_LINE_SIZE /* Number of lines in a page */ mtctr r4 + mr r6,r3 0: dcbst 0,r3 /* Write line to ram */ addi r3,r3,CACHE_LINE_SIZE bdnz 0b sync + mtctr r4 +1: icbi 0,r6 + addi r6,r6,CACHE_LINE_SIZE + bdnz 1b + sync + isync blr /* @@ -270,7 +279,7 @@ _GLOBAL(__flush_icache_page) sync isync blr - + /* * Clear a page using the dcbz instruction, which doesn't cause any * memory traffic (except to write out any cache lines which get diff --git a/arch/ppc/kernel/mk_defs.c b/arch/ppc/kernel/mk_defs.c index 4f3c6834d..c381ea073 100644 --- a/arch/ppc/kernel/mk_defs.c +++ b/arch/ppc/kernel/mk_defs.c @@ -99,6 +99,7 @@ main(void) DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); + DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); diff --git a/arch/ppc/kernel/pmac_pic.c b/arch/ppc/kernel/pmac_pic.c index d13875c9f..b0276ca2c 100644 --- a/arch/ppc/kernel/pmac_pic.c +++ b/arch/ppc/kernel/pmac_pic.c @@ -39,6 +39,17 @@ extern int pmac_pcibios_read_config_word(unsigned char bus, unsigned char dev_fn extern int pmac_pcibios_write_config_word(unsigned char bus, unsigned char dev_fn, unsigned char offset, unsigned short val); +/* + * Mark an irq as "lost". This is only used on the pmac + * since it can lose interrupts (see pmac_set_irq_mask). 
+ * -- Cort + */ +void __pmac __no_use_set_lost(unsigned long irq_nr) +{ + if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) + atomic_inc(&ppc_n_lost_interrupts); +} + static void pmac_openpic_mask_irq(unsigned int irq_nr) { openpic_disable_irq(irq_nr); @@ -105,10 +116,8 @@ static void __pmac pmac_set_irq_mask(unsigned int irq_nr) */ if ((bit & ppc_cached_irq_mask[i]) && (ld_le32(&pmac_irq_hw[i]->level) & bit) - && !(ld_le32(&pmac_irq_hw[i]->flag) & bit)) { - if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) - atomic_inc(&ppc_n_lost_interrupts); - } + && !(ld_le32(&pmac_irq_hw[i]->flag) & bit)) + __set_lost((ulong)irq_nr); } static void __pmac pmac_mask_irq(unsigned int irq_nr) @@ -174,6 +183,8 @@ pmac_get_irq(struct pt_regs *regs) unsigned long bits = 0; #ifdef __SMP__ + void pmac_smp_message_recv(void); + /* IPI's are a hack on the powersurge -- Cort */ if ( smp_processor_id() != 0 ) { @@ -182,12 +193,12 @@ pmac_get_irq(struct pt_regs *regs) if (xmon_2nd) xmon(regs); #endif - smp_message_recv(); + pmac_smp_message_recv(); return -2; /* ignore, already handled */ } #endif /* __SMP__ */ - /* Yeah, I know, this could be a separate do_IRQ function */ + /* Yeah, I know, this could be a separate get_irq function */ if (has_openpic) { irq = openpic_irq(smp_processor_id()); @@ -376,6 +387,7 @@ pmac_pic_init(void) irqctrler = NULL; } + int_control.int_set_lost = __no_use_set_lost; /* * G3 powermacs and 1999 G3 PowerBooks have 64 interrupts, * 1998 G3 Series PowerBooks have 128, diff --git a/arch/ppc/kernel/ppc_htab.c b/arch/ppc/kernel/ppc_htab.c index 264a24d48..da46f3c1c 100644 --- a/arch/ppc/kernel/ppc_htab.c +++ b/arch/ppc/kernel/ppc_htab.c @@ -44,19 +44,12 @@ extern unsigned long htab_evicts; extern unsigned long pte_misses; extern unsigned long pte_errors; -static struct file_operations ppc_htab_operations = { +struct file_operations ppc_htab_operations = { llseek: ppc_htab_lseek, read: ppc_htab_read, write: ppc_htab_write, }; -/* - * proc files can do almost 
nothing.. - */ -struct inode_operations proc_ppc_htab_inode_operations = { - &ppc_htab_operations, /* default proc file-ops */ -}; - /* these will go into processor.h when I'm done debugging -- Cort */ #define MMCR0 952 #define MMCR0_PMC1_CYCLES (0x1<<7) diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index 757715512..87c8d4082 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -34,6 +34,7 @@ #include <asm/feature.h> #include <asm/dma.h> #include <asm/machdep.h> +#include <asm/hw_irq.h> #ifdef __SMP__ #include <asm/smplock.h> #endif /* __SMP__ */ @@ -50,7 +51,6 @@ extern void AlignmentException(struct pt_regs *regs); extern void ProgramCheckException(struct pt_regs *regs); extern void SingleStepException(struct pt_regs *regs); extern int sys_sigreturn(struct pt_regs *regs); -extern atomic_t ppc_n_lost_interrupts; extern void do_lost_interrupts(unsigned long); extern int do_signal(sigset_t *, struct pt_regs *); @@ -69,6 +69,7 @@ EXPORT_SYMBOL(ProgramCheckException); EXPORT_SYMBOL(SingleStepException); EXPORT_SYMBOL(sys_sigreturn); EXPORT_SYMBOL(ppc_n_lost_interrupts); +EXPORT_SYMBOL(ppc_lost_interrupts); EXPORT_SYMBOL(do_lost_interrupts); EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); @@ -88,7 +89,7 @@ EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); #ifndef CONFIG_8xx -#if defined(CONFIG_PREP) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) EXPORT_SYMBOL(_prep_type); EXPORT_SYMBOL(ucSystemType); #endif @@ -125,7 +126,6 @@ EXPORT_SYMBOL(strtok); EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strnlen); -EXPORT_SYMBOL(strspn); EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); @@ -227,7 +227,7 @@ EXPORT_SYMBOL(pmu_register_sleep_notifier); EXPORT_SYMBOL(pmu_unregister_sleep_notifier); EXPORT_SYMBOL(pmu_enable_irled); #endif CONFIG_PMAC_PBOOK -#if defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) 
EXPORT_SYMBOL(find_devices); EXPORT_SYMBOL(find_type_devices); EXPORT_SYMBOL(find_compatible_devices); @@ -243,8 +243,8 @@ EXPORT_SYMBOL(pci_device_loc); EXPORT_SYMBOL(feature_set); EXPORT_SYMBOL(feature_clear); EXPORT_SYMBOL(feature_test); -#endif /* defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC) */ -#if defined(CONFIG_SCSI) && (defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC)) +#endif /* defined(CONFIG_ALL_PPC) */ +#if defined(CONFIG_SCSI) && defined(CONFIG_ALL_PPC) EXPORT_SYMBOL(note_scsi_host); #endif EXPORT_SYMBOL(kd_mksound); @@ -270,7 +270,6 @@ EXPORT_SYMBOL(screen_info); EXPORT_SYMBOL(int_control); EXPORT_SYMBOL(timer_interrupt_intercept); EXPORT_SYMBOL(timer_interrupt); -extern unsigned long do_IRQ_intercept; EXPORT_SYMBOL(do_IRQ_intercept); EXPORT_SYMBOL(irq_desc); void ppc_irq_dispatch_handler(struct pt_regs *, int); @@ -278,3 +277,7 @@ EXPORT_SYMBOL(ppc_irq_dispatch_handler); EXPORT_SYMBOL(decrementer_count); EXPORT_SYMBOL(get_wchan); EXPORT_SYMBOL(console_drivers); +#ifdef CONFIG_XMON +EXPORT_SYMBOL(xmon); +#endif +EXPORT_SYMBOL(down_read_failed); diff --git a/arch/ppc/kernel/process.c b/arch/ppc/kernel/process.c index 41382b2d7..5c01d3c72 100644 --- a/arch/ppc/kernel/process.c +++ b/arch/ppc/kernel/process.c @@ -158,7 +158,7 @@ enable_kernel_altivec(void) if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) giveup_altivec(current); else - giveup_altivec(NULL): /* just enable AltiVec for kernel - force */ + giveup_altivec(NULL); /* just enable AltiVec for kernel - force */ #else giveup_altivec(last_task_used_altivec); #endif /* __SMP __ */ diff --git a/arch/ppc/kernel/prom.c b/arch/ppc/kernel/prom.c index b86e2a153..4ee638f62 100644 --- a/arch/ppc/kernel/prom.c +++ b/arch/ppc/kernel/prom.c @@ -604,7 +604,6 @@ prom_init(int r3, int r4, prom_entry pp) /* XXX: hack - don't start cpu 0, this cpu -- Cort */ if ( smp_chrp_cpu_nr++ == 0 ) continue; - RELOC(smp_ibm_chrp_hack) = 1; prom_print(RELOC("starting cpu ")); prom_print(path); *(unsigned 
long *)(0x4) = 0; diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index 7502ad08e..5a57ba8a2 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -733,17 +733,13 @@ void ppc_generic_ide_fix_driveid(struct hd_driveid *id) id->eide_dma_time = __le16_to_cpu(id->eide_dma_time); id->eide_pio = __le16_to_cpu(id->eide_pio); id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy); - id->word69 = __le16_to_cpu(id->word69); - id->word70 = __le16_to_cpu(id->word70); - id->word71 = __le16_to_cpu(id->word71); - id->word72 = __le16_to_cpu(id->word72); - id->word73 = __le16_to_cpu(id->word73); - id->word74 = __le16_to_cpu(id->word74); + for (i=0; i<2 i++) + id->words69_70[i] = __le16_to_cpu(id->words69_70[i]); + for (i=0; i<4 i++) + id->words71_74[i] = __le16_to_cpu(id->words71_74[i]); id->queue_depth = __le16_to_cpu(id->queue_depth); - id->word76 = __le16_to_cpu(id->word76); - id->word77 = __le16_to_cpu(id->word77); - id->word78 = __le16_to_cpu(id->word78); - id->word79 = __le16_to_cpu(id->word79); + for (i=0; i<4 i++) + id->words76_79[i] = __le16_to_cpu(id->words76_79[i]); id->major_rev_num = __le16_to_cpu(id->major_rev_num); id->minor_rev_num = __le16_to_cpu(id->minor_rev_num); id->command_set_1 = __le16_to_cpu(id->command_set_1); @@ -758,40 +754,14 @@ void ppc_generic_ide_fix_driveid(struct hd_driveid *id) id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues); id->word92 = __le16_to_cpu(id->word92); id->hw_config = __le16_to_cpu(id->hw_config); - id->word94 = __le16_to_cpu(id->word94); - id->word95 = __le16_to_cpu(id->word95); - id->word96 = __le16_to_cpu(id->word96); - id->word97 = __le16_to_cpu(id->word97); - id->word98 = __le16_to_cpu(id->word98); - id->word99 = __le16_to_cpu(id->word99); - id->word100 = __le16_to_cpu(id->word100); - id->word101 = __le16_to_cpu(id->word101); - id->word102 = __le16_to_cpu(id->word102); - id->word103 = __le16_to_cpu(id->word103); - id->word104 = __le16_to_cpu(id->word104); - id->word105 = 
__le16_to_cpu(id->word105); - id->word106 = __le16_to_cpu(id->word106); - id->word107 = __le16_to_cpu(id->word107); - id->word108 = __le16_to_cpu(id->word108); - id->word109 = __le16_to_cpu(id->word109); - id->word110 = __le16_to_cpu(id->word110); - id->word111 = __le16_to_cpu(id->word111); - id->word112 = __le16_to_cpu(id->word112); - id->word113 = __le16_to_cpu(id->word113); - id->word114 = __le16_to_cpu(id->word114); - id->word115 = __le16_to_cpu(id->word115); - id->word116 = __le16_to_cpu(id->word116); - id->word117 = __le16_to_cpu(id->word117); - id->word118 = __le16_to_cpu(id->word118); - id->word119 = __le16_to_cpu(id->word119); - id->word120 = __le16_to_cpu(id->word120); - id->word121 = __le16_to_cpu(id->word121); - id->word122 = __le16_to_cpu(id->word122); - id->word123 = __le16_to_cpu(id->word123); - id->word124 = __le16_to_cpu(id->word124); - id->word125 = __le16_to_cpu(id->word125); - id->word126 = __le16_to_cpu(id->word126); + for (i=0; i<34; i++) + id->words94_125[i] = __le16_to_cpu(id->words94_125[i]); + id->last_lun = __le16_to_cpu(id->last_lun); id->word127 = __le16_to_cpu(id->word127); - for (i=0; i<127; i++) - id->reserved[i] = __le16_to_cpu(id->reserved[i]); + id->dlf = __le16_to_cpu(id->dlf); + id->csfo = __le16_to_cpu(id->csfo); + for (i=0; i<31; i++) + id->words130_159[i] = __le16_to_cpu(id->words130_159[i]); + for (i=0; i<97; i++) + id->words160_255[i] = __le16_to_cpu(id->words160_255[i]); } diff --git a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c index 83dff9246..97543348b 100644 --- a/arch/ppc/kernel/smp.c +++ b/arch/ppc/kernel/smp.c @@ -12,6 +12,7 @@ * (troy@microux.com, hozer@drgw.net) */ +#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> @@ -445,8 +446,10 @@ void __init smp_callin(void) */ if ( _machine & (_MACH_gemini|_MACH_chrp|_MACH_prep) ) do_openpic_setup_cpu(); +#ifdef CONFIG_GEMINI if ( _machine == _MACH_gemini ) gemini_init_l2(); +#endif while(!smp_commenced) barrier(); 
__sti(); diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index 25d728fdd..2faccd042 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -115,11 +115,7 @@ extern struct task_struct *current_set[NR_CPUS]; PTE *Hash, *Hash_end; unsigned long Hash_size, Hash_mask; #if !defined(CONFIG_4xx) && !defined(CONFIG_8xx) -#ifdef CONFIG_PPC64 -unsigned long long _SDR1; -#else unsigned long _SDR1; -#endif static void hash_init(void); union ubat { /* BAT register values to be loaded */ @@ -423,10 +419,9 @@ __ioremap(unsigned long addr, unsigned long size, unsigned long flags) /* * Is it a candidate for a BAT mapping? */ - for (i = 0; i < size; i += PAGE_SIZE) map_page(v+i, p+i, flags); -out: +out: return (void *) (v + (addr & ~PAGE_MASK)); } @@ -593,7 +588,7 @@ mmu_context_overflow(void) #if !defined(CONFIG_4xx) && !defined(CONFIG_8xx) static void get_mem_prop(char *, struct mem_pieces *); -#if defined(CONFIG_PMAC) || defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) /* * Read in a property describing some pieces of memory. */ @@ -616,7 +611,7 @@ static void __init get_mem_prop(char *name, struct mem_pieces *mp) mem_pieces_sort(mp); mem_pieces_coalesce(mp); } -#endif /* CONFIG_PMAC || CONFIG_CHRP || CONFIG_ALL_PPC */ +#endif /* CONFIG_ALL_PPC */ /* * Set up one of the I/D BAT (block address translation) register pairs. 
@@ -921,10 +916,11 @@ void __init MMU_init(void) if ( ppc_md.progress ) ppc_md.progress("MMU:hash init", 0x300); hash_init(); #ifdef CONFIG_PPC64 - _SDR1 = 0; /* temporary hack to just use bats -- Cort */ -#else + _SDR1 = __pa(Hash) | (ffz(~Hash_size) - 7)-11; +#else _SDR1 = __pa(Hash) | (Hash_mask >> 10); -#endif +#endif + ioremap_base = 0xf8000000; if ( ppc_md.progress ) ppc_md.progress("MMU:mapin", 0x301); @@ -947,7 +943,7 @@ void __init MMU_init(void) setbat(0, 0xf8000000, 0xf8000000, 0x08000000, IO_PAGE); #ifdef CONFIG_PPC64 /* temporary hack to get working until page tables are stable -- Cort*/ - setbat(1, 0x80000000, 0xc0000000, 0x10000000, IO_PAGE); +/* setbat(1, 0x80000000, 0xc0000000, 0x10000000, IO_PAGE);*/ setbat(3, 0xd0000000, 0xd0000000, 0x10000000, IO_PAGE); #else setbat(1, 0x80000000, 0x80000000, 0x10000000, IO_PAGE); @@ -1118,7 +1114,7 @@ void __init paging_init(void) /* * All pages are DMA-able so we put them all in the DMA zone. */ - zones_size[0] = virt_to_phys(end_of_DRAM) >> PAGE_SHIFT; + zones_size[0] = ((unsigned long)end_of_DRAM - KERNELBASE) >> PAGE_SHIFT; for (i = 1; i < MAX_NR_ZONES; i++) zones_size[i] = 0; free_area_init(zones_size); @@ -1132,9 +1128,9 @@ void __init mem_init(void) int codepages = 0; int datapages = 0; int initpages = 0; -#if defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) extern unsigned int rtas_data, rtas_size; -#endif /* defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) */ +#endif /* defined(CONFIG_ALL_PPC) */ max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); num_physpages = max_mapnr; /* RAM is assumed contiguous */ @@ -1150,13 +1146,13 @@ void __init mem_init(void) } #endif /* CONFIG_BLK_DEV_INITRD */ -#if defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) /* mark the RTAS pages as reserved */ if ( rtas_data ) for (addr = rtas_data; addr < PAGE_ALIGN(rtas_data+rtas_size) ; addr += PAGE_SIZE) SetPageReserved(mem_map + MAP_NR(addr)); 
-#endif /* defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) */ +#endif /* defined(CONFIG_ALL_PPC) */ if ( sysmap_size ) for (addr = (unsigned long)sysmap; addr < PAGE_ALIGN((unsigned long)sysmap+sysmap_size) ; @@ -1178,13 +1174,14 @@ void __init mem_init(void) printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08x,%08lx]\n", (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10), - codepages, datapages, initpages, + codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10), + initpages<< (PAGE_SHIFT-10), PAGE_OFFSET, (unsigned long) end_of_DRAM); mem_init_done = 1; } #if !defined(CONFIG_4xx) && !defined(CONFIG_8xx) -#if defined(CONFIG_PMAC) || defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) /* * On systems with Open Firmware, collect information about * physical RAM and which pieces are already in use. @@ -1195,9 +1192,13 @@ void __init mem_init(void) unsigned long __init *pmac_find_end_of_memory(void) { unsigned long a, total; - - /* max amount of RAM we allow -- Cort */ -#define RAM_LIMIT (768<<20) + unsigned long ram_limit = 0xf0000000 - KERNELBASE; + /* allow 0x08000000 for IO space */ + if ( _machine & (_MACH_prep|_MACH_Pmac) ) + ram_limit = 0xd8000000 - KERNELBASE; +#ifdef CONFIG_PPC64 + ram_limit = 64<<20; +#endif memory_node = find_devices("memory"); if (memory_node == NULL) { @@ -1222,16 +1223,8 @@ unsigned long __init *pmac_find_end_of_memory(void) a = phys_mem.regions[0].address; if (a != 0) panic("RAM doesn't start at physical address 0"); - /* - * XXX: - * Make sure ram mappings don't stomp on IO space - * This is a temporary hack to keep this from happening - * until we move the KERNELBASE and can allocate RAM up - * to our nearest IO area. 
- * -- Cort - */ - if (__max_memory == 0 || __max_memory > RAM_LIMIT) - __max_memory = RAM_LIMIT; + if (__max_memory == 0 || __max_memory > ram_limit) + __max_memory = ram_limit; if (phys_mem.regions[0].size >= __max_memory) { phys_mem.regions[0].size = __max_memory; phys_mem.n_regions = 1; @@ -1247,12 +1240,11 @@ unsigned long __init *pmac_find_end_of_memory(void) set_phys_avail(&phys_mem); -#undef RAM_LIMIT return __va(total); } -#endif /* CONFIG_PMAC || CONFIG_CHRP || CONFIG_ALL_PPC */ +#endif /* CONFIG_ALL_PPC */ -#if defined(CONFIG_PREP) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_ALL_PPC) /* * This finds the amount of physical ram and does necessary * setup for prep. This is pretty architecture specific so @@ -1279,7 +1271,7 @@ unsigned long __init *prep_find_end_of_memory(void) return (__va(total)); } -#endif /* defined(CONFIG_PREP) || defined(CONFIG_ALL_PPC) */ +#endif /* defined(CONFIG_ALL_PPC) */ #if defined(CONFIG_GEMINI) @@ -1389,16 +1381,12 @@ static void __init hash_init(void) * up to a maximum of 2MB. 
*/ ramsize = (ulong)end_of_DRAM - KERNELBASE; -#ifdef CONFIG_PPC64 - Hash_mask = 0; - for (h = 256<<10; h < ramsize / 256 && h < 4<<20; h *= 2, Hash_mask++) - ; - Hash_size = h; - Hash_mask <<= 10; /* so setting _SDR1 works the same -- Cort */ -#else for (h = 64<<10; h < ramsize / 256 && h < 2<<20; h *= 2) ; Hash_size = h; +#ifdef CONFIG_PPC64 + Hash_mask = (h >> 7) - 1; +#else Hash_mask = (h >> 6) - 1; #endif @@ -1433,7 +1421,11 @@ static void __init hash_init(void) /* * Patch up the instructions in head.S:hash_page */ +#ifdef CONFIG_PPC64 + Hash_bits = ffz(~Hash_size) - 7; +#else Hash_bits = ffz(~Hash_size) - 6; +#endif hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) | (__pa(Hash) >> 16); hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) @@ -1443,9 +1435,17 @@ static void __init hash_init(void) hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | ((26 - Hash_bits) << 6); hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) +#ifdef CONFIG_PPC64 + | (Hash_mask >> 11); +#else | (Hash_mask >> 10); +#endif hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) +#ifdef CONFIG_PPC64 + | (Hash_mask >> 11); +#else | (Hash_mask >> 10); +#endif #if 0 /* see hash_page in head.S, note also patch_C ref below */ hash_page_patch_D[0] = (hash_page_patch_D[0] & ~0xffff) | (Hash_mask >> 10); diff --git a/arch/ppc/mm/mem_pieces.c b/arch/ppc/mm/mem_pieces.c index e695d5a0a..309a526f5 100644 --- a/arch/ppc/mm/mem_pieces.c +++ b/arch/ppc/mm/mem_pieces.c @@ -127,7 +127,7 @@ mem_pieces_print(struct mem_pieces *mp) printk("\n"); } -#if defined(CONFIG_PREP) || defined(CONFIG_APUS) || defined(CONFIG_ALL_PPC) +#if defined(CONFIG_APUS) || defined(CONFIG_ALL_PPC) /* * Add some memory to an array of pieces */ diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c index a0da2f1b4..d18d74dfd 100644 --- a/arch/ppc/xmon/xmon.c +++ b/arch/ppc/xmon/xmon.c @@ -75,11 +75,14 @@ static void take_input(char *); static unsigned read_spr(int); static void write_spr(int, unsigned); 
static void super_regs(void); +static void print_sysmap(void); static void remove_bpts(void); static void insert_bpts(void); static struct bpt *at_breakpoint(unsigned pc); static void bpt_cmds(void); static void cacheflush(void); +static char *pretty_lookup_name(unsigned long addr); +static char *lookup_name(unsigned long addr); extern int print_insn_big_powerpc(FILE *, unsigned long, unsigned); extern void printf(const char *fmt, ...); @@ -101,6 +104,7 @@ Commands:\n\ mm move a block of memory\n\ ms set a block of memory\n\ md compare two blocks of memory\n\ + M print System.map\n\ r print registers\n\ S print special registers\n\ t print backtrace\n\ @@ -337,6 +341,8 @@ cmds(struct pt_regs *excp) else excprint(excp); break; + case 'M': + print_sysmap(); case 'S': super_regs(); break; @@ -514,8 +520,10 @@ getsp() void excprint(struct pt_regs *fp) { - printf("vector: %x at pc = %x, msr = %x, sp = %x [%x]\n", - fp->trap, fp->nip, fp->msr, fp->gpr[1], fp); + printf("vector: %x at pc = %x %s", + fp->trap, fp->nip,/* pretty_lookup_name(fp->nip)*/""); + printf(", msr = %x, sp = %x [%x]\n", + fp->msr, fp->gpr[1], fp); if (fp->trap == 0x300 || fp->trap == 0x600) printf("dar = %x, dsisr = %x\n", fp->dar, fp->dsisr); if (current) @@ -597,6 +605,14 @@ extern char exc_prolog; extern char dec_exc; void +print_sysmap(void) +{ + extern char *sysmap; + if ( sysmap ) + printf("System.map: \n%s", sysmap); +} + +void super_regs() { int i, cmd; @@ -1345,9 +1361,26 @@ char *str; lineptr = str; } +/* + * We use this array a lot here. We assume we don't have multiple + * instances of xmon running and that we don't use the return value of + * any functions other than printing them. 
+ * -- Cort + */ char last[64]; -char * -lookup_addr(unsigned long addr) +static char *pretty_lookup_name(unsigned long addr) +{ + if ( lookup_name(addr) ) + { + sprintf(last, " (%s)", lookup_name(addr)); + return last; + } + else + return NULL; +} + + +static char *lookup_name(unsigned long addr) { extern char *sysmap; extern unsigned long sysmap_size; @@ -1357,10 +1390,6 @@ lookup_addr(unsigned long addr) if ( !sysmap || !sysmap_size ) return NULL; - /* adjust if addr is relative to kernelbase */ - if ( addr < PAGE_OFFSET ) - addr += PAGE_OFFSET; - cmp = simple_strtoul(c, &c, 8); strcpy( last, strsep( &c, "\n")); while ( c < (sysmap+sysmap_size) ) @@ -1372,3 +1401,4 @@ lookup_addr(unsigned long addr) } return last; } + diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index b0f7f63ea..a7a562549 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.9 1998/10/26 20:01:03 davem Exp $ +# $Id: Makefile,v 1.10 2000/02/23 08:17:46 jj Exp $ # Makefile for the Sparc boot stuff. # # Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -22,16 +22,20 @@ btfixupprep: btfixupprep.c clean: rm -f btfixupprep piggyback tftpboot.img btfix.o btfix.s -BTOBJS := $(HEAD) init/main.o init/version.o \ - $(CORE_FILES_NO_BTFIX) $(FILESYSTEMS) \ - $(NETWORKS) $(DRIVERS) +BTOBJS := $(HEAD) init/main.o init/version.o +BTLIBS := $(CORE_FILES_NO_BTFIX) $(FILESYSTEMS) \ + $(DRIVERS) $(NETWORKS) # I wanted to make this depend upon BTOBJS so that a parallel # build would work, but this fails because $(HEAD) cannot work # properly as it will cause head.o to be built with the implicit # rules not the ones in kernel/Makefile. Someone please fix. 
--DaveM vmlinux.o: dummy - $(LD) -r $(patsubst %,$(TOPDIR)/%,$(BTOBJS)) $(LIBS) -o vmlinux.o + $(LD) -r $(patsubst %,$(TOPDIR)/%,$(BTOBJS)) \ + --start-group \ + $(patsubst %,$(TOPDIR)/%,$(BTLIBS)) \ + $(LIBS) \ + --end-group -o vmlinux.o btfix.s: btfixupprep vmlinux.o $(OBJDUMP) -x vmlinux.o | ./btfixupprep > btfix.s diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 8c8903d26..f0dbea065 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -695,42 +695,6 @@ _sparc_io_get_info(char *buf, char **start, off_t fpos, int length, int *eof, return p-buf; } -static struct proc_dir_entry _sparc_iomap_proc_entry = { - 0, /* Inode number - dynamic */ - 6, /* Length of the file name */ - "io_map", /* The file name */ - S_IFREG | S_IRUGO, /* File mode */ - 1, /* Number of links */ - 0, 0, /* The uid and gid for the file */ - 0, /* The size of the file reported by ls. */ - NULL, /* struct inode_operations * ops */ - NULL, /* get_info: backward compatibility */ - NULL, /* owner */ - NULL, NULL, NULL, /* linkage */ - &sparc_iomap, - _sparc_io_get_info, /* The read function for this file */ - NULL, - /* and more stuff */ -}; - -static struct proc_dir_entry _sparc_dvma_proc_entry = { - 0, /* Inode number - dynamic */ - 8, /* Length of the file name */ - "dvma_map", /* The file name */ - S_IFREG | S_IRUGO, /* File mode */ - 1, /* Number of links */ - 0, 0, /* The uid and gid for the file */ - 0, /* The size of the file reported by ls. 
*/ - NULL, /* struct inode_operations * ops */ - NULL, /* get_info: backward compatibility */ - NULL, /* owner */ - NULL, NULL, NULL, /* linkage */ - &_sparc_dvma, - _sparc_io_get_info, - NULL, - /* some more stuff */ -}; - #endif CONFIG_PROC_FS /* @@ -782,7 +746,7 @@ void ioport_init(void) }; #ifdef CONFIG_PROC_FS - proc_register(&proc_root, &_sparc_iomap_proc_entry); - proc_register(&proc_root, &_sparc_dvma_proc_entry); + create_proc_read_entry("io_map",0,0,_sparc_io_get_info,&sparc_iomap); + create_proc_read_entry("dvma_map",0,0,_sparc_io_get_info,&_sparc_dvma); #endif } diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 471929a01..07aefa660 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -1,4 +1,4 @@ -/* $Id: irq.c,v 1.101 2000/02/09 11:15:03 davem Exp $ +/* $Id: irq.c,v 1.102 2000/02/25 05:44:35 davem Exp $ * arch/sparc/kernel/irq.c: Interrupt request handling routines. On the * Sparc the IRQ's are basically 'cast in stone' * and you are supposed to probe the prom's device @@ -713,3 +713,8 @@ void __init init_IRQ(void) } btfixup(); } + +void init_irq_proc(void) +{ + /* For now, nothing... */ +} diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c index 50d682929..d4ac34932 100644 --- a/arch/sparc/kernel/setup.c +++ b/arch/sparc/kernel/setup.c @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.114 2000/01/29 01:08:57 anton Exp $ +/* $Id: setup.c,v 1.115 2000/02/26 04:24:31 davem Exp $ * linux/arch/sparc/kernel/setup.c * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -294,6 +294,8 @@ static struct console prom_console = { "PROM", prom_cons_write, 0, 0, 0, 0, 0, CON_PRINTBUFFER, 0, 0, 0 }; +extern void paging_init(void); + void __init setup_arch(char **cmdline_p) { int i; @@ -478,6 +480,8 @@ void __init setup_arch(char **cmdline_p) if (serial_console) conswitchp = NULL; + + paging_init(); } asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c index cdc1f0751..d4585d9d5 100644 --- a/arch/sparc/kernel/sparc_ksyms.c +++ b/arch/sparc/kernel/sparc_ksyms.c @@ -1,4 +1,4 @@ -/* $Id: sparc_ksyms.c,v 1.91 2000/02/18 20:23:24 davem Exp $ +/* $Id: sparc_ksyms.c,v 1.93 2000/02/26 11:02:45 anton Exp $ * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -92,7 +92,6 @@ __attribute__((section("__ksymtab"))) = \ /* used by various drivers */ EXPORT_SYMBOL(sparc_cpu_model); -EXPORT_SYMBOL_PRIVATE(_spinlock_waitfor); EXPORT_SYMBOL(kernel_thread); #ifdef SPIN_LOCK_DEBUG EXPORT_SYMBOL(_do_spin_lock); @@ -246,7 +245,6 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strtok); EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strspn); /* Special internal versions of library functions. */ EXPORT_SYMBOL(__copy_1page); diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S index 102541b18..e199f3813 100644 --- a/arch/sparc/lib/locks.S +++ b/arch/sparc/lib/locks.S @@ -1,4 +1,4 @@ -/* $Id: locks.S,v 1.15 1998/10/14 09:18:55 jj Exp $ +/* $Id: locks.S,v 1.16 2000/02/26 11:02:47 anton Exp $ * locks.S: SMP low-level lock primitives on Sparc. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -15,25 +15,6 @@ .text .align 4 - /* This is called when the initial acquisition attempt of a spin - * lock fails. 
The calling convention is weird, return address - * is in %o7 as usual but we agree with the caller to only touch - * and use %g2 as a temporary. We are passed a ptr to the lock - * itself in %g1, %g4 must be restored into %o7 when we return, - * and the caller wants us to return to him at three instructions - * previous to the call instruction which got us here. See how - * this is used in asm/spinlock.h if what I just said confuses - * you to no end. - */ - .globl ___spinlock_waitfor -___spinlock_waitfor: -1: orcc %g2, 0x0, %g0 - bne,a 1b - ldub [%g1], %g2 - ldstub [%g1], %g2 - jmpl %o7 - 12, %g0 - mov %g4, %o7 - /* Read/writer locks, as usual this is overly clever to make it * as fast as possible. */ diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index 6736dc9d3..3ac49a10b 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.80 2000/02/09 21:11:06 davem Exp $ +/* $Id: init.c,v 1.81 2000/02/26 11:59:31 anton Exp $ * linux/arch/sparc/mm/init.c * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -40,7 +40,7 @@ unsigned long phys_base; struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; unsigned long sparc_unmapped_base; -struct pgtable_cache_struct pgt_quicklists; +struct pgtable_cache_struct pgt_quicklists = { 0, 0, 0, 0, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED }; /* References to section boundaries */ extern char __init_begin, __init_end, _start, end, etext , edata; diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 917bb5e74..12e9432a7 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -273,9 +273,9 @@ CONFIG_SUNBMAC=m CONFIG_SUNQE=m CONFIG_DE4X5=m CONFIG_VORTEX=m -CONFIG_RTL8139=m +CONFIG_8139TOO=m CONFIG_NE2K_PCI=m -CONFIG_EEXPRESS_PRO100=m +CONFIG_EEPRO100=m CONFIG_ADAPTEC_STARFIRE=m # @@ -299,7 +299,7 @@ CONFIG_VIDEO_DEV=y # CONFIG_VIDEO_BT848 is not set # -# Filesystems +# File systems # # CONFIG_QUOTA is not set CONFIG_AUTOFS_FS=m diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index cb659b655..ed9e49685 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -1,4 +1,4 @@ -/* $Id: irq.c,v 1.83 2000/02/11 06:57:17 jj Exp $ +/* $Id: irq.c,v 1.84 2000/02/25 05:44:41 davem Exp $ * irq.c: UltraSparc IRQ handling/init/registry. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -1156,3 +1156,8 @@ void __init init_IRQ(void) : "i" (PSTATE_IE) : "g1"); } + +void init_irq_proc(void) +{ + /* For now, nothing... */ +} diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 0f280f818..ed2e8bd81 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.50 1999/12/01 10:44:45 davem Exp $ +/* $Id: setup.c,v 1.51 2000/02/26 04:24:32 davem Exp $ * linux/arch/sparc64/kernel/setup.c * * Copyright (C) 1995,1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -451,6 +451,8 @@ void register_prom_callbacks(void) "' linux-.soft2 to .soft2"); } +extern void paging_init(void); + void __init setup_arch(char **cmdline_p) { extern int serial_console; /* in console.c, of course */ @@ -587,6 +589,8 @@ void __init setup_arch(char **cmdline_p) #endif if (serial_console) conswitchp = NULL; + + paging_init(); } asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on) diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index f226a8ae5..8df2116e7 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -1,4 +1,4 @@ -/* $Id: signal32.c,v 1.59 2000/01/21 11:38:52 jj Exp $ +/* $Id: signal32.c,v 1.60 2000/02/25 06:02:37 jj Exp $ * arch/sparc64/kernel/signal32.c * * Copyright (C) 1991, 1992 Linus Torvalds @@ -126,6 +126,8 @@ int copy_siginfo_to_user32(siginfo_t32 *to, siginfo_t *from) err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; + case SIGURG: + case SIGIO: case SIGSEGV: case SIGILL: case SIGFPE: diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index ff3843651..f798358ce 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -1,4 +1,4 @@ -/* $Id: sparc64_ksyms.c,v 1.74 2000/02/09 11:15:07 davem Exp $ +/* $Id: sparc64_ksyms.c,v 1.75 2000/02/21 15:50:08 davem Exp $ * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. * * Copyright (C) 1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -261,7 +261,6 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strtok); EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strspn); #ifdef CONFIG_SOLARIS_EMUL_MODULE EXPORT_SYMBOL(getname32); diff --git a/arch/sparc64/lib/VIScsum.S b/arch/sparc64/lib/VIScsum.S index aad5d941a..9f77c8cb4 100644 --- a/arch/sparc64/lib/VIScsum.S +++ b/arch/sparc64/lib/VIScsum.S @@ -1,14 +1,15 @@ -/* $Id: VIScsum.S,v 1.5 1999/07/30 09:35:36 davem Exp $ +/* $Id: VIScsum.S,v 1.6 2000/02/20 23:21:39 davem Exp $ * VIScsum.S: High bandwidth IP checksumming utilizing the UltraSparc * Visual Instruction Set. * * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 2000 David S. Miller (davem@redhat.com) * * Based on older sparc32/sparc64 checksum.S, which is: * * Copyright(C) 1995 Linus Torvalds * Copyright(C) 1995 Miguel de Icaza - * Copyright(C) 1996,1997 David S. Miller + * Copyright(C) 1996, 1997 David S. Miller * derived from: * Linux/Alpha checksum c-code * Linux/ix86 inline checksum assembly @@ -38,290 +39,290 @@ * tricks are UltraLinux trade secrets :)) */ -#define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10) \ - fcmpgt32 %fz, %f0, %g1 /* FPM Group */; \ - fcmpgt32 %fz, %f2, %g2 /* FPM Group */; \ - fcmpgt32 %fz, %f4, %g3 /* FPM Group */; \ - fcmpgt32 %fz, %f6, %g5 /* FPM Group */; \ - inc %g1 /* IEU0 */; \ - fcmpgt32 %fz, %f8, %g7 /* FPM Group */; \ - srl %g1, 1, %g1 /* IEU0 */; \ - inc %g2 /* IEU1 */; \ - fcmpgt32 %fz, %f10, %o3 /* FPM Group */; \ - srl %g2, 1, %g2 /* IEU0 */; \ - add %o2, %g1, %o2 /* IEU1 */; \ - add %g3, 1, %g3 /* IEU0 Group */; \ - srl %g3, 1, %g3 /* IEU0 Group */; \ - add %o2, %g2, %o2 /* IEU1 */; \ - inc %g5 /* IEU0 Group */; \ - add %o2, %g3, %o2 /* IEU1 */; +#define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10) \ + fcmpgt32 %fz, %f0, %g1 /* FPM Group */; \ + fcmpgt32 %fz, %f2, %g2 /* FPM Group */; \ + fcmpgt32 %fz, %f4, %g3 /* FPM Group */; \ + inc %g1 /* IEU0 Group */; \ + fcmpgt32 %fz, %f6, %g5 /* FPM */; \ + 
srl %g1, 1, %g1 /* IEU0 Group */; \ + fcmpgt32 %fz, %f8, %g7 /* FPM */; \ + inc %g2 /* IEU0 Group */; \ + fcmpgt32 %fz, %f10, %o3 /* FPM */; \ + srl %g2, 1, %g2 /* IEU0 Group */; \ + inc %g3 /* IEU1 */; \ + srl %g3, 1, %g3 /* IEU0 Group */; \ + add %o2, %g1, %o2 /* IEU1 */; \ + add %o2, %g2, %o2 /* IEU0 Group */; \ + inc %g5 /* IEU1 */; \ + add %o2, %g3, %o2 /* IEU0 Group */; -#define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14) \ - fcmpgt32 %O12, %f12, %o4 /* FPM Group */; \ - srl %g5, 1, %g5 /* IEU0 */; \ - inc %g7 /* IEU1 */; \ - fpadd32 %F0, %f0, %F0 /* FPA */; \ - fcmpgt32 %O14, %f14, %o5 /* FPM Group */; \ - srl %g7, 1, %g7 /* IEU0 */; \ - add %o2, %g5, %o2 /* IEU1 */; \ - fpadd32 %F2, %f2, %F2 /* FPA */; \ - inc %o3 /* IEU0 Group */; \ - add %o2, %g7, %o2 /* IEU1 */; \ - fcmpgt32 %f0, %F0, %g1 /* FPM Group */; \ - srl %o3, 1, %o3 /* IEU0 */; \ - inc %o4 /* IEU1 */; \ - fpadd32 %F4, %f4, %F4 /* FPA */; \ - fcmpgt32 %f2, %F2, %g2 /* FPM Group */; \ - srl %o4, 1, %o4 /* IEU0 */; \ - add %o2, %o3, %o2 /* IEU1 */; \ - fpadd32 %F6, %f6, %F6 /* FPA */; \ - inc %o5 /* IEU0 Group */; \ - add %o2, %o4, %o2 /* IEU1 */; \ - fcmpgt32 %f4, %F4, %g3 /* FPM Group */; \ - srl %o5, 1, %o5 /* IEU0 */; \ - inc %g1 /* IEU1 */; \ - fpadd32 %F8, %f8, %F8 /* FPA */; \ - fcmpgt32 %f6, %F6, %g5 /* FPM Group */; \ - srl %g1, 1, %g1 /* IEU0 */; \ - add %o2, %o5, %o2 /* IEU1 */; \ - fpadd32 %F10, %f10, %F10 /* FPA */; \ - inc %g2 /* IEU0 Group */; \ - add %o2, %g1, %o2 /* IEU1 */; \ - fcmpgt32 %f8, %F8, %g7 /* FPM Group */; \ - srl %g2, 1, %g2 /* IEU0 */; \ - inc %g3 /* IEU1 */; \ - fpadd32 %F12, %f12, %F12 /* FPA */; \ - fcmpgt32 %f10, %F10, %o3 /* FPM Group */; \ - srl %g3, 1, %g3 /* IEU0 */; \ - add %o2, %g2, %o2 /* IEU1 */; \ - fpadd32 %F14, %f14, %F14 /* FPA */; \ - inc %g5 /* IEU0 Group */; \ - add %o2, %g3, %o2 /* IEU1 */; +#define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14) \ + srl %g5, 1, %g5 /* IEU0 Group */; \ + 
fpadd32 %F0, %f0, %F0 /* FPA */; \ + fcmpgt32 %O12, %f12, %o4 /* FPM */; \ + inc %g7 /* IEU0 Group */; \ + fpadd32 %F2, %f2, %F2 /* FPA */; \ + fcmpgt32 %O14, %f14, %o5 /* FPM */; \ + add %o2, %g5, %o2 /* IEU1 Group */; \ + fpadd32 %F4, %f4, %F4 /* FPA */; \ + fcmpgt32 %f0, %F0, %g1 /* FPM */; \ + srl %g7, 1, %g7 /* IEU0 Group */; \ + fpadd32 %F6, %f6, %F6 /* FPA */; \ + fcmpgt32 %f2, %F2, %g2 /* FPM */; \ + add %o2, %g7, %o2 /* IEU0 Group */; \ + fpadd32 %F8, %f8, %F8 /* FPA */; \ + fcmpgt32 %f4, %F4, %g3 /* FPM */; \ + inc %o3 /* IEU0 Group */; \ + fpadd32 %F10, %f10, %F10 /* FPA */; \ + fcmpgt32 %f6, %F6, %g5 /* FPM */; \ + srl %o3, 1, %o3 /* IEU0 Group */; \ + fpadd32 %F12, %f12, %F12 /* FPA */; \ + fcmpgt32 %f8, %F8, %g7 /* FPM */; \ + add %o2, %o3, %o2 /* IEU0 Group */; \ + fpadd32 %F14, %f14, %F14 /* FPA */; \ + fcmpgt32 %f10, %F10, %o3 /* FPM */; \ + inc %o4 /* IEU0 Group */; \ + inc %o5 /* IEU1 */; \ + srl %o4, 1, %o4 /* IEU0 Group */; \ + inc %g1 /* IEU1 */; \ + srl %o5, 1, %o5 /* IEU0 Group */; \ + add %o2, %o4, %o2 /* IEU1 */; \ + srl %g1, 1, %g1 /* IEU0 Group */; \ + add %o2, %o5, %o2 /* IEU1 */; \ + inc %g2 /* IEU0 Group */; \ + add %o2, %g1, %o2 /* IEU1 */; \ + srl %g2, 1, %g2 /* IEU0 Group */; \ + inc %g3 /* IEU1 */; \ + srl %g3, 1, %g3 /* IEU0 Group */; \ + add %o2, %g2, %o2 /* IEU1 */; \ + inc %g5 /* IEU0 Group */; \ + add %o2, %g3, %o2 /* IEU0 */; -#define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz) \ - fcmpgt32 %O12, %f12, %o4 /* FPM Group */; \ - srl %g5, 1, %g5 /* IEU0 */; \ - inc %g7 /* IEU1 */; \ - fpadd32 %f2, %f0, %S0 /* FPA */; \ - fcmpgt32 %O14, %f14, %o5 /* FPM Group */; \ - srl %g7, 1, %g7 /* IEU0 */; \ - add %o2, %g5, %o2 /* IEU1 */; \ - fpadd32 %f6, %f4, %S1 /* FPA */; \ - inc %o3 /* IEU0 Group */; \ - add %o2, %g7, %o2 /* IEU1 */; \ - fcmpgt32 %f0, %S0, %g1 /* FPM Group */; \ - srl %o3, 1, %o3 /* IEU0 */; \ - inc %o4 /* IEU1 */; \ - fpadd32 %f10, %f8, %S2 /* FPA */; \ - fcmpgt32 %f4, %S1, %g2 /* FPM 
Group */; \ - srl %o4, 1, %o4 /* IEU0 */; \ - add %o2, %o3, %o2 /* IEU1 */; \ - fpadd32 %f14, %f12, %S3 /* FPA */; \ - inc %o5 /* IEU0 Group */; \ - add %o2, %o4, %o2 /* IEU1 */; \ - fzero %fz /* FPA */; \ - fcmpgt32 %f8, %S2, %g3 /* FPM Group */; \ - srl %o5, 1, %o5 /* IEU0 */; \ - inc %g1 /* IEU1 */; \ - fpadd32 %S0, %S1, %T0 /* FPA */; \ - fcmpgt32 %f12, %S3, %g5 /* FPM Group */; \ - srl %g1, 1, %g1 /* IEU0 */; \ - add %o2, %o5, %o2 /* IEU1 */; \ - fpadd32 %S2, %S3, %T1 /* FPA */; \ - inc %g2 /* IEU0 Group */; \ - add %o2, %g1, %o2 /* IEU1 */; \ - fcmpgt32 %S0, %T0, %g7 /* FPM Group */; \ - srl %g2, 1, %g2 /* IEU0 */; \ - inc %g3 /* IEU1 */; \ - fcmpgt32 %S2, %T1, %o3 /* FPM Group */; \ - srl %g3, 1, %g3 /* IEU0 */; \ - add %o2, %g2, %o2 /* IEU1 */; \ - inc %g5 /* IEU0 Group */; \ - add %o2, %g3, %o2 /* IEU1 */; \ - fcmpgt32 %fz, %f2, %o4 /* FPM Group */; \ - srl %g5, 1, %g5 /* IEU0 */; \ - inc %g7 /* IEU1 */; \ - fpadd32 %T0, %T1, %U0 /* FPA */; \ - fcmpgt32 %fz, %f6, %o5 /* FPM Group */; \ - srl %g7, 1, %g7 /* IEU0 */; \ - add %o2, %g5, %o2 /* IEU1 */; \ - inc %o3 /* IEU0 Group */; \ - add %o2, %g7, %o2 /* IEU1 */; \ - fcmpgt32 %fz, %f10, %g1 /* FPM Group */; \ - srl %o3, 1, %o3 /* IEU0 */; \ - inc %o4 /* IEU1 */; \ - fcmpgt32 %fz, %f14, %g2 /* FPM Group */; \ - srl %o4, 1, %o4 /* IEU0 */; \ - add %o2, %o3, %o2 /* IEU1 */; \ - std %U0, [%sp + STACKOFF] /* Store Group */; \ - inc %o5 /* IEU0 */; \ - sub %o2, %o4, %o2 /* IEU1 */; \ - fcmpgt32 %fz, %S1, %g3 /* FPM Group */; \ - srl %o5, 1, %o5 /* IEU0 */; \ - inc %g1 /* IEU1 */; \ - fcmpgt32 %fz, %S3, %g5 /* FPM Group */; \ - srl %g1, 1, %g1 /* IEU0 */; \ - sub %o2, %o5, %o2 /* IEU1 */; \ - ldx [%sp + STACKOFF], %o5 /* Load Group */; \ - inc %g2 /* IEU0 */; \ - sub %o2, %g1, %o2 /* IEU1 */; \ - fcmpgt32 %fz, %T1, %g7 /* FPM Group */; \ - srl %g2, 1, %g2 /* IEU0 */; \ - inc %g3 /* IEU1 */; \ - fcmpgt32 %T0, %U0, %o3 /* FPM Group */; \ - srl %g3, 1, %g3 /* IEU0 */; \ - sub %o2, %g2, %o2 /* IEU1 */; \ - inc %g5 /* 
IEU0 Group */; \ - sub %o2, %g3, %o2 /* IEU1 */; \ - fcmpgt32 %fz, %U0, %o4 /* FPM Group */; \ - srl %g5, 1, %g5 /* IEU0 */; \ - inc %g7 /* IEU1 */; \ - srl %g7, 1, %g7 /* IEU0 Group */; \ - sub %o2, %g5, %o2 /* IEU1 */; \ - inc %o3 /* IEU0 Group */; \ - sub %o2, %g7, %o2 /* IEU1 */; \ - srl %o3, 1, %o3 /* IEU0 Group */; \ - inc %o4 /* IEU1 */; \ - srl %o4, 1, %o4 /* IEU0 Group */; \ - add %o2, %o3, %o2 /* IEU1 */; \ - sub %o2, %o4, %o2 /* IEU0 Group */; \ - addcc %o2, %o5, %o2 /* IEU1 Group */; \ - bcs,a,pn %xcc, 33f /* CTI */; \ - add %o2, 1, %o2 /* IEU0 */; \ -33: /* That's it */; +#define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz) \ + srl %g5, 1, %g5 /* IEU0 Group */; \ + fpadd32 %f2, %f0, %S0 /* FPA */; \ + fcmpgt32 %O12, %f12, %o4 /* FPM */; \ + inc %g7 /* IEU0 Group */; \ + fpadd32 %f6, %f4, %S1 /* FPA */; \ + fcmpgt32 %O14, %f14, %o5 /* FPM */; \ + srl %g7, 1, %g7 /* IEU0 Group */; \ + fpadd32 %f10, %f8, %S2 /* FPA */; \ + fcmpgt32 %f0, %S0, %g1 /* FPM */; \ + inc %o3 /* IEU0 Group */; \ + fpadd32 %f14, %f12, %S3 /* FPA */; \ + fcmpgt32 %f4, %S1, %g2 /* FPM */; \ + add %o2, %g5, %o2 /* IEU0 Group */; \ + fpadd32 %S0, %S1, %T0 /* FPA */; \ + fcmpgt32 %f8, %S2, %g3 /* FPM */; \ + add %o2, %g7, %o2 /* IEU0 Group */; \ + fzero %fz /* FPA */; \ + fcmpgt32 %f12, %S3, %g5 /* FPM */; \ + srl %o3, 1, %o3 /* IEU0 Group */; \ + fpadd32 %S2, %S3, %T1 /* FPA */; \ + fcmpgt32 %S0, %T0, %g7 /* FPM */; \ + add %o2, %o3, %o2 /* IEU0 Group */; \ + fpadd32 %T0, %T1, %U0 /* FPA */; \ + fcmpgt32 %S2, %T1, %o3 /* FPM */; \ + inc %o4 /* IEU0 Group */; \ + inc %o5 /* IEU1 */; \ + srl %o4, 1, %o4 /* IEU0 Group */; \ + inc %g1 /* IEU1 */; \ + add %o2, %o4, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %f2, %o4 /* FPM */; \ + srl %o5, 1, %o5 /* IEU0 Group */; \ + inc %g2 /* IEU1 */; \ + add %o2, %o5, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %f6, %o5 /* FPM */; \ + srl %g1, 1, %g1 /* IEU0 Group */; \ + inc %g3 /* IEU1 */; \ + add %o2, %g1, %o2 /* IEU0 Group */; 
\ + fcmpgt32 %fz, %f10, %g1 /* FPM */; \ + srl %g2, 1, %g2 /* IEU0 Group */; \ + inc %g5 /* IEU1 */; \ + add %o2, %g2, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %f14, %g2 /* FPM */; \ + srl %g3, 1, %g3 /* IEU0 Group */; \ + inc %g7 /* IEU1 */; \ + add %o2, %g3, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %S1, %g3 /* FPM */; \ + srl %g5, 1, %g5 /* IEU0 Group */; \ + inc %o3 /* IEU1 */; \ + add %o2, %g5, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %S3, %g5 /* FPM */; \ + srl %g7, 1, %g7 /* IEU0 Group */; \ + inc %o4 /* IEU1 */; \ + add %o2, %g7, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %T1, %g7 /* FPM */; \ + srl %o3, 1, %o3 /* IEU0 Group */; \ + inc %o5 /* IEU1 */; \ + add %o2, %o3, %o2 /* IEU0 Group */; \ + fcmpgt32 %T0, %U0, %o3 /* FPM */; \ + srl %o4, 1, %o4 /* IEU0 Group */; \ + inc %g1 /* IEU1 */; \ + sub %o2, %o4, %o2 /* IEU0 Group */; \ + fcmpgt32 %fz, %U0, %o4 /* FPM */; \ + srl %o5, 1, %o5 /* IEU0 Group */; \ + inc %g2 /* IEU1 */; \ + srl %g1, 1, %g1 /* IEU0 Group */; \ + sub %o2, %o5, %o2 /* IEU1 */; \ + std %U0, [%sp + STACKOFF] /* Store */; \ + srl %g2, 1, %g2 /* IEU0 Group */; \ + sub %o2, %g1, %o2 /* IEU1 */; \ + inc %g3 /* IEU0 Group */; \ + sub %o2, %g2, %o2 /* IEU1 */; \ + srl %g3, 1, %g3 /* IEU0 Group */; \ + inc %g5 /* IEU1 */; \ + srl %g5, 1, %g5 /* IEU0 Group */; \ + sub %o2, %g3, %o2 /* IEU1 */; \ + ldx [%sp + STACKOFF], %o5 /* Load Group */; \ + inc %g7 /* IEU0 */; \ + sub %o2, %g5, %o2 /* IEU1 */; \ + srl %g7, 1, %g7 /* IEU0 Group */; \ + inc %o3 /* IEU1 */; \ + srl %o3, 1, %o3 /* IEU0 Group */; \ + sub %o2, %g7, %o2 /* IEU1 */; \ + inc %o4 /* IEU0 Group */; \ + add %o2, %o3, %o2 /* IEU1 */; \ + srl %o4, 1, %o4 /* IEU0 Group */; \ + sub %o2, %o4, %o2 /* IEU0 Group */; \ + addcc %o2, %o5, %o2 /* IEU1 Group */; \ + bcs,a,pn %xcc, 33f /* CTI */; \ + add %o2, 1, %o2 /* IEU0 */; \ +33: /* That's it */; -#define CSUM_LASTCHUNK(offset) \ - ldx [%o0 - offset - 0x10], %g2; \ - ldx [%o0 - offset - 0x08], %g3; \ - addcc %g2, %o2, %o2; \ - bcs,a,pn %xcc, 31f; \ - 
add %o2, 1, %o2; \ -31: addcc %g3, %o2, %o2; \ - bcs,a,pn %xcc, 32f; \ - add %o2, 1, %o2; \ +#define CSUM_LASTCHUNK(offset) \ + ldx [%o0 - offset - 0x10], %g2; \ + ldx [%o0 - offset - 0x08], %g3; \ + addcc %g2, %o2, %o2; \ + bcs,a,pn %xcc, 31f; \ + add %o2, 1, %o2; \ +31: addcc %g3, %o2, %o2; \ + bcs,a,pn %xcc, 32f; \ + add %o2, 1, %o2; \ 32: .text .globl csum_partial .align 32 csum_partial: - andcc %o0, 7, %g0 /* IEU1 Group */ - be,pt %icc, 4f /* CTI */ - andcc %o0, 0x38, %g3 /* IEU1 */ - mov 1, %g5 /* IEU0 Group */ - cmp %o1, 6 /* IEU1 */ - bl,pn %icc, 21f /* CTI */ - andcc %o0, 2, %g0 /* IEU1 Group */ - be,pt %icc, 1f /* CTI */ - and %o0, 4, %g7 /* IEU0 */ - lduh [%o0], %g2 /* Load */ - sub %o1, 2, %o1 /* IEU0 Group */ - add %o0, 2, %o0 /* IEU1 */ - andcc %o0, 4, %g7 /* IEU1 Group */ - sll %g5, 16, %g5 /* IEU0 */ - sll %g2, 16, %g2 /* IEU0 Group */ - addcc %g2, %o2, %o2 /* IEU1 Group (regdep) */ - bcs,a,pn %icc, 1f /* CTI */ - add %o2, %g5, %o2 /* IEU0 */ -1: ld [%o0], %g2 /* Load */ - brz,a,pn %g7, 4f /* CTI+IEU1 Group */ - and %o0, 0x38, %g3 /* IEU0 */ - add %o0, 4, %o0 /* IEU0 Group */ - sub %o1, 4, %o1 /* IEU1 */ - addcc %g2, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %icc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: and %o0, 0x38, %g3 /* IEU1 Group */ -4: srl %o2, 0, %o2 /* IEU0 Group */ - mov 0x40, %g1 /* IEU1 */ - brz,pn %g3, 3f /* CTI+IEU1 Group */ - sub %g1, %g3, %g1 /* IEU0 */ - cmp %o1, 56 /* IEU1 Group */ - blu,pn %icc, 20f /* CTI */ - andcc %o0, 8, %g0 /* IEU1 Group */ - be,pn %icc, 1f /* CTI */ - ldx [%o0], %g2 /* Load */ - add %o0, 8, %o0 /* IEU0 Group */ - sub %o1, 8, %o1 /* IEU1 */ - addcc %g2, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: andcc %g1, 0x10, %g0 /* IEU1 Group */ - be,pn %icc, 2f /* CTI */ - and %g1, 0x20, %g1 /* IEU0 */ - ldx [%o0], %g2 /* Load */ - ldx [%o0+8], %g3 /* Load Group */ - add %o0, 16, %o0 /* IEU0 */ - sub %o1, 16, %o1 /* IEU1 */ - addcc %g2, %o2, %o2 /* IEU1 Group */ - bcs,a,pn 
%xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: addcc %g3, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %xcc, 2f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -2: brz,pn %g1, 3f /* CTI+IEU1 Group */ - ldx [%o0], %g2 /* Load */ - ldx [%o0+8], %g3 /* Load Group */ - ldx [%o0+16], %g5 /* Load Group */ - ldx [%o0+24], %g7 /* Load Group */ - add %o0, 32, %o0 /* IEU0 */ - sub %o1, 32, %o1 /* IEU1 */ - addcc %g2, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: addcc %g3, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: addcc %g5, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: addcc %g7, %o2, %o2 /* IEU1 Group */ - bcs,a,pn %xcc, 3f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -3: cmp %o1, 0xc0 /* IEU1 Group */ - blu,pn %icc, 20f /* CTI */ - sllx %o2, 32, %g5 /* IEU0 */ + andcc %o0, 7, %g0 /* IEU1 Group */ + be,pt %icc, 4f /* CTI */ + andcc %o0, 0x38, %g3 /* IEU1 */ + mov 1, %g5 /* IEU0 Group */ + cmp %o1, 6 /* IEU1 */ + bl,pn %icc, 21f /* CTI */ + andcc %o0, 2, %g0 /* IEU1 Group */ + be,pt %icc, 1f /* CTI */ + and %o0, 4, %g7 /* IEU0 */ + lduh [%o0], %g2 /* Load */ + sub %o1, 2, %o1 /* IEU0 Group */ + add %o0, 2, %o0 /* IEU1 */ + andcc %o0, 4, %g7 /* IEU1 Group */ + sll %g5, 16, %g5 /* IEU0 */ + sll %g2, 16, %g2 /* IEU0 Group */ + addcc %g2, %o2, %o2 /* IEU1 Group (regdep) */ + bcs,a,pn %icc, 1f /* CTI */ + add %o2, %g5, %o2 /* IEU0 */ +1: ld [%o0], %g2 /* Load */ + brz,a,pn %g7, 4f /* CTI+IEU1 Group */ + and %o0, 0x38, %g3 /* IEU0 */ + add %o0, 4, %o0 /* IEU0 Group */ + sub %o1, 4, %o1 /* IEU1 */ + addcc %g2, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %icc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: and %o0, 0x38, %g3 /* IEU1 Group */ +4: srl %o2, 0, %o2 /* IEU0 Group */ + mov 0x40, %g1 /* IEU1 */ + brz,pn %g3, 3f /* CTI+IEU1 Group */ + sub %g1, %g3, %g1 /* IEU0 */ + cmp %o1, 56 /* IEU1 Group */ + blu,pn %icc, 20f /* CTI */ + andcc %o0, 8, %g0 /* IEU1 Group */ + be,pn 
%icc, 1f /* CTI */ + ldx [%o0], %g2 /* Load */ + add %o0, 8, %o0 /* IEU0 Group */ + sub %o1, 8, %o1 /* IEU1 */ + addcc %g2, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: andcc %g1, 0x10, %g0 /* IEU1 Group */ + be,pn %icc, 2f /* CTI */ + and %g1, 0x20, %g1 /* IEU0 */ + ldx [%o0], %g2 /* Load */ + ldx [%o0+8], %g3 /* Load Group */ + add %o0, 16, %o0 /* IEU0 */ + sub %o1, 16, %o1 /* IEU1 */ + addcc %g2, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: addcc %g3, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 2f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +2: brz,pn %g1, 3f /* CTI+IEU1 Group */ + ldx [%o0], %g2 /* Load */ + ldx [%o0+8], %g3 /* Load Group */ + ldx [%o0+16], %g5 /* Load Group */ + ldx [%o0+24], %g7 /* Load Group */ + add %o0, 32, %o0 /* IEU0 */ + sub %o1, 32, %o1 /* IEU1 */ + addcc %g2, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: addcc %g3, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: addcc %g5, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: addcc %g7, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 3f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +3: cmp %o1, 0xc0 /* IEU1 Group */ + blu,pn %icc, 20f /* CTI */ + sllx %o2, 32, %g5 /* IEU0 */ #ifdef __KERNEL__ VISEntry #endif - addcc %o2, %g5, %o2 /* IEU1 Group */ - sub %o1, 0xc0, %o1 /* IEU0 */ - wr %g0, ASI_BLK_P, %asi /* LSU Group */ - membar #StoreLoad /* LSU Group */ - srlx %o2, 32, %o2 /* IEU0 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU1 */ -1: andcc %o1, 0x80, %g0 /* IEU1 Group */ - bne,pn %icc, 7f /* CTI */ - andcc %o1, 0x40, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - fzero %f12 /* FPA */ - fzero %f14 /* FPA Group */ + addcc %o2, %g5, %o2 /* IEU1 Group */ + sub %o1, 0xc0, %o1 /* IEU0 */ + wr %g0, ASI_BLK_P, %asi /* LSU Group */ + membar #StoreLoad /* LSU Group */ + srlx %o2, 32, %o2 
/* IEU0 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU1 */ +1: andcc %o1, 0x80, %g0 /* IEU1 Group */ + bne,pn %icc, 7f /* CTI */ + andcc %o1, 0x40, %g0 /* IEU1 Group */ + be,pn %icc, 6f /* CTI */ + fzero %f12 /* FPA */ + fzero %f14 /* FPA Group */ ldda [%o0 + 0x000] %asi, %f16 ldda [%o0 + 0x040] %asi, %f32 ldda [%o0 + 0x080] %asi, %f48 START_THE_TRICK(f12,f16,f18,f20,f22,f24,f26) ba,a,pt %xcc, 3f -6: sub %o0, 0x40, %o0 /* IEU0 Group */ - fzero %f28 /* FPA */ - fzero %f30 /* FPA Group */ +6: sub %o0, 0x40, %o0 /* IEU0 Group */ + fzero %f28 /* FPA */ + fzero %f30 /* FPA Group */ ldda [%o0 + 0x040] %asi, %f32 ldda [%o0 + 0x080] %asi, %f48 ldda [%o0 + 0x0c0] %asi, %f0 START_THE_TRICK(f28,f32,f34,f36,f38,f40,f42) ba,a,pt %xcc, 4f -7: bne,pt %icc, 8f /* CTI */ - fzero %f44 /* FPA */ - add %o0, 0x40, %o0 /* IEU0 Group */ - fzero %f60 /* FPA */ - fzero %f62 /* FPA Group */ +7: bne,pt %icc, 8f /* CTI */ + fzero %f44 /* FPA */ + add %o0, 0x40, %o0 /* IEU0 Group */ + fzero %f60 /* FPA */ + fzero %f62 /* FPA Group */ ldda [%o0 - 0x040] %asi, %f0 ldda [%o0 + 0x000] %asi, %f16 ldda [%o0 + 0x040] %asi, %f32 START_THE_TRICK(f60,f0,f2,f4,f6,f8,f10) ba,a,pt %xcc, 2f -8: add %o0, 0x80, %o0 /* IEU0 Group */ - fzero %f46 /* FPA */ +8: add %o0, 0x80, %o0 /* IEU0 Group */ + fzero %f46 /* FPA */ ldda [%o0 - 0x080] %asi, %f48 ldda [%o0 - 0x040] %asi, %f0 ldda [%o0 + 0x000] %asi, %f16 @@ -333,36 +334,36 @@ csum_partial: 3: DO_THE_TRICK(f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46) ldda [%o0 + 0x0c0] %asi, %f0 4: DO_THE_TRICK(f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,f48,f50,f52,f54,f56,f58,f60,f62) - add %o0, 0x100, %o0 /* IEU0 Group */ - subcc %o1, 0x100, %o1 /* IEU1 */ - bgeu,a,pt %icc, 1b /* CTI */ + add %o0, 0x100, %o0 /* IEU0 Group */ + subcc %o1, 0x100, %o1 /* IEU1 */ + bgeu,a,pt %icc, 1b /* CTI */ ldda [%o0 + 0x000] %asi, %f16 - membar #Sync /* LSU Group */ + membar #Sync /* LSU Group */ 
DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14) END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30) #ifdef __KERNEL__ ldub [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %g7 #endif - and %o1, 0x3f, %o1 /* IEU0 Group */ + and %o1, 0x3f, %o1 /* IEU0 Group */ #ifdef __KERNEL__ VISExit wr %g7, %g0, %asi #endif -20: andcc %o1, 0xf0, %g1 /* IEU1 Group */ - be,pn %icc, 23f /* CTI */ - and %o1, 0xf, %o3 /* IEU0 */ +20: andcc %o1, 0xf0, %g1 /* IEU1 Group */ + be,pn %icc, 23f /* CTI */ + and %o1, 0xf, %o3 /* IEU0 */ #ifdef __KERNEL__ -22: sll %g1, 1, %o4 /* IEU0 Group */ - sethi %hi(23f), %g7 /* IEU1 */ - sub %g7, %o4, %g7 /* IEU0 Group */ - jmpl %g7 + %lo(23f), %g0 /* CTI Group brk forced */ - add %o0, %g1, %o0 /* IEU0 */ +22: sll %g1, 1, %o4 /* IEU0 Group */ + sethi %hi(23f), %g7 /* IEU1 */ + sub %g7, %o4, %g7 /* IEU0 Group */ + jmpl %g7 + %lo(23f), %g0 /* CTI Group brk forced*/ + add %o0, %g1, %o0 /* IEU0 */ #else -22: rd %pc, %g7 /* LSU Group+4bubbles */ - sll %g1, 1, %o4 /* IEU0 Group */ - sub %g7, %o4, %g7 /* IEU0 Group (regdep) */ - jmpl %g7 + (23f - 22b), %g0 /* CTI Group brk forced */ - add %o0, %g1, %o0 /* IEU0 */ +22: rd %pc, %g7 /* LSU Group+4bubbles */ + sll %g1, 1, %o4 /* IEU0 Group */ + sub %g7, %o4, %g7 /* IEU0 Group (regdep) */ + jmpl %g7 + (23f - 22b), %g0 /* CTI Group brk forced*/ + add %o0, %g1, %o0 /* IEU0 */ #endif CSUM_LASTCHUNK(0xe0) CSUM_LASTCHUNK(0xd0) @@ -379,72 +380,72 @@ csum_partial: CSUM_LASTCHUNK(0x20) CSUM_LASTCHUNK(0x10) CSUM_LASTCHUNK(0x00) -23: brnz,pn %o3, 26f /* CTI+IEU1 Group */ -24: sllx %o2, 32, %g1 /* IEU0 */ -25: addcc %o2, %g1, %o0 /* IEU1 Group */ - srlx %o0, 32, %o0 /* IEU0 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o0, 1, %o0 /* IEU1 */ -1: retl /* CTI Group brk forced */ - srl %o0, 0, %o0 /* IEU0 */ -26: andcc %o1, 8, %g0 /* IEU1 Group */ - be,pn %icc, 1f /* CTI */ - ldx [%o0], %g3 /* Load */ - add %o0, 8, %o0 /* IEU0 Group */ - addcc %g3, %o2, %o2 
/* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: andcc %o1, 4, %g0 /* IEU1 Group */ - be,a,pn %icc, 1f /* CTI */ - clr %g2 /* IEU0 */ - ld [%o0], %g2 /* Load */ - add %o0, 4, %o0 /* IEU0 Group */ - sllx %g2, 32, %g2 /* IEU0 Group */ -1: andcc %o1, 2, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %o4 /* IEU0 Group */ - lduh [%o0], %o4 /* Load */ - add %o0, 2, %o0 /* IEU1 */ - sll %o4, 16, %o4 /* IEU0 Group */ -1: andcc %o1, 1, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %o5 /* IEU0 Group */ - ldub [%o0], %o5 /* Load */ - sll %o5, 8, %o5 /* IEU0 Group */ -1: or %g2, %o4, %o4 /* IEU1 */ - or %o5, %o4, %o4 /* IEU0 Group (regdep) */ - addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: ba,pt %xcc, 25b /* CTI Group */ - sllx %o2, 32, %g1 /* IEU0 */ -21: srl %o2, 0, %o2 /* IEU0 Group */ - cmp %o1, 0 /* IEU1 */ - be,pn %icc, 24b /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - be,a,pn %icc, 1f /* CTI */ - clr %g2 /* IEU0 */ - lduh [%o0], %g3 /* Load */ - lduh [%o0+2], %g2 /* Load Group */ - add %o0, 4, %o0 /* IEU0 Group */ - sllx %g3, 48, %g3 /* IEU0 Group */ - sllx %g2, 32, %g2 /* IEU0 Group */ - or %g3, %g2, %g2 /* IEU0 Group */ -1: andcc %o1, 2, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %o4 /* IEU0 Group */ - lduh [%o0], %o4 /* Load */ - add %o0, 2, %o0 /* IEU1 */ - sll %o4, 16, %o4 /* IEU0 Group */ -1: andcc %o1, 1, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %o5 /* IEU0 Group */ - ldub [%o0], %o5 /* Load */ - sll %o5, 8, %o5 /* IEU0 Group */ -1: or %g2, %o4, %o4 /* IEU1 */ - or %o5, %o4, %o4 /* IEU0 Group (regdep) */ - addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %o2, 1, %o2 /* IEU0 */ -1: ba,pt %xcc, 25b /* CTI Group */ - sllx %o2, 32, %g1 /* IEU0 */ +23: brnz,pn %o3, 26f /* CTI+IEU1 Group */ +24: sllx %o2, 32, %g1 /* IEU0 */ +25: addcc %o2, %g1, %o0 /* IEU1 Group */ + srlx %o0, 32, %o0 /* IEU0 Group (regdep) */ + 
bcs,a,pn %xcc, 1f /* CTI */ + add %o0, 1, %o0 /* IEU1 */ +1: retl /* CTI Group brk forced*/ + srl %o0, 0, %o0 /* IEU0 */ +26: andcc %o1, 8, %g0 /* IEU1 Group */ + be,pn %icc, 1f /* CTI */ + ldx [%o0], %g3 /* Load */ + add %o0, 8, %o0 /* IEU0 Group */ + addcc %g3, %o2, %o2 /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: andcc %o1, 4, %g0 /* IEU1 Group */ + be,a,pn %icc, 1f /* CTI */ + clr %g2 /* IEU0 */ + ld [%o0], %g2 /* Load */ + add %o0, 4, %o0 /* IEU0 Group */ + sllx %g2, 32, %g2 /* IEU0 Group */ +1: andcc %o1, 2, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %o4 /* IEU0 Group */ + lduh [%o0], %o4 /* Load */ + add %o0, 2, %o0 /* IEU1 */ + sll %o4, 16, %o4 /* IEU0 Group */ +1: andcc %o1, 1, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %o5 /* IEU0 Group */ + ldub [%o0], %o5 /* Load */ + sll %o5, 8, %o5 /* IEU0 Group */ +1: or %g2, %o4, %o4 /* IEU1 */ + or %o5, %o4, %o4 /* IEU0 Group (regdep) */ + addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */ + bcs,a,pn %xcc, 1f /* CTI */ + add %o2, 1, %o2 /* IEU0 */ +1: ba,pt %xcc, 25b /* CTI Group */ + sllx %o2, 32, %g1 /* IEU0 */ +21: srl %o2, 0, %o2 /* IEU0 Group */ + cmp %o1, 0 /* IEU1 */ + be,pn %icc, 24b /* CTI */ + andcc %o1, 4, %g0 /* IEU1 Group */ + be,a,pn %icc, 1f /* CTI */ + clr %g2 /* IEU0 */ + lduh [%o0], %g3 /* Load */ + lduh [%o0+2], %g2 /* Load Group */ + add %o0, 4, %o0 /* IEU0 Group */ + sllx %g3, 48, %g3 /* IEU0 Group */ + sllx %g2, 32, %g2 /* IEU0 Group */ + or %g3, %g2, %g2 /* IEU0 Group */ +1: andcc %o1, 2, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %o4 /* IEU0 Group */ + lduh [%o0], %o4 /* Load */ + add %o0, 2, %o0 /* IEU1 */ + sll %o4, 16, %o4 /* IEU0 Group */ +1: andcc %o1, 1, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %o5 /* IEU0 Group */ + ldub [%o0], %o5 /* Load */ + sll %o5, 8, %o5 /* IEU0 Group */ +1: or %g2, %o4, %o4 /* IEU1 */ + or %o5, %o4, %o4 /* IEU0 Group (regdep) */ + addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */ + bcs,a,pn %xcc, 1f /* CTI 
*/ + add %o2, 1, %o2 /* IEU0 */ +1: ba,pt %xcc, 25b /* CTI Group */ + sllx %o2, 32, %g1 /* IEU0 */ diff --git a/arch/sparc64/lib/VIScsumcopy.S b/arch/sparc64/lib/VIScsumcopy.S index 3f89eea29..9b0193022 100644 --- a/arch/sparc64/lib/VIScsumcopy.S +++ b/arch/sparc64/lib/VIScsumcopy.S @@ -1,4 +1,4 @@ -/* $Id: VIScsumcopy.S,v 1.7 2000/01/19 04:06:03 davem Exp $ +/* $Id: VIScsumcopy.S,v 1.8 2000/02/20 23:21:39 davem Exp $ * VIScsumcopy.S: High bandwidth IP checksumming with simultaneous * copying utilizing the UltraSparc Visual Instruction Set. * @@ -62,384 +62,386 @@ * per 64bytes checksummed/copied. */ -#define LDBLK(O0) \ - ldda [%src] %asi, %O0 /* Load Group */ +#define LDBLK(O0) \ + ldda [%src] %asi, %O0 /* Load Group */ -#define STBLK \ - stda %f48, [%dst] ASI_BLK_P /* Store */ +#define STBLK \ + stda %f48, [%dst] ASI_BLK_P /* Store */ -#define ST(fx,off) \ - std %fx, [%dst + off] /* Store */ +#define ST(fx,off) \ + std %fx, [%dst + off] /* Store */ -#define SYNC \ +#define SYNC \ membar #Sync #define DO_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14,DUMMY1,A0,A2,A4,A6,A8,A10,A12,A14,B14,DUMMY2,LOAD,STORE1,STORE2,STORE3,STORE4,STORE5,STORE6,STORE7,STORE8,DUMMY3,BRANCH...) 
\ - LOAD /* Load Group */; \ - faligndata %A14, %F0, %A14 /* FPA Group */; \ - inc %x5 /* IEU0 */; \ - STORE1 /* Store (optional) */; \ - faligndata %F0, %F2, %A0 /* FPA Group */; \ - srl %x5, 1, %x5 /* IEU0 */; \ - add %sum, %x4, %sum /* IEU1 */; \ - fpadd32 %F0, %f0, %F0 /* FPA Group */; \ - inc %x6 /* IEU0 */; \ - STORE2 /* Store (optional) */; \ - faligndata %F2, %F4, %A2 /* FPA Group */; \ - srl %x6, 1, %x6 /* IEU0 */; \ - add %sum, %x5, %sum /* IEU1 */; \ - fpadd32 %F2, %f2, %F2 /* FPA Group */; \ - add %src, 64, %src /* IEU0 */; \ - add %dst, 64, %dst /* IEU1 */; \ - fcmpgt32 %f0, %F0, %x1 /* FPM Group */; \ - inc %x7 /* IEU0 */; \ - STORE3 /* Store (optional) */; \ - faligndata %F4, %F6, %A4 /* FPA */; \ - srl %x7, 1, %x7 /* IEU0 Group */; \ - add %sum, %x6, %sum /* IEU1 */; \ - fpadd32 %F4, %f4, %F4 /* FPA */; \ - fcmpgt32 %f2, %F2, %x2 /* FPM Group */; \ - inc %x8 /* IEU0 */; \ - STORE4 /* Store (optional) */; \ - faligndata %F6, %F8, %A6 /* FPA */; \ - srl %x8, 1, %x8 /* IEU0 Group */; \ - add %sum, %x7, %sum /* IEU1 */; \ - fpadd32 %F6, %f6, %F6 /* FPA */; \ - fcmpgt32 %f4, %F4, %x3 /* FPM Group */; \ - inc %x1 /* IEU0 */; \ - STORE5 /* Store (optional) */; \ - faligndata %F8, %F10, %A8 /* FPA */; \ - srl %x1, 1, %x1 /* IEU0 Group */; \ - add %sum, %x8, %sum /* IEU1 */; \ - fpadd32 %F8, %f8, %F8 /* FPA */; \ - fcmpgt32 %f6, %F6, %x4 /* FPM Group */; \ - inc %x2 /* IEU0 */; \ - STORE6 /* Store (optional) */; \ - faligndata %F10, %F12, %A10 /* FPA */; \ - srl %x2, 1, %x2 /* IEU0 Group */; \ - add %sum, %x1, %sum /* IEU1 */; \ - fpadd32 %F10, %f10, %F10 /* FPA */; \ - fcmpgt32 %f8, %F8, %x5 /* FPM Group */; \ - inc %x3 /* IEU0 */; \ - STORE7 /* Store (optional) */; \ - faligndata %F12, %F14, %A12 /* FPA */; \ - srl %x3, 1, %x3 /* IEU0 Group */; \ - add %sum, %x2, %sum /* IEU1 */; \ - fpadd32 %F12, %f12, %F12 /* FPA */; \ - fcmpgt32 %f10, %F10, %x6 /* FPM Group */; \ - inc %x4 /* IEU0 */; \ - STORE8 /* Store (optional) */; \ - fmovd %F14, %B14 /* FPA */; \ 
- srl %x4, 1, %x4 /* IEU0 Group */; \ - add %sum, %x3, %sum /* IEU1 */; \ - fpadd32 %F14, %f14, %F14 /* FPA */; \ - fcmpgt32 %f12, %F12, %x7 /* FPM Group */; \ - subcc %len, 64, %len /* IEU1 */; \ - BRANCH /* CTI */; \ - fcmpgt32 %f14, %F14, %x8 /* FPM Group */; \ + LOAD /* Load (Group) */; \ + faligndata %A14, %F0, %A14 /* FPA Group */; \ + inc %x5 /* IEU0 */; \ + STORE1 /* Store (optional) */; \ + faligndata %F0, %F2, %A0 /* FPA Group */; \ + srl %x5, 1, %x5 /* IEU0 */; \ + add %sum, %x4, %sum /* IEU1 */; \ + fpadd32 %F0, %f0, %F0 /* FPA Group */; \ + inc %x6 /* IEU0 */; \ + STORE2 /* Store (optional) */; \ + faligndata %F2, %F4, %A2 /* FPA Group */; \ + srl %x6, 1, %x6 /* IEU0 */; \ + add %sum, %x5, %sum /* IEU1 */; \ + fpadd32 %F2, %f2, %F2 /* FPA Group */; \ + add %src, 64, %src /* IEU0 */; \ + fcmpgt32 %f0, %F0, %x1 /* FPM */; \ + add %dst, 64, %dst /* IEU1 Group */; \ + inc %x7 /* IEU0 */; \ + STORE3 /* Store (optional) */; \ + faligndata %F4, %F6, %A4 /* FPA */; \ + fpadd32 %F4, %f4, %F4 /* FPA Group */; \ + add %sum, %x6, %sum /* IEU1 */; \ + fcmpgt32 %f2, %F2, %x2 /* FPM */; \ + srl %x7, 1, %x7 /* IEU0 Group */; \ + inc %x8 /* IEU1 */; \ + STORE4 /* Store (optional) */; \ + faligndata %F6, %F8, %A6 /* FPA */; \ + fpadd32 %F6, %f6, %F6 /* FPA Group */; \ + srl %x8, 1, %x8 /* IEU0 */; \ + fcmpgt32 %f4, %F4, %x3 /* FPM */; \ + add %sum, %x7, %sum /* IEU0 Group */; \ + inc %x1 /* IEU1 */; \ + STORE5 /* Store (optional) */; \ + faligndata %F8, %F10, %A8 /* FPA */; \ + fpadd32 %F8, %f8, %F8 /* FPA Group */; \ + srl %x1, 1, %x1 /* IEU0 */; \ + fcmpgt32 %f6, %F6, %x4 /* FPM */; \ + add %sum, %x8, %sum /* IEU0 Group */; \ + inc %x2 /* IEU1 */; \ + STORE6 /* Store (optional) */; \ + faligndata %F10, %F12, %A10 /* FPA */; \ + fpadd32 %F10, %f10, %F10 /* FPA Group */; \ + srl %x2, 1, %x2 /* IEU0 */; \ + fcmpgt32 %f8, %F8, %x5 /* FPM */; \ + add %sum, %x1, %sum /* IEU0 Group */; \ + inc %x3 /* IEU1 */; \ + STORE7 /* Store (optional) */; \ + faligndata %F12, %F14, %A12 
/* FPA */; \ + fpadd32 %F12, %f12, %F12 /* FPA Group */; \ + srl %x3, 1, %x3 /* IEU0 */; \ + fcmpgt32 %f10, %F10, %x6 /* FPM */; \ + add %sum, %x2, %sum /* IEU0 Group */; \ + inc %x4 /* IEU1 */; \ + STORE8 /* Store (optional) */; \ + fmovd %F14, %B14 /* FPA */; \ + fpadd32 %F14, %f14, %F14 /* FPA Group */; \ + srl %x4, 1, %x4 /* IEU0 */; \ + fcmpgt32 %f12, %F12, %x7 /* FPM */; \ + add %sum, %x3, %sum /* IEU0 Group */; \ + subcc %len, 64, %len /* IEU1 */; \ + BRANCH /* CTI */; \ + fcmpgt32 %f14, %F14, %x8 /* FPM Group */; #define END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,S0,S1,S2,S3,T0,T1,U0,fz) \ - inc %x5 /* IEU0 Group */; \ - fpadd32 %f2, %f0, %S0 /* FPA */; \ - srl %x5, 1, %x5 /* IEU0 Group */; \ - add %sum, %x4, %sum /* IEU1 */; \ - fpadd32 %f6, %f4, %S1 /* FPA */; \ - inc %x6 /* IEU0 Group */; \ - add %sum, %x5, %sum /* IEU1 */; \ - fcmpgt32 %f0, %S0, %x1 /* FPM Group */; \ - srl %x6, 1, %x6 /* IEU0 */; \ - inc %x7 /* IEU1 */; \ - fpadd32 %f10, %f8, %S2 /* FPA */; \ - fcmpgt32 %f4, %S1, %x2 /* FPM Group */; \ - srl %x7, 1, %x7 /* IEU0 */; \ - add %sum, %x6, %sum /* IEU1 */; \ - fpadd32 %f14, %f12, %S3 /* FPA */; \ - inc %x8 /* IEU0 Group */; \ - add %sum, %x7, %sum /* IEU1 */; \ - fzero %fz /* FPA */; \ - fcmpgt32 %f8, %S2, %x3 /* FPM Group */; \ - srl %x8, 1, %x8 /* IEU0 */; \ - inc %x1 /* IEU1 */; \ - fpadd32 %S0, %S1, %T0 /* FPA */; \ - fcmpgt32 %f12, %S3, %x4 /* FPM Group */; \ - srl %x1, 1, %x1 /* IEU0 */; \ - add %sum, %x8, %sum /* IEU1 */; \ - fpadd32 %S2, %S3, %T1 /* FPA */; \ - inc %x2 /* IEU0 Group */; \ - add %sum, %x1, %sum /* IEU1 */; \ - fcmpgt32 %S0, %T0, %x5 /* FPM Group */; \ - srl %x2, 1, %x2 /* IEU0 */; \ - inc %x3 /* IEU1 */; \ - fcmpgt32 %S2, %T1, %x6 /* FPM Group */; \ - srl %x3, 1, %x3 /* IEU0 */; \ - add %sum, %x2, %sum /* IEU1 */; \ - inc %x4 /* IEU0 Group */; \ - add %sum, %x3, %sum /* IEU1 */; \ - fcmpgt32 %fz, %f2, %x7 /* FPM Group */; \ - srl %x4, 1, %x4 /* IEU0 */; \ - inc %x5 /* IEU1 */; \ - fpadd32 %T0, %T1, %U0 /* FPA */; 
\ - fcmpgt32 %fz, %f6, %x8 /* FPM Group */; \ - srl %x5, 1, %x5 /* IEU0 */; \ - add %sum, %x4, %sum /* IEU1 */; \ - inc %x6 /* IEU0 Group */; \ - add %sum, %x5, %sum /* IEU1 */; \ - fcmpgt32 %fz, %f10, %x1 /* FPM Group */; \ - srl %x6, 1, %x6 /* IEU0 */; \ - inc %x7 /* IEU1 */; \ - fcmpgt32 %fz, %f14, %x2 /* FPM Group */; \ - ba,pt %xcc, ett /* CTI */; \ - fmovd %FA, %FB /* FPA */; \ + inc %x5 /* IEU0 Group */; \ + fpadd32 %f2, %f0, %S0 /* FPA */; \ + add %sum, %x4, %sum /* IEU1 */; \ + srl %x5, 1, %x5 /* IEU0 Group */; \ + fpadd32 %f6, %f4, %S1 /* FPA */; \ + inc %x6 /* IEU1 */; \ + fpadd32 %f10, %f8, %S2 /* FPA Group */; \ + add %sum, %x5, %sum /* IEU0 */; \ + fcmpgt32 %f0, %S0, %x1 /* FPM */; \ + fpadd32 %f14, %f12, %S3 /* FPA Group */; \ + srl %x6, 1, %x6 /* IEU0 */; \ + fcmpgt32 %f4, %S1, %x2 /* FPM */; \ + add %sum, %x6, %sum /* IEU0 Group */; \ + fzero %fz /* FPA */; \ + fcmpgt32 %f8, %S2, %x3 /* FPM */; \ + inc %x7 /* IEU0 Group */; \ + inc %x8 /* IEU1 */; \ + srl %x7, 1, %x7 /* IEU0 Group */; \ + inc %x1 /* IEU1 */; \ + fpadd32 %S0, %S1, %T0 /* FPA */; \ + fpadd32 %S2, %S3, %T1 /* FPA Group */; \ + add %sum, %x7, %sum /* IEU0 */; \ + fcmpgt32 %f12, %S3, %x4 /* FPM */; \ + srl %x8, 1, %x8 /* IEU0 Group */; \ + inc %x2 /* IEU1 */; \ + srl %x1, 1, %x1 /* IEU0 Group */; \ + add %sum, %x8, %sum /* IEU1 */; \ + add %sum, %x1, %sum /* IEU0 Group */; \ + fcmpgt32 %S0, %T0, %x5 /* FPM */; \ + srl %x2, 1, %x2 /* IEU0 Group */; \ + fcmpgt32 %S2, %T1, %x6 /* FPM */; \ + inc %x3 /* IEU0 Group */; \ + add %sum, %x2, %sum /* IEU1 */; \ + srl %x3, 1, %x3 /* IEU0 Group */; \ + inc %x4 /* IEU1 */; \ + fpadd32 %T0, %T1, %U0 /* FPA Group */; \ + add %sum, %x3, %sum /* IEU0 */; \ + fcmpgt32 %fz, %f2, %x7 /* FPM */; \ + srl %x4, 1, %x4 /* IEU0 Group */; \ + fcmpgt32 %fz, %f6, %x8 /* FPM */; \ + inc %x5 /* IEU0 Group */; \ + add %sum, %x4, %sum /* IEU1 */; \ + srl %x5, 1, %x5 /* IEU0 Group */; \ + fcmpgt32 %fz, %f10, %x1 /* FPM */; \ + inc %x6 /* IEU0 Group */; \ + add %sum, 
%x5, %sum /* IEU1 */; \ + fmovd %FA, %FB /* FPA Group */; \ + fcmpgt32 %fz, %f14, %x2 /* FPM */; \ + srl %x6, 1, %x6 /* IEU0 Group */; \ + ba,pt %xcc, ett /* CTI */; \ + inc %x7 /* IEU1 */; -#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \ +#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \ END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,f48,f50,f52,f54,f56,f58,f60,f62) -#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \ - fpadd32 %U0, %U1, %V0 /* FPA Group */; \ - srl %x7, 1, %x7 /* IEU0 */; \ - add %sum, %x6, %sum /* IEU1 */; \ - std %V0, [%sp + STACKOFF] /* Store Group */; \ - inc %x8 /* IEU0 */; \ - sub %sum, %x7, %sum /* IEU1 */; \ - fcmpgt32 %fz, %S1, %x3 /* FPM Group */; \ - srl %x8, 1, %x8 /* IEU0 */; \ - inc %x1 /* IEU1 */; \ - fcmpgt32 %fz, %S3, %x4 /* FPM Group */; \ - srl %x1, 1, %x1 /* IEU0 */; \ - sub %sum, %x8, %sum /* IEU1 */; \ - ldx [%sp + STACKOFF], %x8 /* Load Group */; \ - inc %x2 /* IEU0 */; \ - sub %sum, %x1, %sum /* IEU1 */; \ - fcmpgt32 %fz, %T1, %x5 /* FPM Group */; \ - srl %x2, 1, %x2 /* IEU0 */; \ - inc %x3 /* IEU1 */; \ - fcmpgt32 %T0, %U0, %x6 /* FPM Group */; \ - srl %x3, 1, %x3 /* IEU0 */; \ - sub %sum, %x2, %sum /* IEU1 */; \ - inc %x4 /* IEU0 Group */; \ - sub %sum, %x3, %sum /* IEU1 */; \ - fcmpgt32 %fz, %U1, %x7 /* FPM Group */; \ - srl %x4, 1, %x4 /* IEU0 */; \ - inc %x5 /* IEU1 */; \ - fcmpgt32 %U0, %V0, %x1 /* FPM Group */; \ - srl %x5, 1, %x5 /* IEU0 */; \ - sub %sum, %x4, %sum /* IEU1 */; \ - fcmpgt32 %fz, %V0, %x2 /* FPM Group */; \ - inc %x6 /* IEU0 */; \ - sub %sum, %x5, %sum /* IEU1 */; \ - srl %x6, 1, %x6 /* IEU0 Group */; \ - inc %x7 /* IEU1 */; \ - srl %x7, 1, %x7 /* IEU0 Group */; \ - add %sum, %x6, %sum /* IEU1 */; \ - inc %x1 /* IEU0 Group */; \ - sub %sum, %x7, %sum /* IEU1 */; \ - srl %x1, 1, %x1 /* IEU0 Group */; \ - inc %x2 /* IEU1 */; \ - srl %x2, 1, %x2 /* IEU0 Group */; \ - add %sum, %x1, %sum /* IEU1 */; \ - sub %sum, %x2, %sum /* IEU0 Group */; \ - addcc %sum, %x8, %sum /* IEU Group 
*/; \ - bcs,a,pn %xcc, 33f /* CTI */; \ - add %sum, 1, %sum /* IEU0 */; \ -33: /* That's it */; +#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \ + fpadd32 %U0, %U1, %V0 /* FPA Group */; \ + srl %x7, 1, %x7 /* IEU0 */; \ + add %sum, %x6, %sum /* IEU1 */; \ + std %V0, [%sp + STACKOFF] /* Store Group */; \ + inc %x8 /* IEU0 */; \ + sub %sum, %x7, %sum /* IEU1 */; \ + srl %x8, 1, %x8 /* IEU0 Group */; \ + fcmpgt32 %fz, %S1, %x3 /* FPM */; \ + inc %x1 /* IEU0 Group */; \ + fcmpgt32 %fz, %S3, %x4 /* FPM */; \ + srl %x1, 1, %x1 /* IEU0 Group */; \ + sub %sum, %x8, %sum /* IEU1 */; \ + ldx [%sp + STACKOFF], %x8 /* Load Group */; \ + inc %x2 /* IEU0 */; \ + sub %sum, %x1, %sum /* IEU1 */; \ + srl %x2, 1, %x2 /* IEU0 Group */; \ + fcmpgt32 %fz, %T1, %x5 /* FPM */; \ + inc %x3 /* IEU0 Group */; \ + fcmpgt32 %T0, %U0, %x6 /* FPM */; \ + srl %x3, 1, %x3 /* IEU0 Group */; \ + sub %sum, %x2, %sum /* IEU1 */; \ + inc %x4 /* IEU0 Group */; \ + sub %sum, %x3, %sum /* IEU1 */; \ + srl %x4, 1, %x4 /* IEU0 Group */; \ + fcmpgt32 %fz, %U1, %x7 /* FPM */; \ + inc %x5 /* IEU0 Group */; \ + fcmpgt32 %U0, %V0, %x1 /* FPM */; \ + srl %x5, 1, %x5 /* IEU0 Group */; \ + sub %sum, %x4, %sum /* IEU1 */; \ + sub %sum, %x5, %sum /* IEU0 Group */; \ + fcmpgt32 %fz, %V0, %x2 /* FPM */; \ + inc %x6 /* IEU0 Group */; \ + inc %x7 /* IEU1 */; \ + srl %x6, 1, %x6 /* IEU0 Group */; \ + inc %x1 /* IEU1 */; \ + srl %x7, 1, %x7 /* IEU0 Group */; \ + add %sum, %x6, %sum /* IEU1 */; \ + srl %x1, 1, %x1 /* IEU0 Group */; \ + sub %sum, %x7, %sum /* IEU1 */; \ + inc %x2 /* IEU0 Group */; \ + add %sum, %x1, %sum /* IEU1 */; \ + srl %x2, 1, %x2 /* IEU0 Group */; \ + sub %sum, %x2, %sum /* IEU0 Group */; \ + addcc %sum, %x8, %sum /* IEU1 Group */; \ + bcs,a,pn %xcc, 33f /* CTI */; \ + add %sum, 1, %sum /* IEU0 (Group) */; \ +33: /* That's it */; .text .globl csum_partial_copy_vis .align 32 -/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp. 
csum_partial_copy_from_user */ -/* This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256 */ +/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp. + * csum_partial_copy_from_user + * This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256 + */ csum_partial_copy_vis: - andcc %dst, 7, %g0 /* IEU1 Group */ - be,pt %icc, 4f /* CTI */ - and %dst, 0x38, %o4 /* IEU0 */ - mov 1, %g5 /* IEU0 Group */ - andcc %dst, 2, %g0 /* IEU1 */ - be,pt %icc, 1f /* CTI */ - and %dst, 4, %g7 /* IEU0 Group */ - lduha [%src] %asi, %g2 /* Load */ - sub %len, 2, %len /* IEU0 Group */ - add %dst, 2, %dst /* IEU1 */ - andcc %dst, 4, %g7 /* IEU1 Group */ - sll %g5, 16, %g5 /* IEU0 */ - sth %g2, [%dst - 2] /* Store Group */ - sll %g2, 16, %g2 /* IEU0 */ - add %src, 2, %src /* IEU1 */ - addcc %g2, %sum, %sum /* IEU1 Group */ - bcs,a,pn %icc, 1f /* CTI */ - add %sum, %g5, %sum /* IEU0 */ -1: lduwa [%src] %asi, %g2 /* Load */ - brz,a,pn %g7, 4f /* CTI+IEU1 Group */ - and %dst, 0x38, %o4 /* IEU0 */ - add %dst, 4, %dst /* IEU0 Group */ - sub %len, 4, %len /* IEU1 */ - addcc %g2, %sum, %sum /* IEU1 Group */ - bcs,a,pn %icc, 1f /* CTI */ - add %sum, 1, %sum /* IEU0 */ -1: and %dst, 0x38, %o4 /* IEU0 Group */ - stw %g2, [%dst - 4] /* Store */ - add %src, 4, %src /* IEU1 */ + andcc %dst, 7, %g0 /* IEU1 Group */ + be,pt %icc, 4f /* CTI */ + and %dst, 0x38, %o4 /* IEU0 */ + mov 1, %g5 /* IEU0 Group */ + andcc %dst, 2, %g0 /* IEU1 */ + be,pt %icc, 1f /* CTI */ + and %dst, 4, %g7 /* IEU0 Group */ + lduha [%src] %asi, %g2 /* Load */ + sub %len, 2, %len /* IEU0 Group */ + add %dst, 2, %dst /* IEU1 */ + andcc %dst, 4, %g7 /* IEU1 Group */ + sll %g5, 16, %g5 /* IEU0 */ + sth %g2, [%dst - 2] /* Store Group */ + sll %g2, 16, %g2 /* IEU0 */ + add %src, 2, %src /* IEU1 */ + addcc %g2, %sum, %sum /* IEU1 Group */ + bcs,a,pn %icc, 1f /* CTI */ + add %sum, %g5, %sum /* IEU0 */ +1: lduwa [%src] %asi, %g2 /* Load */ + brz,a,pn %g7, 4f /* CTI+IEU1 Group */ + and %dst, 0x38, 
%o4 /* IEU0 */ + add %dst, 4, %dst /* IEU0 Group */ + sub %len, 4, %len /* IEU1 */ + addcc %g2, %sum, %sum /* IEU1 Group */ + bcs,a,pn %icc, 1f /* CTI */ + add %sum, 1, %sum /* IEU0 */ +1: and %dst, 0x38, %o4 /* IEU0 Group */ + stw %g2, [%dst - 4] /* Store */ + add %src, 4, %src /* IEU1 */ 4: #ifdef __KERNEL__ VISEntry #endif - mov %src, %g7 /* IEU1 Group */ - fzero %f48 /* FPA */ - alignaddr %src, %g0, %src /* Single Group */ - subcc %g7, %src, %g7 /* IEU1 Group */ - be,pt %xcc, 1f /* CTI */ - mov 0x40, %g1 /* IEU0 */ - lduwa [%src] %asi, %g2 /* Load Group */ - subcc %sum, %g2, %sum /* IEU1 Group+load stall */ - bcs,a,pn %icc, 1f /* CTI */ - sub %sum, 1, %sum /* IEU0 */ -1: srl %sum, 0, %sum /* IEU0 Group */ - clr %g5 /* IEU1 */ - brz,pn %o4, 3f /* CTI+IEU1 Group */ - sub %g1, %o4, %g1 /* IEU0 */ - ldda [%src] %asi, %f0 /* Load */ - clr %o4 /* IEU0 Group */ - andcc %dst, 8, %g0 /* IEU1 */ - be,pn %icc, 1f /* CTI */ - ldda [%src + 8] %asi, %f2 /* Load Group */ - add %src, 8, %src /* IEU0 */ - sub %len, 8, %len /* IEU1 */ - fpadd32 %f0, %f48, %f50 /* FPA */ - addcc %dst, 8, %dst /* IEU1 Group */ - faligndata %f0, %f2, %f16 /* FPA */ - fcmpgt32 %f48, %f50, %o4 /* FPM Group */ - fmovd %f2, %f0 /* FPA Group */ - ldda [%src + 8] %asi, %f2 /* Load */ - std %f16, [%dst - 8] /* Store */ - fmovd %f50, %f48 /* FPA */ -1: andcc %g1, 0x10, %g0 /* IEU1 Group */ - be,pn %icc, 1f /* CTI */ - and %g1, 0x20, %g1 /* IEU0 */ - fpadd32 %f0, %f48, %f50 /* FPA */ - ldda [%src + 16] %asi, %f4 /* Load Group */ - add %src, 16, %src /* IEU0 */ - add %dst, 16, %dst /* IEU1 */ - faligndata %f0, %f2, %f16 /* FPA */ - fcmpgt32 %f48, %f50, %g5 /* FPM Group */ - sub %len, 16, %len /* IEU0 */ - inc %o4 /* IEU1 */ - std %f16, [%dst - 16] /* Store Group */ - fpadd32 %f2, %f50, %f48 /* FPA */ - srl %o4, 1, %o5 /* IEU0 */ - faligndata %f2, %f4, %f18 /* FPA Group */ - std %f18, [%dst - 8] /* Store */ - fcmpgt32 %f50, %f48, %o4 /* FPM Group */ - add %o5, %sum, %sum /* IEU0 */ - ldda [%src + 8] %asi, %f2 
/* Load */ - fmovd %f4, %f0 /* FPA */ -1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */ - rd %asi, %g2 /* LSU Group + 4 bubbles */ - inc %g5 /* IEU0 */ - fpadd32 %f0, %f48, %f50 /* FPA */ - ldda [%src + 16] %asi, %f4 /* Load Group */ - srl %g5, 1, %g5 /* IEU0 */ - add %dst, 32, %dst /* IEU1 */ - faligndata %f0, %f2, %f16 /* FPA */ - fcmpgt32 %f48, %f50, %o5 /* FPM Group */ - inc %o4 /* IEU0 */ - ldda [%src + 24] %asi, %f6 /* Load */ - srl %o4, 1, %o4 /* IEU0 Group */ - add %g5, %sum, %sum /* IEU1 */ - ldda [%src + 32] %asi, %f8 /* Load */ - fpadd32 %f2, %f50, %f48 /* FPA */ - faligndata %f2, %f4, %f18 /* FPA Group */ - sub %len, 32, %len /* IEU0 */ - std %f16, [%dst - 32] /* Store */ - fcmpgt32 %f50, %f48, %g3 /* FPM Group */ - inc %o5 /* IEU0 */ - add %o4, %sum, %sum /* IEU1 */ - fpadd32 %f4, %f48, %f50 /* FPA */ - faligndata %f4, %f6, %f20 /* FPA Group */ - srl %o5, 1, %o5 /* IEU0 */ - fcmpgt32 %f48, %f50, %g5 /* FPM Group */ - add %o5, %sum, %sum /* IEU0 */ - std %f18, [%dst - 24] /* Store */ - fpadd32 %f6, %f50, %f48 /* FPA */ - inc %g3 /* IEU0 Group */ - std %f20, [%dst - 16] /* Store */ - add %src, 32, %src /* IEU1 */ - faligndata %f6, %f8, %f22 /* FPA */ - fcmpgt32 %f50, %f48, %o4 /* FPM Group */ - srl %g3, 1, %g3 /* IEU0 */ - std %f22, [%dst - 8] /* Store */ - add %g3, %sum, %sum /* IEU0 Group */ -3: rd %asi, %g2 /* LSU Group + 4 bubbles */ + mov %src, %g7 /* IEU1 Group */ + fzero %f48 /* FPA */ + alignaddr %src, %g0, %src /* Single Group */ + subcc %g7, %src, %g7 /* IEU1 Group */ + be,pt %xcc, 1f /* CTI */ + mov 0x40, %g1 /* IEU0 */ + lduwa [%src] %asi, %g2 /* Load Group */ + subcc %sum, %g2, %sum /* IEU1 Group+load stall*/ + bcs,a,pn %icc, 1f /* CTI */ + sub %sum, 1, %sum /* IEU0 */ +1: srl %sum, 0, %sum /* IEU0 Group */ + clr %g5 /* IEU1 */ + brz,pn %o4, 3f /* CTI+IEU1 Group */ + sub %g1, %o4, %g1 /* IEU0 */ + ldda [%src] %asi, %f0 /* Load */ + clr %o4 /* IEU0 Group */ + andcc %dst, 8, %g0 /* IEU1 */ + be,pn %icc, 1f /* CTI */ + ldda [%src + 8] %asi, %f2 /* Load 
Group */ + add %src, 8, %src /* IEU0 */ + sub %len, 8, %len /* IEU1 */ + fpadd32 %f0, %f48, %f50 /* FPA */ + addcc %dst, 8, %dst /* IEU1 Group */ + faligndata %f0, %f2, %f16 /* FPA */ + fcmpgt32 %f48, %f50, %o4 /* FPM Group */ + fmovd %f2, %f0 /* FPA Group */ + ldda [%src + 8] %asi, %f2 /* Load */ + std %f16, [%dst - 8] /* Store */ + fmovd %f50, %f48 /* FPA */ +1: andcc %g1, 0x10, %g0 /* IEU1 Group */ + be,pn %icc, 1f /* CTI */ + and %g1, 0x20, %g1 /* IEU0 */ + fpadd32 %f0, %f48, %f50 /* FPA */ + ldda [%src + 16] %asi, %f4 /* Load Group */ + add %src, 16, %src /* IEU0 */ + add %dst, 16, %dst /* IEU1 */ + faligndata %f0, %f2, %f16 /* FPA */ + fcmpgt32 %f48, %f50, %g5 /* FPM Group */ + sub %len, 16, %len /* IEU0 */ + inc %o4 /* IEU1 */ + std %f16, [%dst - 16] /* Store Group */ + fpadd32 %f2, %f50, %f48 /* FPA */ + srl %o4, 1, %o5 /* IEU0 */ + faligndata %f2, %f4, %f18 /* FPA Group */ + std %f18, [%dst - 8] /* Store */ + fcmpgt32 %f50, %f48, %o4 /* FPM Group */ + add %o5, %sum, %sum /* IEU0 */ + ldda [%src + 8] %asi, %f2 /* Load */ + fmovd %f4, %f0 /* FPA */ +1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */ + rd %asi, %g2 /* LSU Group + 4 bubbles*/ + inc %g5 /* IEU0 */ + fpadd32 %f0, %f48, %f50 /* FPA */ + ldda [%src + 16] %asi, %f4 /* Load Group */ + srl %g5, 1, %g5 /* IEU0 */ + add %dst, 32, %dst /* IEU1 */ + faligndata %f0, %f2, %f16 /* FPA */ + fcmpgt32 %f48, %f50, %o5 /* FPM Group */ + inc %o4 /* IEU0 */ + ldda [%src + 24] %asi, %f6 /* Load */ + srl %o4, 1, %o4 /* IEU0 Group */ + add %g5, %sum, %sum /* IEU1 */ + ldda [%src + 32] %asi, %f8 /* Load */ + fpadd32 %f2, %f50, %f48 /* FPA */ + faligndata %f2, %f4, %f18 /* FPA Group */ + sub %len, 32, %len /* IEU0 */ + std %f16, [%dst - 32] /* Store */ + fcmpgt32 %f50, %f48, %g3 /* FPM Group */ + inc %o5 /* IEU0 */ + add %o4, %sum, %sum /* IEU1 */ + fpadd32 %f4, %f48, %f50 /* FPA */ + faligndata %f4, %f6, %f20 /* FPA Group */ + srl %o5, 1, %o5 /* IEU0 */ + fcmpgt32 %f48, %f50, %g5 /* FPM Group */ + add %o5, %sum, %sum /* IEU0 */ 
+ std %f18, [%dst - 24] /* Store */ + fpadd32 %f6, %f50, %f48 /* FPA */ + inc %g3 /* IEU0 Group */ + std %f20, [%dst - 16] /* Store */ + add %src, 32, %src /* IEU1 */ + faligndata %f6, %f8, %f22 /* FPA */ + fcmpgt32 %f50, %f48, %o4 /* FPM Group */ + srl %g3, 1, %g3 /* IEU0 */ + std %f22, [%dst - 8] /* Store */ + add %g3, %sum, %sum /* IEU0 Group */ +3: rd %asi, %g2 /* LSU Group + 4 bubbles*/ #ifdef __KERNEL__ -4: sethi %hi(vis0s), %g7 /* IEU0 Group */ - or %g2, ASI_BLK_OR, %g2 /* IEU1 */ +4: sethi %hi(vis0s), %g7 /* IEU0 Group */ + or %g2, ASI_BLK_OR, %g2 /* IEU1 */ #else -4: rd %pc, %g7 /* LSU Group + 4 bubbles */ +4: rd %pc, %g7 /* LSU Group + 4 bubbles*/ #endif - inc %g5 /* IEU0 Group */ - and %src, 0x38, %g3 /* IEU1 */ - membar #StoreLoad /* LSU Group */ - srl %g5, 1, %g5 /* IEU0 */ - inc %o4 /* IEU1 */ - sll %g3, 8, %g3 /* IEU0 Group */ - sub %len, 0xc0, %len /* IEU1 */ - addcc %g5, %sum, %sum /* IEU1 Group */ - srl %o4, 1, %o4 /* IEU0 */ - add %g7, %g3, %g7 /* IEU0 Group */ - add %o4, %sum, %sum /* IEU1 */ + inc %g5 /* IEU0 Group */ + and %src, 0x38, %g3 /* IEU1 */ + membar #StoreLoad /* LSU Group */ + srl %g5, 1, %g5 /* IEU0 */ + inc %o4 /* IEU1 */ + sll %g3, 8, %g3 /* IEU0 Group */ + sub %len, 0xc0, %len /* IEU1 */ + addcc %g5, %sum, %sum /* IEU1 Group */ + srl %o4, 1, %o4 /* IEU0 */ + add %g7, %g3, %g7 /* IEU0 Group */ + add %o4, %sum, %sum /* IEU1 */ #ifdef __KERNEL__ - jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */ + jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */ #else - jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */ + jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */ #endif - fzero %f32 /* FPA */ + fzero %f32 /* FPA */ .align 2048 -vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - add %src, 128, %src /* IEU0 Group */ - ldda [%src-128] %asi, %f0 /* Load Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f48, %f62 /* FPA Group f0 available */ - faligndata %f0, %f2, %f48 /* FPA Group f2 available */ - fcmpgt32 %f32, %f2, %x1 /* FPM Group 
f4 available */ - fpadd32 %f0, %f62, %f0 /* FPA */ - fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available */ - faligndata %f2, %f4, %f50 /* FPA */ - fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available */ - faligndata %f4, %f6, %f52 /* FPA */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available */ - inc %x1 /* IEU0 */ - faligndata %f6, %f8, %f54 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available */ - srl %x1, 1, %x1 /* IEU0 */ - inc %x2 /* IEU1 */ - faligndata %f8, %f10, %f56 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available */ - srl %x2, 1, %x2 /* IEU0 */ - add %sum, %x1, %sum /* IEU1 */ - faligndata %f10, %f12, %f58 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - inc %x3 /* IEU0 */ - add %sum, %x2, %sum /* IEU1 */ - faligndata %f12, %f14, %f60 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - srl %x3, 1, %x3 /* IEU0 */ - inc %x4 /* IEU1 */ - fmovd %f14, %f62 /* FPA */ - srl %x4, 1, %x4 /* IEU0 Group */ - add %sum, %x3, %sum /* IEU1 */ +vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + add %src, 128, %src /* IEU0 Group */ + ldda [%src-128] %asi, %f0 /* Load Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f48, %f62 /* FPA Group f0 available*/ + faligndata %f0, %f2, %f48 /* FPA Group f2 available*/ + fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available*/ + fpadd32 %f0, %f62, %f0 /* FPA */ + fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available*/ + faligndata %f2, %f4, %f50 /* FPA */ + fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available*/ + faligndata %f4, %f6, %f52 /* FPA */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available*/ + inc %x1 /* IEU0 */ + faligndata %f6, %f8, %f54 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available*/ + srl %x1, 1, %x1 /* IEU0 */ + inc %x2 /* IEU1 */ + faligndata %f8, %f10, %f56 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available*/ + srl %x2, 1, %x2 /* IEU0 */ + add %sum, %x1, %sum /* IEU1 */ + faligndata %f10, %f12, %f58 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + 
inc %x3 /* IEU0 */ + add %sum, %x2, %sum /* IEU1 */ + faligndata %f12, %f14, %f60 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + srl %x3, 1, %x3 /* IEU0 */ + inc %x4 /* IEU1 */ + fmovd %f14, %f62 /* FPA */ + srl %x4, 1, %x4 /* IEU0 Group */ + add %sum, %x3, %sum /* IEU1 */ vis0: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, - ,f48,f50,f52,f54,f56,f58,f60,f62,f62, - ,LDBLK(f32), STBLK,,,,,,,, + ,f48,f50,f52,f54,f56,f58,f60,f62,f62, + ,LDBLK(f32), STBLK,,,,,,,, ,bcs,pn %icc, vis0e1) DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, - ,f48,f50,f52,f54,f56,f58,f60,f62,f62, - ,LDBLK(f0), STBLK,,,,,,,, + ,f48,f50,f52,f54,f56,f58,f60,f62,f62, + ,LDBLK(f0), STBLK,,,,,,,, ,bcs,pn %icc, vis0e2) - DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, + DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f48,f50,f52,f54,f56,f58,f60,f62,f62, ,LDBLK(f16), STBLK,,,,,,,, ,bcc,pt %icc, vis0) -vis0e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, +vis0e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f48,f50,f52,f54,f56,f58,f60,f62,f32, ,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e2) @@ -447,39 +449,39 @@ vis0e1: DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f4 ,f48,f50,f52,f54,f56,f58,f60,f62,f0, ,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e3) -vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, +vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f48,f50,f52,f54,f56,f58,f60,f62,f16, ,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt 
%icc, e1) .align 2048 -vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - add %src, 128 - 8, %src /* IEU0 Group */ - ldda [%src-128] %asi, %f0 /* Load Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f0, %f58 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - fcmpgt32 %f32, %f2, %x2 /* FPM Group */ - faligndata %f2, %f4, %f48 /* FPA */ - fcmpgt32 %f32, %f4, %x3 /* FPM Group */ - faligndata %f4, %f6, %f50 /* FPA */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group */ - faligndata %f6, %f8, %f52 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - inc %x2 /* IEU1 */ - faligndata %f8, %f10, %f54 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - srl %x2, 1, %x2 /* IEU0 */ - faligndata %f10, %f12, %f56 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - inc %x3 /* IEU0 */ - add %sum, %x2, %sum /* IEU1 */ - faligndata %f12, %f14, %f58 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - srl %x3, 1, %x3 /* IEU0 */ - inc %x4 /* IEU1 */ - fmovd %f14, %f60 /* FPA */ - srl %x4, 1, %x4 /* IEU0 Group */ - add %sum, %x3, %sum /* IEU1 */ +vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + add %src, 128 - 8, %src /* IEU0 Group */ + ldda [%src-128] %asi, %f0 /* Load Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f0, %f58 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + fcmpgt32 %f32, %f2, %x2 /* FPM Group */ + faligndata %f2, %f4, %f48 /* FPA */ + fcmpgt32 %f32, %f4, %x3 /* FPM Group */ + faligndata %f4, %f6, %f50 /* FPA */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group */ + faligndata %f6, %f8, %f52 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + inc %x2 /* IEU1 */ + faligndata %f8, %f10, %f54 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + srl %x2, 1, %x2 /* IEU0 */ + faligndata %f10, %f12, %f56 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + inc %x3 /* IEU0 */ + add %sum, %x2, %sum /* IEU1 */ + faligndata %f12, %f14, %f58 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + srl %x3, 1, %x3 /* IEU0 */ + inc %x4 /* IEU1 */ + fmovd 
%f14, %f60 /* FPA */ + srl %x4, 1, %x4 /* IEU0 Group */ + add %sum, %x3, %sum /* IEU1 */ vis1: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f62,f48,f50,f52,f54,f56,f58,f60,f60, ,LDBLK(f32), ,STBLK,,,,,,, @@ -505,31 +507,31 @@ vis1e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,STBLK,ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40), ,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e1) .align 2048 -vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - add %src, 128 - 16, %src /* IEU0 Group */ - ldda [%src-128] %asi, %f0 /* Load Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f0, %f56 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - sub %dst, 64, %dst /* IEU0 */ - fpsub32 %f2, %f2, %f2 /* FPA Group */ - fcmpgt32 %f32, %f4, %x3 /* FPM Group */ - faligndata %f4, %f6, %f48 /* FPA */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group */ - faligndata %f6, %f8, %f50 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - faligndata %f8, %f10, %f52 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - faligndata %f10, %f12, %f54 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - inc %x3 /* IEU0 */ - faligndata %f12, %f14, %f56 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - srl %x3, 1, %x3 /* IEU0 */ - inc %x4 /* IEU1 */ - fmovd %f14, %f58 /* FPA */ - srl %x4, 1, %x4 /* IEU0 Group */ - add %sum, %x3, %sum /* IEU1 */ +vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + add %src, 128 - 16, %src /* IEU0 Group */ + ldda [%src-128] %asi, %f0 /* Load Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f0, %f56 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + sub %dst, 64, %dst /* IEU0 */ + fpsub32 %f2, %f2, %f2 /* FPA Group */ + fcmpgt32 %f32, %f4, %x3 /* FPM Group */ + faligndata %f4, %f6, %f48 /* FPA */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group */ + faligndata %f6, %f8, %f50 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + faligndata %f8, %f10, %f52 /* FPA */ 
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + faligndata %f10, %f12, %f54 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + inc %x3 /* IEU0 */ + faligndata %f12, %f14, %f56 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + srl %x3, 1, %x3 /* IEU0 */ + inc %x4 /* IEU1 */ + fmovd %f14, %f58 /* FPA */ + srl %x4, 1, %x4 /* IEU0 Group */ + add %sum, %x3, %sum /* IEU1 */ vis2: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f60,f62,f48,f50,f52,f54,f56,f58,f58, ,LDBLK(f32), ,,STBLK,,,,,, @@ -555,27 +557,27 @@ vis2e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96), ,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e1) .align 2048 -vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - add %src, 128 - 24, %src /* IEU0 Group */ - ldda [%src-128] %asi, %f0 /* Load Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f0, %f54 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - sub %dst, 64, %dst /* IEU0 */ - fpsub32 %f2, %f2, %f2 /* FPA Group */ - fpsub32 %f4, %f4, %f4 /* FPA Group */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group */ - faligndata %f6, %f8, %f48 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - faligndata %f8, %f10, %f50 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - faligndata %f10, %f12, %f52 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - faligndata %f12, %f14, %f54 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - fmovd %f14, %f56 /* FPA */ - inc %x4 /* IEU0 */ - srl %x4, 1, %x4 /* IEU0 Group */ +vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + add %src, 128 - 24, %src /* IEU0 Group */ + ldda [%src-128] %asi, %f0 /* Load Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f0, %f54 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + sub %dst, 64, %dst /* IEU0 */ + fpsub32 %f2, %f2, %f2 /* FPA Group */ + fpsub32 %f4, %f4, %f4 /* FPA Group */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group */ + 
faligndata %f6, %f8, %f48 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + faligndata %f8, %f10, %f50 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + faligndata %f10, %f12, %f52 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + faligndata %f12, %f14, %f54 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + fmovd %f14, %f56 /* FPA */ + inc %x4 /* IEU0 */ + srl %x4, 1, %x4 /* IEU0 Group */ vis3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f58,f60,f62,f48,f50,f52,f54,f56,f56, ,LDBLK(f32), ,,,STBLK,,,,, @@ -601,25 +603,25 @@ vis3e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88), ,add %dst, 96, %dst; add %len, 192 - 5*8, %len; ba,pt %icc, e1) .align 2048 -vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - add %src, 128 - 32, %src /* IEU0 Group */ - ldda [%src-128] %asi, %f0 /* Load Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f0, %f52 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - sub %dst, 64, %dst /* IEU0 */ - fpsub32 %f2, %f2, %f2 /* FPA Group */ - fpsub32 %f4, %f4, %f4 /* FPA Group */ - fpsub32 %f6, %f6, %f6 /* FPA Group */ - clr %x4 /* IEU0 */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - faligndata %f8, %f10, %f48 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - faligndata %f10, %f12, %f50 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - faligndata %f12, %f14, %f52 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - fmovd %f14, %f54 /* FPA */ +vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + add %src, 128 - 32, %src /* IEU0 Group */ + ldda [%src-128] %asi, %f0 /* Load Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f0, %f52 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + sub %dst, 64, %dst /* IEU0 */ + fpsub32 %f2, %f2, %f2 /* FPA Group */ + fpsub32 %f4, %f4, %f4 /* FPA Group */ + fpsub32 %f6, %f6, %f6 /* FPA Group */ + clr %x4 /* IEU0 */ + fcmpgt32 %f32, %f8, %x5 /* FPM 
Group */ + faligndata %f8, %f10, %f48 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + faligndata %f10, %f12, %f50 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + faligndata %f12, %f14, %f52 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + fmovd %f14, %f54 /* FPA */ vis4: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f56,f58,f60,f62,f48,f50,f52,f54,f54, ,LDBLK(f32), ,,,,STBLK,,,, @@ -645,26 +647,26 @@ vis4e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80), ,add %dst, 88, %dst; add %len, 192 - 4*8, %len; ba,pt %icc, e1) .align 2048 -vis5s: add %src, 128 - 40, %src /* IEU0 Group */ - ldda [%src-88] %asi, %f10 /* Load Group */ - ldda [%src-80] %asi, %f12 /* Load Group */ - ldda [%src-72] %asi, %f14 /* Load Group */ - wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f48, %f0 /* FPA Group */ - fmuld %f32, %f32, %f2 /* FPM */ - clr %x4 /* IEU0 */ - faddd %f32, %f32, %f4 /* FPA Group */ - fmuld %f32, %f32, %f6 /* FPM */ - clr %x5 /* IEU0 */ - faddd %f32, %f32, %f8 /* FPA Group */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - sub %dst, 64, %dst /* IEU0 */ - faligndata %f10, %f12, %f48 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - faligndata %f12, %f14, %f50 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - fmovd %f14, %f52 /* FPA */ +vis5s: add %src, 128 - 40, %src /* IEU0 Group */ + ldda [%src-88] %asi, %f10 /* Load Group */ + ldda [%src-80] %asi, %f12 /* Load Group */ + ldda [%src-72] %asi, %f14 /* Load Group */ + wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f48, %f0 /* FPA Group */ + fmuld %f32, %f32, %f2 /* FPM */ + clr %x4 /* IEU0 */ + faddd %f32, %f32, %f4 /* FPA Group */ + fmuld %f32, %f32, %f6 /* FPM */ + clr %x5 /* IEU0 */ + faddd %f32, %f32, %f8 /* FPA Group */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + sub %dst, 64, %dst /* IEU0 
*/ + faligndata %f10, %f12, %f48 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + faligndata %f12, %f14, %f50 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + fmovd %f14, %f52 /* FPA */ vis5: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f54,f56,f58,f60,f62,f48,f50,f52,f52, ,LDBLK(f32), ,,,,,STBLK,,, @@ -690,25 +692,25 @@ vis5e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,,,STBLK,ST(f48,64),ST(f50,72), ,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e1) .align 2048 -vis6s: add %src, 128 - 48, %src /* IEU0 Group */ - ldda [%src-80] %asi, %f12 /* Load Group */ - ldda [%src-72] %asi, %f14 /* Load Group */ - wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f48, %f0 /* FPA Group */ - fmuld %f32, %f32, %f2 /* FPM */ - clr %x4 /* IEU0 */ - faddd %f32, %f32, %f4 /* FPA Group */ - fmuld %f32, %f32, %f6 /* FPM */ - clr %x5 /* IEU0 */ - faddd %f32, %f32, %f8 /* FPA Group */ - fmuld %f32, %f32, %f10 /* FPM */ - clr %x6 /* IEU0 */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - sub %dst, 64, %dst /* IEU0 */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - faligndata %f12, %f14, %f48 /* FPA */ - fmovd %f14, %f50 /* FPA Group */ +vis6s: add %src, 128 - 48, %src /* IEU0 Group */ + ldda [%src-80] %asi, %f12 /* Load Group */ + ldda [%src-72] %asi, %f14 /* Load Group */ + wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f48, %f0 /* FPA Group */ + fmuld %f32, %f32, %f2 /* FPM */ + clr %x4 /* IEU0 */ + faddd %f32, %f32, %f4 /* FPA Group */ + fmuld %f32, %f32, %f6 /* FPM */ + clr %x5 /* IEU0 */ + faddd %f32, %f32, %f8 /* FPA Group */ + fmuld %f32, %f32, %f10 /* FPM */ + clr %x6 /* IEU0 */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + sub %dst, 64, %dst /* IEU0 */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + faligndata %f12, %f14, %f48 /* FPA */ + fmovd %f14, %f50 /* FPA Group */ vis6: DO_THE_TRICK( 
f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f52,f54,f56,f58,f60,f62,f48,f50,f50, ,LDBLK(f32), ,,,,,,STBLK,, @@ -734,24 +736,24 @@ vis6e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,,,,STBLK,ST(f48,64), ,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e1) .align 2048 -vis7s: add %src, 128 - 56, %src /* IEU0 Group */ - ldda [%src-72] %asi, %f14 /* Load Group */ - wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src-64] %asi, %f16 /* Load Group */ - fmovd %f48, %f0 /* FPA Group */ - fmuld %f32, %f32, %f2 /* FPM */ - clr %x4 /* IEU0 */ - faddd %f32, %f32, %f4 /* FPA Group */ - fmuld %f32, %f32, %f6 /* FPM */ - clr %x5 /* IEU0 */ - faddd %f32, %f32, %f8 /* FPA Group */ - fmuld %f32, %f32, %f10 /* FPM */ - clr %x6 /* IEU0 */ - faddd %f32, %f32, %f12 /* FPA Group */ - clr %x7 /* IEU0 */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - sub %dst, 64, %dst /* IEU0 */ - fmovd %f14, %f48 /* FPA */ +vis7s: add %src, 128 - 56, %src /* IEU0 Group */ + ldda [%src-72] %asi, %f14 /* Load Group */ + wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src-64] %asi, %f16 /* Load Group */ + fmovd %f48, %f0 /* FPA Group */ + fmuld %f32, %f32, %f2 /* FPM */ + clr %x4 /* IEU0 */ + faddd %f32, %f32, %f4 /* FPA Group */ + fmuld %f32, %f32, %f6 /* FPM */ + clr %x5 /* IEU0 */ + faddd %f32, %f32, %f8 /* FPA Group */ + fmuld %f32, %f32, %f10 /* FPM */ + clr %x6 /* IEU0 */ + faddd %f32, %f32, %f12 /* FPA Group */ + clr %x7 /* IEU0 */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + sub %dst, 64, %dst /* IEU0 */ + fmovd %f14, %f48 /* FPA */ vis7: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f50,f52,f54,f56,f58,f60,f62,f48,f48, ,LDBLK(f32), ,,,,,,,STBLK, @@ -779,112 +781,112 @@ vis7e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 e1: END_THE_TRICK1( f0,f2,f4,f6,f8,f10,f12,f14,f16,f6) e2: END_THE_TRICK1( f16,f18,f20,f22,f24,f26,f28,f30,f32,f6) e3: END_THE_TRICK1( 
f32,f34,f36,f38,f40,f42,f44,f46,f0,f6) -ett: rd %asi, %x4 /* LSU Group+4bubbles */ - rd %gsr, %x3 /* LSU Group+4bubbles */ +ett: rd %asi, %x4 /* LSU Group+4bubbles */ + rd %gsr, %x3 /* LSU Group+4bubbles */ #ifdef __KERNEL__ - srl %x4, 3, %x5 /* IEU0 Group */ - xor %x4, ASI_BLK_XOR1, %x4 /* IEU1 */ - wr %x4, %x5, %asi /* LSU Group+4bubbles */ + srl %x4, 3, %x5 /* IEU0 Group */ + xor %x4, ASI_BLK_XOR1, %x4 /* IEU1 */ + wr %x4, %x5, %asi /* LSU Group+4bubbles */ #else - wr %x4, ASI_BLK_XOR, %asi /* LSU Group+4bubbles */ + wr %x4, ASI_BLK_XOR, %asi /* LSU Group+4bubbles */ #endif - andcc %x3, 7, %x3 /* IEU1 Group */ - add %dst, 8, %dst /* IEU0 */ - bne,pn %icc, 1f /* CTI */ - fzero %f10 /* FPA */ - brz,a,pn %len, 2f /* CTI+IEU1 Group */ - std %f6, [%dst - 8] /* Store */ -1: cmp %len, 8 /* IEU1 */ - blu,pn %icc, 3f /* CTI */ - sub %src, 64, %src /* IEU0 Group */ -1: ldda [%src] %asi, %f2 /* Load Group */ - fpadd32 %f10, %f2, %f12 /* FPA Group+load stall */ - add %src, 8, %src /* IEU0 */ - add %dst, 8, %dst /* IEU1 */ - faligndata %f6, %f2, %f14 /* FPA Group */ - fcmpgt32 %f10, %f12, %x5 /* FPM Group */ - std %f14, [%dst - 16] /* Store */ - fmovd %f2, %f6 /* FPA */ - fmovd %f12, %f10 /* FPA Group */ - sub %len, 8, %len /* IEU1 */ - fzero %f16 /* FPA Group - FPU nop */ - fzero %f18 /* FPA Group - FPU nop */ - inc %x5 /* IEU0 */ - srl %x5, 1, %x5 /* IEU0 Group (regdep) */ - cmp %len, 8 /* IEU1 */ - bgeu,pt %icc, 1b /* CTI */ - add %x5, %sum, %sum /* IEU0 Group */ -3: brz,a,pt %x3, 2f /* CTI+IEU1 */ - std %f6, [%dst - 8] /* Store Group */ - st %f7, [%dst - 8] /* Store Group */ - sub %dst, 4, %dst /* IEU0 */ - add %len, 4, %len /* IEU1 */ + andcc %x3, 7, %x3 /* IEU1 Group */ + add %dst, 8, %dst /* IEU0 */ + bne,pn %icc, 1f /* CTI */ + fzero %f10 /* FPA */ + brz,a,pn %len, 2f /* CTI+IEU1 Group */ + std %f6, [%dst - 8] /* Store */ +1: cmp %len, 8 /* IEU1 */ + blu,pn %icc, 3f /* CTI */ + sub %src, 64, %src /* IEU0 Group */ +1: ldda [%src] %asi, %f2 /* Load Group */ + fpadd32 
%f10, %f2, %f12 /* FPA Group+load stall*/ + add %src, 8, %src /* IEU0 */ + add %dst, 8, %dst /* IEU1 */ + faligndata %f6, %f2, %f14 /* FPA Group */ + fcmpgt32 %f10, %f12, %x5 /* FPM Group */ + std %f14, [%dst - 16] /* Store */ + fmovd %f2, %f6 /* FPA */ + fmovd %f12, %f10 /* FPA Group */ + sub %len, 8, %len /* IEU1 */ + fzero %f16 /* FPA Group - FPU nop */ + fzero %f18 /* FPA Group - FPU nop */ + inc %x5 /* IEU0 */ + srl %x5, 1, %x5 /* IEU0 Group (regdep) */ + cmp %len, 8 /* IEU1 */ + bgeu,pt %icc, 1b /* CTI */ + add %x5, %sum, %sum /* IEU0 Group */ +3: brz,a,pt %x3, 2f /* CTI+IEU1 */ + std %f6, [%dst - 8] /* Store Group */ + st %f7, [%dst - 8] /* Store Group */ + sub %dst, 4, %dst /* IEU0 */ + add %len, 4, %len /* IEU1 */ 2: #ifdef __KERNEL__ - sub %sp, 8, %sp /* IEU0 Group */ + sub %sp, 8, %sp /* IEU0 Group */ #endif END_THE_TRICK2( f48,f50,f52,f54,f56,f58,f60,f10,f12,f62) - membar #Sync /* LSU Group */ + membar #Sync /* LSU Group */ #ifdef __KERNEL__ VISExit - add %sp, 8, %sp /* IEU0 Group */ + add %sp, 8, %sp /* IEU0 Group */ #endif -23: brnz,pn %len, 26f /* CTI+IEU1 Group */ -24: sllx %sum, 32, %g1 /* IEU0 */ -25: addcc %sum, %g1, %src /* IEU1 Group */ - srlx %src, 32, %src /* IEU0 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %src, 1, %src /* IEU1 */ +23: brnz,pn %len, 26f /* CTI+IEU1 Group */ +24: sllx %sum, 32, %g1 /* IEU0 */ +25: addcc %sum, %g1, %src /* IEU1 Group */ + srlx %src, 32, %src /* IEU0 Group (regdep) */ + bcs,a,pn %xcc, 1f /* CTI */ + add %src, 1, %src /* IEU1 */ #ifndef __KERNEL__ -1: retl /* CTI Group brk forced */ - srl %src, 0, %src /* IEU0 */ +1: retl /* CTI Group brk forced*/ + srl %src, 0, %src /* IEU0 */ #else -1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */ - retl /* CTI Group brk forced */ - sllx %g4, 32, %g4 /* IEU0 */ +1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */ + retl /* CTI Group brk forced*/ + sllx %g4, 32, %g4 /* IEU0 */ #endif -26: andcc %len, 8, %g0 /* IEU1 Group */ - be,pn %icc, 1f /* CTI */ - lduwa [%src] %asi, 
%o4 /* Load */ - lduwa [%src+4] %asi, %g2 /* Load Group */ - add %src, 8, %src /* IEU0 */ - add %dst, 8, %dst /* IEU1 */ - sllx %o4, 32, %g5 /* IEU0 Group */ - stw %o4, [%dst - 8] /* Store */ - or %g5, %g2, %g5 /* IEU0 Group */ - stw %g2, [%dst - 4] /* Store */ - addcc %g5, %sum, %sum /* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %sum, 1, %sum /* IEU0 */ -1: andcc %len, 4, %g0 /* IEU1 Group */ - be,a,pn %icc, 1f /* CTI */ - clr %g2 /* IEU0 */ - lduwa [%src] %asi, %g7 /* Load */ - add %src, 4, %src /* IEU0 Group */ - add %dst, 4, %dst /* IEU1 */ - sllx %g7, 32, %g2 /* IEU0 Group */ - stw %g7, [%dst - 4] /* Store */ -1: andcc %len, 2, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %g3 /* IEU0 Group */ - lduha [%src] %asi, %g7 /* Load */ - add %src, 2, %src /* IEU1 */ - add %dst, 2, %dst /* IEU0 Group */ - sll %g7, 16, %g3 /* IEU0 Group */ - sth %g7, [%dst - 2] /* Store */ -1: andcc %len, 1, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %o5 /* IEU0 Group */ - lduba [%src] %asi, %g7 /* Load */ - sll %g7, 8, %o5 /* IEU0 Group */ - stb %g7, [%dst] /* Store */ -1: or %g2, %g3, %g3 /* IEU1 */ - or %o5, %g3, %g3 /* IEU0 Group (regdep) */ - addcc %g3, %sum, %sum /* IEU1 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %sum, 1, %sum /* IEU0 */ -1: ba,pt %xcc, 25b /* CTI Group */ - sllx %sum, 32, %g1 /* IEU0 */ +26: andcc %len, 8, %g0 /* IEU1 Group */ + be,pn %icc, 1f /* CTI */ + lduwa [%src] %asi, %o4 /* Load */ + lduwa [%src+4] %asi, %g2 /* Load Group */ + add %src, 8, %src /* IEU0 */ + add %dst, 8, %dst /* IEU1 */ + sllx %o4, 32, %g5 /* IEU0 Group */ + stw %o4, [%dst - 8] /* Store */ + or %g5, %g2, %g5 /* IEU0 Group */ + stw %g2, [%dst - 4] /* Store */ + addcc %g5, %sum, %sum /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %sum, 1, %sum /* IEU0 */ +1: andcc %len, 4, %g0 /* IEU1 Group */ + be,a,pn %icc, 1f /* CTI */ + clr %g2 /* IEU0 */ + lduwa [%src] %asi, %g7 /* Load */ + add %src, 4, %src /* IEU0 Group */ + add %dst, 4, %dst /* IEU1 */ + sllx %g7, 
32, %g2 /* IEU0 Group */ + stw %g7, [%dst - 4] /* Store */ +1: andcc %len, 2, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %g3 /* IEU0 Group */ + lduha [%src] %asi, %g7 /* Load */ + add %src, 2, %src /* IEU1 */ + add %dst, 2, %dst /* IEU0 Group */ + sll %g7, 16, %g3 /* IEU0 Group */ + sth %g7, [%dst - 2] /* Store */ +1: andcc %len, 1, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %o5 /* IEU0 Group */ + lduba [%src] %asi, %g7 /* Load */ + sll %g7, 8, %o5 /* IEU0 Group */ + stb %g7, [%dst] /* Store */ +1: or %g2, %g3, %g3 /* IEU1 */ + or %o5, %g3, %g3 /* IEU0 Group (regdep) */ + addcc %g3, %sum, %sum /* IEU1 Group (regdep) */ + bcs,a,pn %xcc, 1f /* CTI */ + add %sum, 1, %sum /* IEU0 */ +1: ba,pt %xcc, 25b /* CTI Group */ + sllx %sum, 32, %g1 /* IEU0 */ #ifdef __KERNEL__ end: diff --git a/arch/sparc64/lib/VIScsumcopyusr.S b/arch/sparc64/lib/VIScsumcopyusr.S index 17bbe78b1..4730a1c08 100644 --- a/arch/sparc64/lib/VIScsumcopyusr.S +++ b/arch/sparc64/lib/VIScsumcopyusr.S @@ -1,4 +1,4 @@ -/* $Id: VIScsumcopyusr.S,v 1.1 2000/01/19 04:06:04 davem Exp $ +/* $Id: VIScsumcopyusr.S,v 1.2 2000/02/20 23:21:40 davem Exp $ * VIScsumcopyusr.S: High bandwidth IP checksumming with simultaneous * copying utilizing the UltraSparc Visual Instruction Set. * @@ -91,358 +91,360 @@ #define DO_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14,DUMMY1,A0,A2,A4,A6,A8,A10,A12,A14,B14,DUMMY2,LOAD,STORE1,STORE2,STORE3,STORE4,STORE5,STORE6,STORE7,STORE8,DUMMY3,BRANCH...) 
\ - LOAD /* Load Group */; \ - faligndata %A14, %F0, %A14 /* FPA Group */; \ - inc %x5 /* IEU0 */; \ - STORE1 /* Store (optional) */; \ - faligndata %F0, %F2, %A0 /* FPA Group */; \ - srl %x5, 1, %x5 /* IEU0 */; \ - add %sum, %x4, %sum /* IEU1 */; \ - fpadd32 %F0, %f0, %F0 /* FPA Group */; \ - inc %x6 /* IEU0 */; \ - STORE2 /* Store (optional) */; \ - faligndata %F2, %F4, %A2 /* FPA Group */; \ - srl %x6, 1, %x6 /* IEU0 */; \ - add %sum, %x5, %sum /* IEU1 */; \ - fpadd32 %F2, %f2, %F2 /* FPA Group */; \ - add %src, 64, %src /* IEU0 */; \ - add %dst, 64, %dst /* IEU1 */; \ - fcmpgt32 %f0, %F0, %x1 /* FPM Group */; \ - inc %x7 /* IEU0 */; \ - STORE3 /* Store (optional) */; \ - faligndata %F4, %F6, %A4 /* FPA */; \ - srl %x7, 1, %x7 /* IEU0 Group */; \ - add %sum, %x6, %sum /* IEU1 */; \ - fpadd32 %F4, %f4, %F4 /* FPA */; \ - fcmpgt32 %f2, %F2, %x2 /* FPM Group */; \ - inc %x8 /* IEU0 */; \ - STORE4 /* Store (optional) */; \ - faligndata %F6, %F8, %A6 /* FPA */; \ - srl %x8, 1, %x8 /* IEU0 Group */; \ - add %sum, %x7, %sum /* IEU1 */; \ - fpadd32 %F6, %f6, %F6 /* FPA */; \ - fcmpgt32 %f4, %F4, %x3 /* FPM Group */; \ - inc %x1 /* IEU0 */; \ - STORE5 /* Store (optional) */; \ - faligndata %F8, %F10, %A8 /* FPA */; \ - srl %x1, 1, %x1 /* IEU0 Group */; \ - add %sum, %x8, %sum /* IEU1 */; \ - fpadd32 %F8, %f8, %F8 /* FPA */; \ - fcmpgt32 %f6, %F6, %x4 /* FPM Group */; \ - inc %x2 /* IEU0 */; \ - STORE6 /* Store (optional) */; \ - faligndata %F10, %F12, %A10 /* FPA */; \ - srl %x2, 1, %x2 /* IEU0 Group */; \ - add %sum, %x1, %sum /* IEU1 */; \ - fpadd32 %F10, %f10, %F10 /* FPA */; \ - fcmpgt32 %f8, %F8, %x5 /* FPM Group */; \ - inc %x3 /* IEU0 */; \ - STORE7 /* Store (optional) */; \ - faligndata %F12, %F14, %A12 /* FPA */; \ - srl %x3, 1, %x3 /* IEU0 Group */; \ - add %sum, %x2, %sum /* IEU1 */; \ - fpadd32 %F12, %f12, %F12 /* FPA */; \ - fcmpgt32 %f10, %F10, %x6 /* FPM Group */; \ - inc %x4 /* IEU0 */; \ - STORE8 /* Store (optional) */; \ - fmovd %F14, %B14 /* FPA */; \ 
- srl %x4, 1, %x4 /* IEU0 Group */; \ - add %sum, %x3, %sum /* IEU1 */; \ - fpadd32 %F14, %f14, %F14 /* FPA */; \ - fcmpgt32 %f12, %F12, %x7 /* FPM Group */; \ - subcc %len, 64, %len /* IEU1 */; \ - BRANCH /* CTI */; \ - fcmpgt32 %f14, %F14, %x8 /* FPM Group */; \ + LOAD /* Load (Group) */; \ + faligndata %A14, %F0, %A14 /* FPA Group */; \ + inc %x5 /* IEU0 */; \ + STORE1 /* Store (optional) */; \ + faligndata %F0, %F2, %A0 /* FPA Group */; \ + srl %x5, 1, %x5 /* IEU0 */; \ + add %sum, %x4, %sum /* IEU1 */; \ + fpadd32 %F0, %f0, %F0 /* FPA Group */; \ + inc %x6 /* IEU0 */; \ + STORE2 /* Store (optional) */; \ + faligndata %F2, %F4, %A2 /* FPA Group */; \ + srl %x6, 1, %x6 /* IEU0 */; \ + add %sum, %x5, %sum /* IEU1 */; \ + fpadd32 %F2, %f2, %F2 /* FPA Group */; \ + add %src, 64, %src /* IEU0 */; \ + fcmpgt32 %f0, %F0, %x1 /* FPM */; \ + add %dst, 64, %dst /* IEU1 Group */; \ + inc %x7 /* IEU0 */; \ + STORE3 /* Store (optional) */; \ + faligndata %F4, %F6, %A4 /* FPA */; \ + fpadd32 %F4, %f4, %F4 /* FPA Group */; \ + add %sum, %x6, %sum /* IEU1 */; \ + fcmpgt32 %f2, %F2, %x2 /* FPM */; \ + srl %x7, 1, %x7 /* IEU0 Group */; \ + inc %x8 /* IEU1 */; \ + STORE4 /* Store (optional) */; \ + faligndata %F6, %F8, %A6 /* FPA */; \ + fpadd32 %F6, %f6, %F6 /* FPA Group */; \ + srl %x8, 1, %x8 /* IEU0 */; \ + fcmpgt32 %f4, %F4, %x3 /* FPM */; \ + add %sum, %x7, %sum /* IEU0 Group */; \ + inc %x1 /* IEU1 */; \ + STORE5 /* Store (optional) */; \ + faligndata %F8, %F10, %A8 /* FPA */; \ + fpadd32 %F8, %f8, %F8 /* FPA Group */; \ + srl %x1, 1, %x1 /* IEU0 */; \ + fcmpgt32 %f6, %F6, %x4 /* FPM */; \ + add %sum, %x8, %sum /* IEU0 Group */; \ + inc %x2 /* IEU1 */; \ + STORE6 /* Store (optional) */; \ + faligndata %F10, %F12, %A10 /* FPA */; \ + fpadd32 %F10, %f10, %F10 /* FPA Group */; \ + srl %x2, 1, %x2 /* IEU0 */; \ + fcmpgt32 %f8, %F8, %x5 /* FPM */; \ + add %sum, %x1, %sum /* IEU0 Group */; \ + inc %x3 /* IEU1 */; \ + STORE7 /* Store (optional) */; \ + faligndata %F12, %F14, %A12 
/* FPA */; \ + fpadd32 %F12, %f12, %F12 /* FPA Group */; \ + srl %x3, 1, %x3 /* IEU0 */; \ + fcmpgt32 %f10, %F10, %x6 /* FPM */; \ + add %sum, %x2, %sum /* IEU0 Group */; \ + inc %x4 /* IEU1 */; \ + STORE8 /* Store (optional) */; \ + fmovd %F14, %B14 /* FPA */; \ + fpadd32 %F14, %f14, %F14 /* FPA Group */; \ + srl %x4, 1, %x4 /* IEU0 */; \ + fcmpgt32 %f12, %F12, %x7 /* FPM */; \ + add %sum, %x3, %sum /* IEU0 Group */; \ + subcc %len, 64, %len /* IEU1 */; \ + BRANCH /* CTI */; \ + fcmpgt32 %f14, %F14, %x8 /* FPM Group */; #define END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,S0,S1,S2,S3,T0,T1,U0,fz) \ - inc %x5 /* IEU0 Group */; \ - fpadd32 %f2, %f0, %S0 /* FPA */; \ - srl %x5, 1, %x5 /* IEU0 Group */; \ - add %sum, %x4, %sum /* IEU1 */; \ - fpadd32 %f6, %f4, %S1 /* FPA */; \ - inc %x6 /* IEU0 Group */; \ - add %sum, %x5, %sum /* IEU1 */; \ - fcmpgt32 %f0, %S0, %x1 /* FPM Group */; \ - srl %x6, 1, %x6 /* IEU0 */; \ - inc %x7 /* IEU1 */; \ - fpadd32 %f10, %f8, %S2 /* FPA */; \ - fcmpgt32 %f4, %S1, %x2 /* FPM Group */; \ - srl %x7, 1, %x7 /* IEU0 */; \ - add %sum, %x6, %sum /* IEU1 */; \ - fpadd32 %f14, %f12, %S3 /* FPA */; \ - inc %x8 /* IEU0 Group */; \ - add %sum, %x7, %sum /* IEU1 */; \ - fzero %fz /* FPA */; \ - fcmpgt32 %f8, %S2, %x3 /* FPM Group */; \ - srl %x8, 1, %x8 /* IEU0 */; \ - inc %x1 /* IEU1 */; \ - fpadd32 %S0, %S1, %T0 /* FPA */; \ - fcmpgt32 %f12, %S3, %x4 /* FPM Group */; \ - srl %x1, 1, %x1 /* IEU0 */; \ - add %sum, %x8, %sum /* IEU1 */; \ - fpadd32 %S2, %S3, %T1 /* FPA */; \ - inc %x2 /* IEU0 Group */; \ - add %sum, %x1, %sum /* IEU1 */; \ - fcmpgt32 %S0, %T0, %x5 /* FPM Group */; \ - srl %x2, 1, %x2 /* IEU0 */; \ - inc %x3 /* IEU1 */; \ - fcmpgt32 %S2, %T1, %x6 /* FPM Group */; \ - srl %x3, 1, %x3 /* IEU0 */; \ - add %sum, %x2, %sum /* IEU1 */; \ - inc %x4 /* IEU0 Group */; \ - add %sum, %x3, %sum /* IEU1 */; \ - fcmpgt32 %fz, %f2, %x7 /* FPM Group */; \ - srl %x4, 1, %x4 /* IEU0 */; \ - inc %x5 /* IEU1 */; \ - fpadd32 %T0, %T1, %U0 /* FPA */; 
\ - fcmpgt32 %fz, %f6, %x8 /* FPM Group */; \ - srl %x5, 1, %x5 /* IEU0 */; \ - add %sum, %x4, %sum /* IEU1 */; \ - inc %x6 /* IEU0 Group */; \ - add %sum, %x5, %sum /* IEU1 */; \ - fcmpgt32 %fz, %f10, %x1 /* FPM Group */; \ - srl %x6, 1, %x6 /* IEU0 */; \ - inc %x7 /* IEU1 */; \ - fcmpgt32 %fz, %f14, %x2 /* FPM Group */; \ - ba,pt %xcc, ett /* CTI */; \ - fmovd %FA, %FB /* FPA */; \ + inc %x5 /* IEU0 Group */; \ + fpadd32 %f2, %f0, %S0 /* FPA */; \ + add %sum, %x4, %sum /* IEU1 */; \ + srl %x5, 1, %x5 /* IEU0 Group */; \ + fpadd32 %f6, %f4, %S1 /* FPA */; \ + inc %x6 /* IEU1 */; \ + fpadd32 %f10, %f8, %S2 /* FPA Group */; \ + add %sum, %x5, %sum /* IEU0 */; \ + fcmpgt32 %f0, %S0, %x1 /* FPM */; \ + fpadd32 %f14, %f12, %S3 /* FPA Group */; \ + srl %x6, 1, %x6 /* IEU0 */; \ + fcmpgt32 %f4, %S1, %x2 /* FPM */; \ + add %sum, %x6, %sum /* IEU0 Group */; \ + fzero %fz /* FPA */; \ + fcmpgt32 %f8, %S2, %x3 /* FPM */; \ + inc %x7 /* IEU0 Group */; \ + inc %x8 /* IEU1 */; \ + srl %x7, 1, %x7 /* IEU0 Group */; \ + inc %x1 /* IEU1 */; \ + fpadd32 %S0, %S1, %T0 /* FPA */; \ + fpadd32 %S2, %S3, %T1 /* FPA Group */; \ + add %sum, %x7, %sum /* IEU0 */; \ + fcmpgt32 %f12, %S3, %x4 /* FPM */; \ + srl %x8, 1, %x8 /* IEU0 Group */; \ + inc %x2 /* IEU1 */; \ + srl %x1, 1, %x1 /* IEU0 Group */; \ + add %sum, %x8, %sum /* IEU1 */; \ + add %sum, %x1, %sum /* IEU0 Group */; \ + fcmpgt32 %S0, %T0, %x5 /* FPM */; \ + srl %x2, 1, %x2 /* IEU0 Group */; \ + fcmpgt32 %S2, %T1, %x6 /* FPM */; \ + inc %x3 /* IEU0 Group */; \ + add %sum, %x2, %sum /* IEU1 */; \ + srl %x3, 1, %x3 /* IEU0 Group */; \ + inc %x4 /* IEU1 */; \ + fpadd32 %T0, %T1, %U0 /* FPA Group */; \ + add %sum, %x3, %sum /* IEU0 */; \ + fcmpgt32 %fz, %f2, %x7 /* FPM */; \ + srl %x4, 1, %x4 /* IEU0 Group */; \ + fcmpgt32 %fz, %f6, %x8 /* FPM */; \ + inc %x5 /* IEU0 Group */; \ + add %sum, %x4, %sum /* IEU1 */; \ + srl %x5, 1, %x5 /* IEU0 Group */; \ + fcmpgt32 %fz, %f10, %x1 /* FPM */; \ + inc %x6 /* IEU0 Group */; \ + add %sum, 
%x5, %sum /* IEU1 */; \ + fmovd %FA, %FB /* FPA Group */; \ + fcmpgt32 %fz, %f14, %x2 /* FPM */; \ + srl %x6, 1, %x6 /* IEU0 Group */; \ + ba,pt %xcc, ett /* CTI */; \ + inc %x7 /* IEU1 */; -#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \ +#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \ END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,f48,f50,f52,f54,f56,f58,f60,f62) -#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \ - fpadd32 %U0, %U1, %V0 /* FPA Group */; \ - srl %x7, 1, %x7 /* IEU0 */; \ - add %sum, %x6, %sum /* IEU1 */; \ - std %V0, [%sp + STACKOFF] /* Store Group */; \ - inc %x8 /* IEU0 */; \ - sub %sum, %x7, %sum /* IEU1 */; \ - fcmpgt32 %fz, %S1, %x3 /* FPM Group */; \ - srl %x8, 1, %x8 /* IEU0 */; \ - inc %x1 /* IEU1 */; \ - fcmpgt32 %fz, %S3, %x4 /* FPM Group */; \ - srl %x1, 1, %x1 /* IEU0 */; \ - sub %sum, %x8, %sum /* IEU1 */; \ - ldx [%sp + STACKOFF], %x8 /* Load Group */; \ - inc %x2 /* IEU0 */; \ - sub %sum, %x1, %sum /* IEU1 */; \ - fcmpgt32 %fz, %T1, %x5 /* FPM Group */; \ - srl %x2, 1, %x2 /* IEU0 */; \ - inc %x3 /* IEU1 */; \ - fcmpgt32 %T0, %U0, %x6 /* FPM Group */; \ - srl %x3, 1, %x3 /* IEU0 */; \ - sub %sum, %x2, %sum /* IEU1 */; \ - inc %x4 /* IEU0 Group */; \ - sub %sum, %x3, %sum /* IEU1 */; \ - fcmpgt32 %fz, %U1, %x7 /* FPM Group */; \ - srl %x4, 1, %x4 /* IEU0 */; \ - inc %x5 /* IEU1 */; \ - fcmpgt32 %U0, %V0, %x1 /* FPM Group */; \ - srl %x5, 1, %x5 /* IEU0 */; \ - sub %sum, %x4, %sum /* IEU1 */; \ - fcmpgt32 %fz, %V0, %x2 /* FPM Group */; \ - inc %x6 /* IEU0 */; \ - sub %sum, %x5, %sum /* IEU1 */; \ - srl %x6, 1, %x6 /* IEU0 Group */; \ - inc %x7 /* IEU1 */; \ - srl %x7, 1, %x7 /* IEU0 Group */; \ - add %sum, %x6, %sum /* IEU1 */; \ - inc %x1 /* IEU0 Group */; \ - sub %sum, %x7, %sum /* IEU1 */; \ - srl %x1, 1, %x1 /* IEU0 Group */; \ - inc %x2 /* IEU1 */; \ - srl %x2, 1, %x2 /* IEU0 Group */; \ - add %sum, %x1, %sum /* IEU1 */; \ - sub %sum, %x2, %sum /* IEU0 Group */; \ - addcc %sum, %x8, %sum /* IEU Group 
*/; \ - bcs,a,pn %xcc, 33f /* CTI */; \ - add %sum, 1, %sum /* IEU0 */; \ -33: /* That's it */; +#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \ + fpadd32 %U0, %U1, %V0 /* FPA Group */; \ + srl %x7, 1, %x7 /* IEU0 */; \ + add %sum, %x6, %sum /* IEU1 */; \ + std %V0, [%sp + STACKOFF] /* Store Group */; \ + inc %x8 /* IEU0 */; \ + sub %sum, %x7, %sum /* IEU1 */; \ + srl %x8, 1, %x8 /* IEU0 Group */; \ + fcmpgt32 %fz, %S1, %x3 /* FPM */; \ + inc %x1 /* IEU0 Group */; \ + fcmpgt32 %fz, %S3, %x4 /* FPM */; \ + srl %x1, 1, %x1 /* IEU0 Group */; \ + sub %sum, %x8, %sum /* IEU1 */; \ + ldx [%sp + STACKOFF], %x8 /* Load Group */; \ + inc %x2 /* IEU0 */; \ + sub %sum, %x1, %sum /* IEU1 */; \ + srl %x2, 1, %x2 /* IEU0 Group */; \ + fcmpgt32 %fz, %T1, %x5 /* FPM */; \ + inc %x3 /* IEU0 Group */; \ + fcmpgt32 %T0, %U0, %x6 /* FPM */; \ + srl %x3, 1, %x3 /* IEU0 Group */; \ + sub %sum, %x2, %sum /* IEU1 */; \ + inc %x4 /* IEU0 Group */; \ + sub %sum, %x3, %sum /* IEU1 */; \ + srl %x4, 1, %x4 /* IEU0 Group */; \ + fcmpgt32 %fz, %U1, %x7 /* FPM */; \ + inc %x5 /* IEU0 Group */; \ + fcmpgt32 %U0, %V0, %x1 /* FPM */; \ + srl %x5, 1, %x5 /* IEU0 Group */; \ + sub %sum, %x4, %sum /* IEU1 */; \ + sub %sum, %x5, %sum /* IEU0 Group */; \ + fcmpgt32 %fz, %V0, %x2 /* FPM */; \ + inc %x6 /* IEU0 Group */; \ + inc %x7 /* IEU1 */; \ + srl %x6, 1, %x6 /* IEU0 Group */; \ + inc %x1 /* IEU1 */; \ + srl %x7, 1, %x7 /* IEU0 Group */; \ + add %sum, %x6, %sum /* IEU1 */; \ + srl %x1, 1, %x1 /* IEU0 Group */; \ + sub %sum, %x7, %sum /* IEU1 */; \ + inc %x2 /* IEU0 Group */; \ + add %sum, %x1, %sum /* IEU1 */; \ + srl %x2, 1, %x2 /* IEU0 Group */; \ + sub %sum, %x2, %sum /* IEU0 Group */; \ + addcc %sum, %x8, %sum /* IEU1 Group */; \ + bcs,a,pn %xcc, 33f /* CTI */; \ + add %sum, 1, %sum /* IEU0 (Group) */; \ +33: /* That's it */; .text .globl csum_partial_copy_user_vis .align 32 -/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp. 
csum_partial_copy_from_user */ -/* This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256 */ +/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp. + * csum_partial_copy_from_user + * This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256 + */ csum_partial_copy_user_vis: - andcc %dst, 7, %g0 /* IEU1 Group */ - be,pt %icc, 4f /* CTI */ - and %dst, 0x38, %o4 /* IEU0 */ - mov 1, %g5 /* IEU0 Group */ - andcc %dst, 2, %g0 /* IEU1 */ - be,pt %icc, 1f /* CTI */ - and %dst, 4, %g7 /* IEU0 Group */ - lduh [%src], %g2 /* Load */ - sub %len, 2, %len /* IEU0 Group */ - add %dst, 2, %dst /* IEU1 */ - andcc %dst, 4, %g7 /* IEU1 Group */ - sll %g5, 16, %g5 /* IEU0 */ - stha %g2, [%dst - 2] %asi /* Store Group */ - sll %g2, 16, %g2 /* IEU0 */ - add %src, 2, %src /* IEU1 */ - addcc %g2, %sum, %sum /* IEU1 Group */ - bcs,a,pn %icc, 1f /* CTI */ - add %sum, %g5, %sum /* IEU0 */ -1: lduw [%src], %g2 /* Load */ - brz,a,pn %g7, 4f /* CTI+IEU1 Group */ - and %dst, 0x38, %o4 /* IEU0 */ - add %dst, 4, %dst /* IEU0 Group */ - sub %len, 4, %len /* IEU1 */ - addcc %g2, %sum, %sum /* IEU1 Group */ - bcs,a,pn %icc, 1f /* CTI */ - add %sum, 1, %sum /* IEU0 */ -1: and %dst, 0x38, %o4 /* IEU0 Group */ - stwa %g2, [%dst - 4] %asi /* Store */ - add %src, 4, %src /* IEU1 */ + andcc %dst, 7, %g0 /* IEU1 Group */ + be,pt %icc, 4f /* CTI */ + and %dst, 0x38, %o4 /* IEU0 */ + mov 1, %g5 /* IEU0 Group */ + andcc %dst, 2, %g0 /* IEU1 */ + be,pt %icc, 1f /* CTI */ + and %dst, 4, %g7 /* IEU0 Group */ + lduh [%src], %g2 /* Load */ + sub %len, 2, %len /* IEU0 Group */ + add %dst, 2, %dst /* IEU1 */ + andcc %dst, 4, %g7 /* IEU1 Group */ + sll %g5, 16, %g5 /* IEU0 */ + stha %g2, [%dst - 2] %asi /* Store Group */ + sll %g2, 16, %g2 /* IEU0 */ + add %src, 2, %src /* IEU1 */ + addcc %g2, %sum, %sum /* IEU1 Group */ + bcs,a,pn %icc, 1f /* CTI */ + add %sum, %g5, %sum /* IEU0 */ +1: lduw [%src], %g2 /* Load */ + brz,a,pn %g7, 4f /* CTI+IEU1 Group */ + and %dst, 0x38, %o4 
/* IEU0 */ + add %dst, 4, %dst /* IEU0 Group */ + sub %len, 4, %len /* IEU1 */ + addcc %g2, %sum, %sum /* IEU1 Group */ + bcs,a,pn %icc, 1f /* CTI */ + add %sum, 1, %sum /* IEU0 */ +1: and %dst, 0x38, %o4 /* IEU0 Group */ + stwa %g2, [%dst - 4] %asi /* Store */ + add %src, 4, %src /* IEU1 */ 4: #ifdef __KERNEL__ VISEntry #endif - mov %src, %g7 /* IEU1 Group */ - fzero %f48 /* FPA */ - alignaddr %src, %g0, %src /* Single Group */ - subcc %g7, %src, %g7 /* IEU1 Group */ - be,pt %xcc, 1f /* CTI */ - mov 0x40, %g1 /* IEU0 */ - lduw [%src], %g2 /* Load Group */ - subcc %sum, %g2, %sum /* IEU1 Group+load stall */ - bcs,a,pn %icc, 1f /* CTI */ - sub %sum, 1, %sum /* IEU0 */ -1: srl %sum, 0, %sum /* IEU0 Group */ - clr %g5 /* IEU1 */ - brz,pn %o4, 3f /* CTI+IEU1 Group */ - sub %g1, %o4, %g1 /* IEU0 */ - ldd [%src], %f0 /* Load */ - clr %o4 /* IEU0 Group */ - andcc %dst, 8, %g0 /* IEU1 */ - be,pn %icc, 1f /* CTI */ - ldd [%src + 8], %f2 /* Load Group */ - add %src, 8, %src /* IEU0 */ - sub %len, 8, %len /* IEU1 */ - fpadd32 %f0, %f48, %f50 /* FPA */ - addcc %dst, 8, %dst /* IEU1 Group */ - faligndata %f0, %f2, %f16 /* FPA */ - fcmpgt32 %f48, %f50, %o4 /* FPM Group */ - fmovd %f2, %f0 /* FPA Group */ - ldd [%src + 8], %f2 /* Load */ - stda %f16, [%dst - 8] %asi /* Store */ - fmovd %f50, %f48 /* FPA */ -1: andcc %g1, 0x10, %g0 /* IEU1 Group */ - be,pn %icc, 1f /* CTI */ - and %g1, 0x20, %g1 /* IEU0 */ - fpadd32 %f0, %f48, %f50 /* FPA */ - ldd [%src + 16], %f4 /* Load Group */ - add %src, 16, %src /* IEU0 */ - add %dst, 16, %dst /* IEU1 */ - faligndata %f0, %f2, %f16 /* FPA */ - fcmpgt32 %f48, %f50, %g5 /* FPM Group */ - sub %len, 16, %len /* IEU0 */ - inc %o4 /* IEU1 */ - stda %f16, [%dst - 16] %asi /* Store Group */ - fpadd32 %f2, %f50, %f48 /* FPA */ - srl %o4, 1, %o5 /* IEU0 */ - faligndata %f2, %f4, %f18 /* FPA Group */ - stda %f18, [%dst - 8] %asi /* Store */ - fcmpgt32 %f50, %f48, %o4 /* FPM Group */ - add %o5, %sum, %sum /* IEU0 */ - ldd [%src + 8], %f2 /* Load */ - 
fmovd %f4, %f0 /* FPA */ -1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */ - rd %asi, %g2 /* LSU Group + 4 bubbles */ - inc %g5 /* IEU0 */ - fpadd32 %f0, %f48, %f50 /* FPA */ - ldd [%src + 16], %f4 /* Load Group */ - srl %g5, 1, %g5 /* IEU0 */ - add %dst, 32, %dst /* IEU1 */ - faligndata %f0, %f2, %f16 /* FPA */ - fcmpgt32 %f48, %f50, %o5 /* FPM Group */ - inc %o4 /* IEU0 */ - ldd [%src + 24], %f6 /* Load */ - srl %o4, 1, %o4 /* IEU0 Group */ - add %g5, %sum, %sum /* IEU1 */ - ldd [%src + 32], %f8 /* Load */ - fpadd32 %f2, %f50, %f48 /* FPA */ - faligndata %f2, %f4, %f18 /* FPA Group */ - sub %len, 32, %len /* IEU0 */ - stda %f16, [%dst - 32] %asi /* Store */ - fcmpgt32 %f50, %f48, %g3 /* FPM Group */ - inc %o5 /* IEU0 */ - add %o4, %sum, %sum /* IEU1 */ - fpadd32 %f4, %f48, %f50 /* FPA */ - faligndata %f4, %f6, %f20 /* FPA Group */ - srl %o5, 1, %o5 /* IEU0 */ - fcmpgt32 %f48, %f50, %g5 /* FPM Group */ - add %o5, %sum, %sum /* IEU0 */ - stda %f18, [%dst - 24] %asi /* Store */ - fpadd32 %f6, %f50, %f48 /* FPA */ - inc %g3 /* IEU0 Group */ - stda %f20, [%dst - 16] %asi /* Store */ - add %src, 32, %src /* IEU1 */ - faligndata %f6, %f8, %f22 /* FPA */ - fcmpgt32 %f50, %f48, %o4 /* FPM Group */ - srl %g3, 1, %g3 /* IEU0 */ - stda %f22, [%dst - 8] %asi /* Store */ - add %g3, %sum, %sum /* IEU0 Group */ -3: rd %asi, %g2 /* LSU Group + 4 bubbles */ + mov %src, %g7 /* IEU1 Group */ + fzero %f48 /* FPA */ + alignaddr %src, %g0, %src /* Single Group */ + subcc %g7, %src, %g7 /* IEU1 Group */ + be,pt %xcc, 1f /* CTI */ + mov 0x40, %g1 /* IEU0 */ + lduw [%src], %g2 /* Load Group */ + subcc %sum, %g2, %sum /* IEU1 Group+load stall*/ + bcs,a,pn %icc, 1f /* CTI */ + sub %sum, 1, %sum /* IEU0 */ +1: srl %sum, 0, %sum /* IEU0 Group */ + clr %g5 /* IEU1 */ + brz,pn %o4, 3f /* CTI+IEU1 Group */ + sub %g1, %o4, %g1 /* IEU0 */ + ldd [%src], %f0 /* Load */ + clr %o4 /* IEU0 Group */ + andcc %dst, 8, %g0 /* IEU1 */ + be,pn %icc, 1f /* CTI */ + ldd [%src + 8], %f2 /* Load Group */ + add %src, 8, 
%src /* IEU0 */ + sub %len, 8, %len /* IEU1 */ + fpadd32 %f0, %f48, %f50 /* FPA */ + addcc %dst, 8, %dst /* IEU1 Group */ + faligndata %f0, %f2, %f16 /* FPA */ + fcmpgt32 %f48, %f50, %o4 /* FPM Group */ + fmovd %f2, %f0 /* FPA Group */ + ldd [%src + 8], %f2 /* Load */ + stda %f16, [%dst - 8] %asi /* Store */ + fmovd %f50, %f48 /* FPA */ +1: andcc %g1, 0x10, %g0 /* IEU1 Group */ + be,pn %icc, 1f /* CTI */ + and %g1, 0x20, %g1 /* IEU0 */ + fpadd32 %f0, %f48, %f50 /* FPA */ + ldd [%src + 16], %f4 /* Load Group */ + add %src, 16, %src /* IEU0 */ + add %dst, 16, %dst /* IEU1 */ + faligndata %f0, %f2, %f16 /* FPA */ + fcmpgt32 %f48, %f50, %g5 /* FPM Group */ + sub %len, 16, %len /* IEU0 */ + inc %o4 /* IEU1 */ + stda %f16, [%dst - 16] %asi /* Store Group */ + fpadd32 %f2, %f50, %f48 /* FPA */ + srl %o4, 1, %o5 /* IEU0 */ + faligndata %f2, %f4, %f18 /* FPA Group */ + stda %f18, [%dst - 8] %asi /* Store */ + fcmpgt32 %f50, %f48, %o4 /* FPM Group */ + add %o5, %sum, %sum /* IEU0 */ + ldd [%src + 8], %f2 /* Load */ + fmovd %f4, %f0 /* FPA */ +1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */ + rd %asi, %g2 /* LSU Group + 4 bubbles*/ + inc %g5 /* IEU0 */ + fpadd32 %f0, %f48, %f50 /* FPA */ + ldd [%src + 16], %f4 /* Load Group */ + srl %g5, 1, %g5 /* IEU0 */ + add %dst, 32, %dst /* IEU1 */ + faligndata %f0, %f2, %f16 /* FPA */ + fcmpgt32 %f48, %f50, %o5 /* FPM Group */ + inc %o4 /* IEU0 */ + ldd [%src + 24], %f6 /* Load */ + srl %o4, 1, %o4 /* IEU0 Group */ + add %g5, %sum, %sum /* IEU1 */ + ldd [%src + 32], %f8 /* Load */ + fpadd32 %f2, %f50, %f48 /* FPA */ + faligndata %f2, %f4, %f18 /* FPA Group */ + sub %len, 32, %len /* IEU0 */ + stda %f16, [%dst - 32] %asi /* Store */ + fcmpgt32 %f50, %f48, %g3 /* FPM Group */ + inc %o5 /* IEU0 */ + add %o4, %sum, %sum /* IEU1 */ + fpadd32 %f4, %f48, %f50 /* FPA */ + faligndata %f4, %f6, %f20 /* FPA Group */ + srl %o5, 1, %o5 /* IEU0 */ + fcmpgt32 %f48, %f50, %g5 /* FPM Group */ + add %o5, %sum, %sum /* IEU0 */ + stda %f18, [%dst - 24] %asi /* 
Store */ + fpadd32 %f6, %f50, %f48 /* FPA */ + inc %g3 /* IEU0 Group */ + stda %f20, [%dst - 16] %asi /* Store */ + add %src, 32, %src /* IEU1 */ + faligndata %f6, %f8, %f22 /* FPA */ + fcmpgt32 %f50, %f48, %o4 /* FPM Group */ + srl %g3, 1, %g3 /* IEU0 */ + stda %f22, [%dst - 8] %asi /* Store */ + add %g3, %sum, %sum /* IEU0 Group */ +3: rd %asi, %g2 /* LSU Group + 4 bubbles*/ #ifdef __KERNEL__ -4: sethi %hi(vis0s), %g7 /* IEU0 Group */ - or %g2, ASI_BLK_OR, %g2 /* IEU1 */ +4: sethi %hi(vis0s), %g7 /* IEU0 Group */ + or %g2, ASI_BLK_OR, %g2 /* IEU1 */ #else -4: rd %pc, %g7 /* LSU Group + 4 bubbles */ +4: rd %pc, %g7 /* LSU Group + 4 bubbles*/ #endif - inc %g5 /* IEU0 Group */ - and %src, 0x38, %g3 /* IEU1 */ - membar #StoreLoad /* LSU Group */ - srl %g5, 1, %g5 /* IEU0 */ - inc %o4 /* IEU1 */ - sll %g3, 8, %g3 /* IEU0 Group */ - sub %len, 0xc0, %len /* IEU1 */ - addcc %g5, %sum, %sum /* IEU1 Group */ - srl %o4, 1, %o4 /* IEU0 */ - add %g7, %g3, %g7 /* IEU0 Group */ - add %o4, %sum, %sum /* IEU1 */ + inc %g5 /* IEU0 Group */ + and %src, 0x38, %g3 /* IEU1 */ + membar #StoreLoad /* LSU Group */ + srl %g5, 1, %g5 /* IEU0 */ + inc %o4 /* IEU1 */ + sll %g3, 8, %g3 /* IEU0 Group */ + sub %len, 0xc0, %len /* IEU1 */ + addcc %g5, %sum, %sum /* IEU1 Group */ + srl %o4, 1, %o4 /* IEU0 */ + add %g7, %g3, %g7 /* IEU0 Group */ + add %o4, %sum, %sum /* IEU1 */ #ifdef __KERNEL__ - jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */ + jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */ #else - jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */ + jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */ #endif - fzero %f32 /* FPA */ + fzero %f32 /* FPA */ .align 2048 -vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src] ASI_BLK_P, %f0 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f48, %f62 /* FPA Group f0 available */ - faligndata %f0, %f2, %f48 /* FPA Group f2 available */ - fcmpgt32 
%f32, %f2, %x1 /* FPM Group f4 available */ - fpadd32 %f0, %f62, %f0 /* FPA */ - fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available */ - faligndata %f2, %f4, %f50 /* FPA */ - fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available */ - faligndata %f4, %f6, %f52 /* FPA */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available */ - inc %x1 /* IEU0 */ - faligndata %f6, %f8, %f54 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available */ - srl %x1, 1, %x1 /* IEU0 */ - inc %x2 /* IEU1 */ - faligndata %f8, %f10, %f56 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available */ - srl %x2, 1, %x2 /* IEU0 */ - add %sum, %x1, %sum /* IEU1 */ - faligndata %f10, %f12, %f58 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - inc %x3 /* IEU0 */ - add %sum, %x2, %sum /* IEU1 */ - faligndata %f12, %f14, %f60 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - srl %x3, 1, %x3 /* IEU0 */ - inc %x4 /* IEU1 */ - fmovd %f14, %f62 /* FPA */ - srl %x4, 1, %x4 /* IEU0 Group */ - add %sum, %x3, %sum /* IEU1 */ +vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src] ASI_BLK_P, %f0 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f48, %f62 /* FPA Group f0 available*/ + faligndata %f0, %f2, %f48 /* FPA Group f2 available*/ + fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available*/ + fpadd32 %f0, %f62, %f0 /* FPA */ + fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available*/ + faligndata %f2, %f4, %f50 /* FPA */ + fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available*/ + faligndata %f4, %f6, %f52 /* FPA */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available*/ + inc %x1 /* IEU0 */ + faligndata %f6, %f8, %f54 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available*/ + srl %x1, 1, %x1 /* IEU0 */ + inc %x2 /* IEU1 */ + faligndata %f8, %f10, %f56 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available*/ + srl %x2, 1, %x2 /* IEU0 */ + add %sum, %x1, %sum /* IEU1 */ + faligndata 
%f10, %f12, %f58 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + inc %x3 /* IEU0 */ + add %sum, %x2, %sum /* IEU1 */ + faligndata %f12, %f14, %f60 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + srl %x3, 1, %x3 /* IEU0 */ + inc %x4 /* IEU1 */ + fmovd %f14, %f62 /* FPA */ + srl %x4, 1, %x4 /* IEU0 Group */ + add %sum, %x3, %sum /* IEU1 */ vis0: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f48,f50,f52,f54,f56,f58,f60,f62,f62, ,LDBLK(f32), STBLK,,,,,,,, @@ -468,36 +470,36 @@ vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, STBLK_XORASI(x1,x2),ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e1) .align 2048 -vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - sub %src, 8, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f0 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f0, %f58 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - fcmpgt32 %f32, %f2, %x2 /* FPM Group */ - faligndata %f2, %f4, %f48 /* FPA */ - fcmpgt32 %f32, %f4, %x3 /* FPM Group */ - faligndata %f4, %f6, %f50 /* FPA */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group */ - faligndata %f6, %f8, %f52 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - inc %x2 /* IEU1 */ - faligndata %f8, %f10, %f54 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - srl %x2, 1, %x2 /* IEU0 */ - faligndata %f10, %f12, %f56 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - inc %x3 /* IEU0 */ - add %sum, %x2, %sum /* IEU1 */ - faligndata %f12, %f14, %f58 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - srl %x3, 1, %x3 /* IEU0 */ - inc %x4 /* IEU1 */ - fmovd %f14, %f60 /* FPA */ - srl %x4, 1, %x4 /* IEU0 Group */ - add %sum, %x3, %sum /* IEU1 */ +vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + sub %src, 8, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, 
%f0 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f0, %f58 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + fcmpgt32 %f32, %f2, %x2 /* FPM Group */ + faligndata %f2, %f4, %f48 /* FPA */ + fcmpgt32 %f32, %f4, %x3 /* FPM Group */ + faligndata %f4, %f6, %f50 /* FPA */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group */ + faligndata %f6, %f8, %f52 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + inc %x2 /* IEU1 */ + faligndata %f8, %f10, %f54 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + srl %x2, 1, %x2 /* IEU0 */ + faligndata %f10, %f12, %f56 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + inc %x3 /* IEU0 */ + add %sum, %x2, %sum /* IEU1 */ + faligndata %f12, %f14, %f58 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + srl %x3, 1, %x3 /* IEU0 */ + inc %x4 /* IEU1 */ + fmovd %f14, %f60 /* FPA */ + srl %x4, 1, %x4 /* IEU0 Group */ + add %sum, %x3, %sum /* IEU1 */ vis1: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f62,f48,f50,f52,f54,f56,f58,f60,f60, ,LDBLK(f32), ,STBLK,,,,,,, @@ -523,33 +525,33 @@ vis1e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,STBLK_XORASI(x1,x2),ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40), ,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e1) .align 2048 -vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - sub %src, 16, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f0 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f0, %f56 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - sub %dst, 64, %dst /* IEU0 */ - fpsub32 %f2, %f2, %f2 /* FPA Group */ - fcmpgt32 %f32, %f4, %x3 /* FPM Group */ - faligndata %f4, %f6, %f48 /* FPA */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group */ - faligndata %f6, %f8, %f50 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM 
Group */ - faligndata %f8, %f10, %f52 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - faligndata %f10, %f12, %f54 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - inc %x3 /* IEU0 */ - faligndata %f12, %f14, %f56 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - srl %x3, 1, %x3 /* IEU0 */ - inc %x4 /* IEU1 */ - fmovd %f14, %f58 /* FPA */ - srl %x4, 1, %x4 /* IEU0 Group */ - add %sum, %x3, %sum /* IEU1 */ +vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + sub %src, 16, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f0 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f0, %f56 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + sub %dst, 64, %dst /* IEU0 */ + fpsub32 %f2, %f2, %f2 /* FPA Group */ + fcmpgt32 %f32, %f4, %x3 /* FPM Group */ + faligndata %f4, %f6, %f48 /* FPA */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group */ + faligndata %f6, %f8, %f50 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + faligndata %f8, %f10, %f52 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + faligndata %f10, %f12, %f54 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + inc %x3 /* IEU0 */ + faligndata %f12, %f14, %f56 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + srl %x3, 1, %x3 /* IEU0 */ + inc %x4 /* IEU1 */ + fmovd %f14, %f58 /* FPA */ + srl %x4, 1, %x4 /* IEU0 Group */ + add %sum, %x3, %sum /* IEU1 */ vis2: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f60,f62,f48,f50,f52,f54,f56,f58,f58, ,LDBLK(f32), ,,STBLK,,,,,, @@ -575,29 +577,29 @@ vis2e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,STBLK_XORASI(x2,x3),ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96), ,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e1) .align 2048 -vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - sub %src, 24, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f0 /* Load Group */ - add %src, 64, 
%src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f0, %f54 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - sub %dst, 64, %dst /* IEU0 */ - fpsub32 %f2, %f2, %f2 /* FPA Group */ - fpsub32 %f4, %f4, %f4 /* FPA Group */ - fcmpgt32 %f32, %f6, %x4 /* FPM Group */ - faligndata %f6, %f8, %f48 /* FPA */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - faligndata %f8, %f10, %f50 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - faligndata %f10, %f12, %f52 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - faligndata %f12, %f14, %f54 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - fmovd %f14, %f56 /* FPA */ - inc %x4 /* IEU0 */ - srl %x4, 1, %x4 /* IEU0 Group */ +vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + sub %src, 24, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f0 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f0, %f54 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + sub %dst, 64, %dst /* IEU0 */ + fpsub32 %f2, %f2, %f2 /* FPA Group */ + fpsub32 %f4, %f4, %f4 /* FPA Group */ + fcmpgt32 %f32, %f6, %x4 /* FPM Group */ + faligndata %f6, %f8, %f48 /* FPA */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + faligndata %f8, %f10, %f50 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + faligndata %f10, %f12, %f52 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + faligndata %f12, %f14, %f54 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + fmovd %f14, %f56 /* FPA */ + inc %x4 /* IEU0 */ + srl %x4, 1, %x4 /* IEU0 Group */ vis3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f58,f60,f62,f48,f50,f52,f54,f56,f56, ,LDBLK(f32), ,,,STBLK,,,,, @@ -623,27 +625,27 @@ vis3e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,STBLK_XORASI(x3,x4),ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88), ,add %dst, 96, %dst; add %len, 192 - 5*8, 
%len; ba,pt %icc, e1) .align 2048 -vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - sub %src, 32, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f0 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f0, %f52 /* FPA Group */ - fmovd %f48, %f0 /* FPA Group */ - sub %dst, 64, %dst /* IEU0 */ - fpsub32 %f2, %f2, %f2 /* FPA Group */ - fpsub32 %f4, %f4, %f4 /* FPA Group */ - fpsub32 %f6, %f6, %f6 /* FPA Group */ - clr %x4 /* IEU0 */ - fcmpgt32 %f32, %f8, %x5 /* FPM Group */ - faligndata %f8, %f10, %f48 /* FPA */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - faligndata %f10, %f12, %f50 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - faligndata %f12, %f14, %f52 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - fmovd %f14, %f54 /* FPA */ +vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + sub %src, 32, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f0 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f0, %f52 /* FPA Group */ + fmovd %f48, %f0 /* FPA Group */ + sub %dst, 64, %dst /* IEU0 */ + fpsub32 %f2, %f2, %f2 /* FPA Group */ + fpsub32 %f4, %f4, %f4 /* FPA Group */ + fpsub32 %f6, %f6, %f6 /* FPA Group */ + clr %x4 /* IEU0 */ + fcmpgt32 %f32, %f8, %x5 /* FPM Group */ + faligndata %f8, %f10, %f48 /* FPA */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + faligndata %f10, %f12, %f50 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + faligndata %f12, %f14, %f52 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + fmovd %f14, %f54 /* FPA */ vis4: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f56,f58,f60,f62,f48,f50,f52,f54,f54, ,LDBLK(f32), ,,,,STBLK,,,, @@ -669,27 +671,27 @@ vis4e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,,STBLK_XORASI(x4,x5),ST(f48,64),ST(f50,72),ST(f52,80), ,add %dst, 88, 
%dst; add %len, 192 - 4*8, %len; ba,pt %icc, e1) .align 2048 -vis5s: ldd [%src+0], %f10 /* Load Group */ - ldd [%src+8], %f12 /* Load Group */ - ldd [%src+16], %f14 /* Load Group */ - add %src, 24, %src /* IEU0 Group */ - wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f48, %f0 /* FPA Group */ - fmuld %f32, %f32, %f2 /* FPM */ - clr %x4 /* IEU0 */ - faddd %f32, %f32, %f4 /* FPA Group */ - fmuld %f32, %f32, %f6 /* FPM */ - clr %x5 /* IEU0 */ - faddd %f32, %f32, %f8 /* FPA Group */ - fcmpgt32 %f32, %f10, %x6 /* FPM Group */ - sub %dst, 64, %dst /* IEU0 */ - faligndata %f10, %f12, %f48 /* FPA */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - faligndata %f12, %f14, %f50 /* FPA */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - fmovd %f14, %f52 /* FPA */ +vis5s: ldd [%src+0], %f10 /* Load Group */ + ldd [%src+8], %f12 /* Load Group */ + ldd [%src+16], %f14 /* Load Group */ + add %src, 24, %src /* IEU0 Group */ + wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f48, %f0 /* FPA Group */ + fmuld %f32, %f32, %f2 /* FPM */ + clr %x4 /* IEU0 */ + faddd %f32, %f32, %f4 /* FPA Group */ + fmuld %f32, %f32, %f6 /* FPM */ + clr %x5 /* IEU0 */ + faddd %f32, %f32, %f8 /* FPA Group */ + fcmpgt32 %f32, %f10, %x6 /* FPM Group */ + sub %dst, 64, %dst /* IEU0 */ + faligndata %f10, %f12, %f48 /* FPA */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + faligndata %f12, %f14, %f50 /* FPA */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + fmovd %f14, %f52 /* FPA */ vis5: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f54,f56,f58,f60,f62,f48,f50,f52,f52, ,LDBLK(f32), ,,,,,STBLK,,, @@ -715,26 +717,26 @@ vis5e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,,,STBLK_XORASI(x5,x6),ST(f48,64),ST(f50,72), ,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e1) .align 
2048 -vis6s: ldd [%src+0], %f12 /* Load Group */ - ldd [%src+8], %f14 /* Load Group */ - add %src, 16, %src /* IEU0 Group */ - wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add %src, 64, %src /* IEU0 Group */ - fmovd %f48, %f0 /* FPA Group */ - fmuld %f32, %f32, %f2 /* FPM */ - clr %x4 /* IEU0 */ - faddd %f32, %f32, %f4 /* FPA Group */ - fmuld %f32, %f32, %f6 /* FPM */ - clr %x5 /* IEU0 */ - faddd %f32, %f32, %f8 /* FPA Group */ - fmuld %f32, %f32, %f10 /* FPM */ - clr %x6 /* IEU0 */ - fcmpgt32 %f32, %f12, %x7 /* FPM Group */ - sub %dst, 64, %dst /* IEU0 */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - faligndata %f12, %f14, %f48 /* FPA */ - fmovd %f14, %f50 /* FPA Group */ +vis6s: ldd [%src+0], %f12 /* Load Group */ + ldd [%src+8], %f14 /* Load Group */ + add %src, 16, %src /* IEU0 Group */ + wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f48, %f0 /* FPA Group */ + fmuld %f32, %f32, %f2 /* FPM */ + clr %x4 /* IEU0 */ + faddd %f32, %f32, %f4 /* FPA Group */ + fmuld %f32, %f32, %f6 /* FPM */ + clr %x5 /* IEU0 */ + faddd %f32, %f32, %f8 /* FPA Group */ + fmuld %f32, %f32, %f10 /* FPM */ + clr %x6 /* IEU0 */ + fcmpgt32 %f32, %f12, %x7 /* FPM Group */ + sub %dst, 64, %dst /* IEU0 */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + faligndata %f12, %f14, %f48 /* FPA */ + fmovd %f14, %f50 /* FPA Group */ vis6: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f52,f54,f56,f58,f60,f62,f48,f50,f50, ,LDBLK(f32), ,,,,,,STBLK,, @@ -760,25 +762,25 @@ vis6e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 ,SYNC, ,,,,,,STBLK_XORASI(x6,x7),ST(f48,64), ,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e1) .align 2048 -vis7s: ldd [%src+0], %f14 /* Load Group */ - add %src, 8, %src /* IEU0 Group */ - wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ - ldda [%src] ASI_BLK_P, %f16 /* Load Group */ - add 
%src, 64, %src /* IEU0 Group */ - fmovd %f48, %f0 /* FPA Group */ - fmuld %f32, %f32, %f2 /* FPM */ - clr %x4 /* IEU0 */ - faddd %f32, %f32, %f4 /* FPA Group */ - fmuld %f32, %f32, %f6 /* FPM */ - clr %x5 /* IEU0 */ - faddd %f32, %f32, %f8 /* FPA Group */ - fmuld %f32, %f32, %f10 /* FPM */ - clr %x6 /* IEU0 */ - faddd %f32, %f32, %f12 /* FPA Group */ - clr %x7 /* IEU0 */ - fcmpgt32 %f32, %f14, %x8 /* FPM Group */ - sub %dst, 64, %dst /* IEU0 */ - fmovd %f14, %f48 /* FPA */ +vis7s: ldd [%src+0], %f14 /* Load Group */ + add %src, 8, %src /* IEU0 Group */ + wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ + ldda [%src] ASI_BLK_P, %f16 /* Load Group */ + add %src, 64, %src /* IEU0 Group */ + fmovd %f48, %f0 /* FPA Group */ + fmuld %f32, %f32, %f2 /* FPM */ + clr %x4 /* IEU0 */ + faddd %f32, %f32, %f4 /* FPA Group */ + fmuld %f32, %f32, %f6 /* FPM */ + clr %x5 /* IEU0 */ + faddd %f32, %f32, %f8 /* FPA Group */ + fmuld %f32, %f32, %f10 /* FPM */ + clr %x6 /* IEU0 */ + faddd %f32, %f32, %f12 /* FPA Group */ + clr %x7 /* IEU0 */ + fcmpgt32 %f32, %f14, %x8 /* FPM Group */ + sub %dst, 64, %dst /* IEU0 */ + fmovd %f14, %f48 /* FPA */ vis7: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f50,f52,f54,f56,f58,f60,f62,f48,f48, ,LDBLK(f32), ,,,,,,,STBLK, @@ -806,104 +808,104 @@ vis7e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14 e1: END_THE_TRICK1( f0,f2,f4,f6,f8,f10,f12,f14,f16,f6) e2: END_THE_TRICK1( f16,f18,f20,f22,f24,f26,f28,f30,f32,f6) e3: END_THE_TRICK1( f32,f34,f36,f38,f40,f42,f44,f46,f0,f6) -ett: rd %gsr, %x3 /* LSU Group+4bubbles */ - andcc %x3, 7, %x3 /* IEU1 Group */ - add %dst, 8, %dst /* IEU0 */ - bne,pn %icc, 1f /* CTI */ - fzero %f10 /* FPA */ - brz,a,pn %len, 2f /* CTI+IEU1 Group */ - stda %f6, [%dst - 8] %asi /* Store */ -1: cmp %len, 8 /* IEU1 */ - blu,pn %icc, 3f /* CTI */ - sub %src, 64, %src /* IEU0 Group */ -1: ldd [%src], %f2 /* Load Group */ - fpadd32 %f10, %f2, %f12 /* FPA Group+load stall */ - add %src, 8, 
%src /* IEU0 */ - add %dst, 8, %dst /* IEU1 */ - faligndata %f6, %f2, %f14 /* FPA Group */ - fcmpgt32 %f10, %f12, %x5 /* FPM Group */ - stda %f14, [%dst - 16] %asi /* Store */ - fmovd %f2, %f6 /* FPA */ - fmovd %f12, %f10 /* FPA Group */ - sub %len, 8, %len /* IEU1 */ - fzero %f16 /* FPA Group - FPU nop */ - fzero %f18 /* FPA Group - FPU nop */ - inc %x5 /* IEU0 */ - srl %x5, 1, %x5 /* IEU0 Group (regdep) */ - cmp %len, 8 /* IEU1 */ - bgeu,pt %icc, 1b /* CTI */ - add %x5, %sum, %sum /* IEU0 Group */ -3: brz,a,pt %x3, 2f /* CTI+IEU1 */ - stda %f6, [%dst - 8] %asi /* Store Group */ - sta %f7, [%dst - 8] %asi /* Store Group */ - sub %dst, 4, %dst /* IEU0 */ - add %len, 4, %len /* IEU1 */ +ett: rd %gsr, %x3 /* LSU Group+4bubbles */ + andcc %x3, 7, %x3 /* IEU1 Group */ + add %dst, 8, %dst /* IEU0 */ + bne,pn %icc, 1f /* CTI */ + fzero %f10 /* FPA */ + brz,a,pn %len, 2f /* CTI+IEU1 Group */ + stda %f6, [%dst - 8] %asi /* Store */ +1: cmp %len, 8 /* IEU1 */ + blu,pn %icc, 3f /* CTI */ + sub %src, 64, %src /* IEU0 Group */ +1: ldd [%src], %f2 /* Load Group */ + fpadd32 %f10, %f2, %f12 /* FPA Group+load stall*/ + add %src, 8, %src /* IEU0 */ + add %dst, 8, %dst /* IEU1 */ + faligndata %f6, %f2, %f14 /* FPA Group */ + fcmpgt32 %f10, %f12, %x5 /* FPM Group */ + stda %f14, [%dst - 16] %asi /* Store */ + fmovd %f2, %f6 /* FPA */ + fmovd %f12, %f10 /* FPA Group */ + sub %len, 8, %len /* IEU1 */ + fzero %f16 /* FPA Group - FPU nop */ + fzero %f18 /* FPA Group - FPU nop */ + inc %x5 /* IEU0 */ + srl %x5, 1, %x5 /* IEU0 Group (regdep) */ + cmp %len, 8 /* IEU1 */ + bgeu,pt %icc, 1b /* CTI */ + add %x5, %sum, %sum /* IEU0 Group */ +3: brz,a,pt %x3, 2f /* CTI+IEU1 */ + stda %f6, [%dst - 8] %asi /* Store Group */ + sta %f7, [%dst - 8] %asi /* Store Group */ + sub %dst, 4, %dst /* IEU0 */ + add %len, 4, %len /* IEU1 */ 2: #ifdef __KERNEL__ - sub %sp, 8, %sp /* IEU0 Group */ + sub %sp, 8, %sp /* IEU0 Group */ #endif END_THE_TRICK2( f48,f50,f52,f54,f56,f58,f60,f10,f12,f62) - membar #Sync 
/* LSU Group */ + membar #Sync /* LSU Group */ #ifdef __KERNEL__ VISExit - add %sp, 8, %sp /* IEU0 Group */ + add %sp, 8, %sp /* IEU0 Group */ #endif -23: brnz,pn %len, 26f /* CTI+IEU1 Group */ -24: sllx %sum, 32, %g1 /* IEU0 */ -25: addcc %sum, %g1, %src /* IEU1 Group */ - srlx %src, 32, %src /* IEU0 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %src, 1, %src /* IEU1 */ +23: brnz,pn %len, 26f /* CTI+IEU1 Group */ +24: sllx %sum, 32, %g1 /* IEU0 */ +25: addcc %sum, %g1, %src /* IEU1 Group */ + srlx %src, 32, %src /* IEU0 Group (regdep) */ + bcs,a,pn %xcc, 1f /* CTI */ + add %src, 1, %src /* IEU1 */ #ifndef __KERNEL__ -1: retl /* CTI Group brk forced */ - srl %src, 0, %src /* IEU0 */ +1: retl /* CTI Group brk forced*/ + srl %src, 0, %src /* IEU0 */ #else -1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */ - retl /* CTI Group brk forced */ - sllx %g4, 32, %g4 /* IEU0 */ +1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */ + retl /* CTI Group brk forced*/ + sllx %g4, 32, %g4 /* IEU0 */ #endif -26: andcc %len, 8, %g0 /* IEU1 Group */ - be,pn %icc, 1f /* CTI */ - lduw [%src], %o4 /* Load */ - lduw [%src+4], %g2 /* Load Group */ - add %src, 8, %src /* IEU0 */ - add %dst, 8, %dst /* IEU1 */ - sllx %o4, 32, %g5 /* IEU0 Group */ - stwa %o4, [%dst - 8] %asi /* Store */ - or %g5, %g2, %g5 /* IEU0 Group */ - stwa %g2, [%dst - 4] %asi /* Store */ - addcc %g5, %sum, %sum /* IEU1 Group */ - bcs,a,pn %xcc, 1f /* CTI */ - add %sum, 1, %sum /* IEU0 */ -1: andcc %len, 4, %g0 /* IEU1 Group */ - be,a,pn %icc, 1f /* CTI */ - clr %g2 /* IEU0 */ - lduw [%src], %g7 /* Load */ - add %src, 4, %src /* IEU0 Group */ - add %dst, 4, %dst /* IEU1 */ - sllx %g7, 32, %g2 /* IEU0 Group */ - stwa %g7, [%dst - 4] %asi /* Store */ -1: andcc %len, 2, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %g3 /* IEU0 Group */ - lduh [%src], %g7 /* Load */ - add %src, 2, %src /* IEU1 */ - add %dst, 2, %dst /* IEU0 Group */ - sll %g7, 16, %g3 /* IEU0 Group */ - stha %g7, [%dst - 2] %asi /* Store */ -1: andcc 
%len, 1, %g0 /* IEU1 */ - be,a,pn %icc, 1f /* CTI */ - clr %o5 /* IEU0 Group */ - ldub [%src], %g7 /* Load */ - sll %g7, 8, %o5 /* IEU0 Group */ - stba %g7, [%dst] %asi /* Store */ -1: or %g2, %g3, %g3 /* IEU1 */ - or %o5, %g3, %g3 /* IEU0 Group (regdep) */ - addcc %g3, %sum, %sum /* IEU1 Group (regdep) */ - bcs,a,pn %xcc, 1f /* CTI */ - add %sum, 1, %sum /* IEU0 */ -1: ba,pt %xcc, 25b /* CTI Group */ - sllx %sum, 32, %g1 /* IEU0 */ +26: andcc %len, 8, %g0 /* IEU1 Group */ + be,pn %icc, 1f /* CTI */ + lduw [%src], %o4 /* Load */ + lduw [%src+4], %g2 /* Load Group */ + add %src, 8, %src /* IEU0 */ + add %dst, 8, %dst /* IEU1 */ + sllx %o4, 32, %g5 /* IEU0 Group */ + stwa %o4, [%dst - 8] %asi /* Store */ + or %g5, %g2, %g5 /* IEU0 Group */ + stwa %g2, [%dst - 4] %asi /* Store */ + addcc %g5, %sum, %sum /* IEU1 Group */ + bcs,a,pn %xcc, 1f /* CTI */ + add %sum, 1, %sum /* IEU0 */ +1: andcc %len, 4, %g0 /* IEU1 Group */ + be,a,pn %icc, 1f /* CTI */ + clr %g2 /* IEU0 */ + lduw [%src], %g7 /* Load */ + add %src, 4, %src /* IEU0 Group */ + add %dst, 4, %dst /* IEU1 */ + sllx %g7, 32, %g2 /* IEU0 Group */ + stwa %g7, [%dst - 4] %asi /* Store */ +1: andcc %len, 2, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %g3 /* IEU0 Group */ + lduh [%src], %g7 /* Load */ + add %src, 2, %src /* IEU1 */ + add %dst, 2, %dst /* IEU0 Group */ + sll %g7, 16, %g3 /* IEU0 Group */ + stha %g7, [%dst - 2] %asi /* Store */ +1: andcc %len, 1, %g0 /* IEU1 */ + be,a,pn %icc, 1f /* CTI */ + clr %o5 /* IEU0 Group */ + ldub [%src], %g7 /* Load */ + sll %g7, 8, %o5 /* IEU0 Group */ + stba %g7, [%dst] %asi /* Store */ +1: or %g2, %g3, %g3 /* IEU1 */ + or %o5, %g3, %g3 /* IEU0 Group (regdep) */ + addcc %g3, %sum, %sum /* IEU1 Group (regdep) */ + bcs,a,pn %xcc, 1f /* CTI */ + add %sum, 1, %sum /* IEU0 */ +1: ba,pt %xcc, 25b /* CTI Group */ + sllx %sum, 32, %g1 /* IEU0 */ #ifdef __KERNEL__ end: |