From 116674acc97ba75a720329996877077d988443a2 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 9 Mar 2001 20:33:35 +0000 Subject: Merge with Linux 2.4.2. --- arch/alpha/kernel/Makefile | 4 +- arch/alpha/kernel/alpha_ksyms.c | 6 ++ arch/alpha/kernel/irq.c | 2 +- arch/alpha/kernel/irq_alpha.c | 2 +- arch/alpha/kernel/osf_sys.c | 4 +- arch/alpha/kernel/pci-noop.c | 104 ++++++++++++++++++++ arch/alpha/kernel/process.c | 16 ++-- arch/alpha/kernel/ptrace.c | 2 +- arch/alpha/kernel/setup.c | 2 +- arch/alpha/kernel/smc37c669.c | 2 +- arch/alpha/kernel/smc37c93x.c | 2 +- arch/alpha/kernel/smp.c | 2 +- arch/alpha/kernel/sys_ruffian.c | 74 ++++++++++++++- arch/alpha/kernel/traps.c | 2 + arch/alpha/lib/Makefile | 2 + arch/alpha/lib/clear_page.S | 39 ++++++++ arch/alpha/lib/copy_page.S | 49 ++++++++++ arch/alpha/lib/ev6-clear_page.S | 54 +++++++++++ arch/alpha/lib/ev6-copy_page.S | 203 ++++++++++++++++++++++++++++++++++++++++ 19 files changed, 547 insertions(+), 24 deletions(-) create mode 100644 arch/alpha/kernel/pci-noop.c create mode 100644 arch/alpha/lib/clear_page.S create mode 100644 arch/alpha/lib/copy_page.S create mode 100644 arch/alpha/lib/ev6-clear_page.S create mode 100644 arch/alpha/lib/ev6-copy_page.S (limited to 'arch/alpha') diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index abc04cca2..08ec1d613 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -30,9 +30,7 @@ ifdef CONFIG_VGA_HOSE obj-y += console.o endif - obj-$(CONFIG_SMP) += smp.o irq_smp.o - obj-$(CONFIG_PCI) += pci.o pci_iommu.o ifdef CONFIG_ALPHA_GENERIC @@ -76,7 +74,7 @@ obj-y += sys_eb64p.o endif obj-$(CONFIG_ALPHA_EIGER) += sys_eiger.o -obj-$(CONFIG_ALPHA_JENSEN) += sys_jensen.o +obj-$(CONFIG_ALPHA_JENSEN) += sys_jensen.o pci-noop.o obj-$(CONFIG_ALPHA_MIATA) += sys_miata.o obj-$(CONFIG_ALPHA_MIKASA) += sys_mikasa.o obj-$(CONFIG_ALPHA_NAUTILUS) += sys_nautilus.o diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c index 
17285ac26..d7bf13ec3 100644 --- a/arch/alpha/kernel/alpha_ksyms.c +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -98,9 +98,13 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memsetw); EXPORT_SYMBOL(__constant_c_memset); +EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(__direct_map_base); EXPORT_SYMBOL(__direct_map_size); + +#ifdef CONFIG_PCI EXPORT_SYMBOL(pci_alloc_consistent); EXPORT_SYMBOL(pci_free_consistent); EXPORT_SYMBOL(pci_map_single); @@ -108,6 +112,7 @@ EXPORT_SYMBOL(pci_unmap_single); EXPORT_SYMBOL(pci_map_sg); EXPORT_SYMBOL(pci_unmap_sg); EXPORT_SYMBOL(pci_dma_supported); +#endif EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); @@ -166,6 +171,7 @@ EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__up_wakeup); EXPORT_SYMBOL(down); EXPORT_SYMBOL(down_interruptible); +EXPORT_SYMBOL(down_trylock); EXPORT_SYMBOL(up); EXPORT_SYMBOL(__down_read_failed); EXPORT_SYMBOL(__down_write_failed); diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 080e48e43..825eaf2d5 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 59f102496..91e99f573 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -18,7 +18,7 @@ unsigned long __irq_attempt[NR_IRQS]; #endif -/* Hack minimum IPL during interupt processing for broken hardware. */ +/* Hack minimum IPL during interrupt processing for broken hardware. 
*/ #ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK int __min_ipl; #endif diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index cd28b07fc..6159457d9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -74,8 +74,10 @@ asmlinkage int osf_set_program_attributes( mm = current->mm; mm->end_code = bss_start + bss_len; mm->brk = bss_start + bss_len; +#if 0 printk("set_program_attributes(%lx %lx %lx %lx)\n", text_start, text_len, bss_start, bss_len); +#endif unlock_kernel(); return 0; } diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c new file mode 100644 index 000000000..d530ebb82 --- /dev/null +++ b/arch/alpha/kernel/pci-noop.c @@ -0,0 +1,104 @@ +/* + * linux/arch/alpha/kernel/pci-noop.c + * + * Stub PCI interfaces for Jensen-specific kernels. + */ + +#include +#include +#include +#include +#include + +#include "proto.h" + + +/* + * The PCI controler list. + */ + +struct pci_controler *hose_head, **hose_tail = &hose_head; +struct pci_controler *pci_isa_hose; + + +struct pci_controler * __init +alloc_pci_controler(void) +{ + struct pci_controler *hose; + + hose = alloc_bootmem(sizeof(*hose)); + + *hose_tail = hose; + hose_tail = &hose->next; + + return hose; +} + +struct resource * __init +alloc_resource(void) +{ + struct resource *res; + + res = alloc_bootmem(sizeof(*res)); + + return res; +} + +asmlinkage long +sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) +{ + struct pci_controler *hose; + struct pci_dev *dev; + + /* from hose or from bus.devfn */ + if (which & IOBASE_FROM_HOSE) { + for (hose = hose_head; hose; hose = hose->next) + if (hose->index == bus) + break; + if (!hose) + return -ENODEV; + } else { + /* Special hook for ISA access. 
*/ + if (bus == 0 && dfn == 0) + hose = pci_isa_hose; + else + return -ENODEV; + } + + switch (which & ~IOBASE_FROM_HOSE) { + case IOBASE_HOSE: + return hose->index; + case IOBASE_SPARSE_MEM: + return hose->sparse_mem_base; + case IOBASE_DENSE_MEM: + return hose->dense_mem_base; + case IOBASE_SPARSE_IO: + return hose->sparse_io_base; + case IOBASE_DENSE_IO: + return hose->dense_io_base; + case IOBASE_ROOT_BUS: + return hose->bus->number; + } + + return -EOPNOTSUPP; +} + +asmlinkage long +sys_pciconfig_read(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} + +asmlinkage long +sys_pciconfig_write(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + else + return -ENODEV; +} diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 508e278b0..2dd505e10 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -416,22 +416,20 @@ dump_fpu(struct pt_regs * regs, elf_fpregset_t *r) * Don't do this at home. 
*/ asmlinkage int -sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, - unsigned long a3, unsigned long a4, unsigned long a5, - struct pt_regs regs) +sys_execve(char *ufilename, char **argv, char **envp, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs regs) { int error; - char * filename; + char *filename; - lock_kernel(); - filename = getname((char *) a0); + filename = getname(ufilename); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char **) a1, (char **) a2, &regs); + error = do_execve(filename, argv, envp, &regs); putname(filename); out: - unlock_kernel(); return error; } diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 09fcfd787..a919e7c2f 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 2acf56d96..a6443da53 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c index 0b2db18ab..3ffb611f2 100644 --- a/arch/alpha/kernel/smc37c669.c +++ b/arch/alpha/kernel/smc37c669.c @@ -3,7 +3,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/smc37c93x.c b/arch/alpha/kernel/smc37c93x.c index 5448305a3..b0e15d307 100644 --- a/arch/alpha/kernel/smc37c93x.c +++ b/arch/alpha/kernel/smc37c93x.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index e91e77895..4f877b10b 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -837,7 +837,7 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) atomic_set(&data.unstarted_count, smp_num_cpus - 1); 
atomic_set(&data.unfinished_count, smp_num_cpus - 1); - /* Aquire the smp_call_function_data mutex. */ + /* Acquire the smp_call_function_data mutex. */ if (pointer_lock(&smp_call_function_data, &data, retry)) return -EBUSY; diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index c9a2e79a4..0d9713377 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -92,14 +92,80 @@ ruffian_kill_arch (int mode) #endif } +/* + * Interrupt routing: + * + * Primary bus + * IdSel INTA INTB INTC INTD + * 21052 13 - - - - + * SIO 14 23 - - - + * 21143 15 44 - - - + * Slot 0 17 43 42 41 40 + * + * Secondary bus + * IdSel INTA INTB INTC INTD + * Slot 0 8 (18) 19 18 17 16 + * Slot 1 9 (19) 31 30 29 28 + * Slot 2 10 (20) 27 26 25 24 + * Slot 3 11 (21) 39 38 37 36 + * Slot 4 12 (22) 35 34 33 32 + * 53c875 13 (23) 20 - - - + * + */ + static int __init ruffian_map_irq(struct pci_dev *dev, u8 slot, u8 pin) { - /* We don't know anything about the PCI routing, so leave - the IRQ unchanged. 
*/ - return dev->irq; + static char irq_tab[11][5] __initdata = { + /*INT INTA INTB INTC INTD */ + {-1, -1, -1, -1, -1}, /* IdSel 13, 21052 */ + {-1, -1, -1, -1, -1}, /* IdSel 14, SIO */ + {44, 44, 44, 44, 44}, /* IdSel 15, 21143 */ + {-1, -1, -1, -1, -1}, /* IdSel 16, none */ + {43, 43, 42, 41, 40}, /* IdSel 17, 64-bit slot */ + /* the next 6 are actually on PCI bus 1, across the bridge */ + {19, 19, 18, 17, 16}, /* IdSel 8, slot 0 */ + {31, 31, 30, 29, 28}, /* IdSel 9, slot 1 */ + {27, 27, 26, 25, 24}, /* IdSel 10, slot 2 */ + {39, 39, 38, 37, 36}, /* IdSel 11, slot 3 */ + {35, 35, 34, 33, 32}, /* IdSel 12, slot 4 */ + {20, 20, 20, 20, 20}, /* IdSel 13, 53c875 */ + }; + const long min_idsel = 13, max_idsel = 23, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; } +static u8 __init +ruffian_swizzle(struct pci_dev *dev, u8 *pinp) +{ + int slot, pin = *pinp; + + if (dev->bus->number == 0) { + slot = PCI_SLOT(dev->devfn); + } + /* Check for the built-in bridge. */ + else if (PCI_SLOT(dev->bus->self->devfn) == 13) { + slot = PCI_SLOT(dev->devfn) + 10; + } + else + { + /* Must be a card-based bridge. */ + do { + if (PCI_SLOT(dev->bus->self->devfn) == 13) { + slot = PCI_SLOT(dev->devfn) + 10; + break; + } + pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)); + + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. 
*/ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; +} #ifdef BUILDING_FOR_MILO /* @@ -164,6 +230,6 @@ struct alpha_machine_vector ruffian_mv __initmv = { init_pci: cia_init_pci, kill_arch: ruffian_kill_arch, pci_map_irq: ruffian_map_irq, - pci_swizzle: common_swizzle, + pci_swizzle: ruffian_swizzle, }; ALIAS_MV(ruffian) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 7b2f8be03..ed7d2f68e 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -1093,7 +1093,9 @@ alpha_ni_syscall(unsigned long a0, unsigned long a1, unsigned long a2, { /* We only get here for OSF system calls, minus #112; the rest go to sys_ni_syscall. */ +#if 0 printk("", regs.r0, a0, a1, a2); +#endif return -ENOSYS; } diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index 1e3e485b5..d95ace0d7 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -42,6 +42,8 @@ OBJS = __divqu.o __remqu.o __divlu.o __remlu.o \ $(ev6)strncpy_from_user.o \ $(ev67)strlen_user.o \ $(ev6)csum_ipv6_magic.o \ + $(ev6)clear_page.o \ + $(ev6)copy_page.o \ strcasecmp.o \ fpreg.o \ callback_srm.o srm_puts.o srm_printk.o diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S new file mode 100644 index 000000000..a221ae266 --- /dev/null +++ b/arch/alpha/lib/clear_page.S @@ -0,0 +1,39 @@ +/* + * arch/alpha/lib/clear_page.S + * + * Zero an entire page. 
+ */ + + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + lda $0,128 + nop + unop + nop + +1: stq $31,0($16) + stq $31,8($16) + stq $31,16($16) + stq $31,24($16) + + stq $31,32($16) + stq $31,40($16) + stq $31,48($16) + subq $0,1,$0 + + stq $31,56($16) + addq $16,64,$16 + unop + bne $0,1b + + ret + nop + unop + nop + + .end clear_page diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S new file mode 100644 index 000000000..9f3b97459 --- /dev/null +++ b/arch/alpha/lib/copy_page.S @@ -0,0 +1,49 @@ +/* + * arch/alpha/lib/copy_page.S + * + * Copy an entire page. + */ + + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + lda $18,128 + nop + unop + nop + +1: ldq $0,0($17) + ldq $1,8($17) + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + ldq $6,48($17) + ldq $7,56($17) + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + addq $17,64,$17 + + stq $2,16($16) + stq $3,24($16) + stq $4,32($16) + stq $5,40($16) + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 1b + + ret + nop + unop + nop + + .end copy_page diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S new file mode 100644 index 000000000..adf4f7be0 --- /dev/null +++ b/arch/alpha/lib/ev6-clear_page.S @@ -0,0 +1,54 @@ +/* + * arch/alpha/lib/ev6-clear_page.S + * + * Zero an entire page. 
+ */ + + .text + .align 4 + .global clear_page + .ent clear_page +clear_page: + .prologue 0 + + lda $0,128 + lda $1,125 + addq $16,64,$2 + addq $16,128,$3 + + addq $16,192,$17 + wh64 ($16) + wh64 ($2) + wh64 ($3) + +1: wh64 ($17) + stq $31,0($16) + subq $0,1,$0 + subq $1,1,$1 + + stq $31,8($16) + stq $31,16($16) + addq $17,64,$2 + nop + + stq $31,24($16) + stq $31,32($16) + cmovgt $1,$2,$17 + nop + + stq $31,40($16) + stq $31,48($16) + nop + nop + + stq $31,56($16) + addq $16,64,$16 + nop + bne $0,1b + + ret + nop + nop + nop + + .end clear_page diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S new file mode 100644 index 000000000..b789db192 --- /dev/null +++ b/arch/alpha/lib/ev6-copy_page.S @@ -0,0 +1,203 @@ +/* + * arch/alpha/lib/ev6-copy_page.S + * + * Copy an entire page. + */ + +/* The following comparison of this routine vs the normal copy_page.S + was written by an unnamed ev6 hardware designer and forwarded to me + via Steven Hobbs . + + First Problem: STQ overflows. + ----------------------------- + + It would be nice if EV6 handled every resource overflow efficiently, + but for some it doesn't. Including store queue overflows. It causes + a trap and a restart of the pipe. + + To get around this we sometimes use (to borrow a term from a VSSAD + researcher) "aeration". The idea is to slow the rate at which the + processor receives valid instructions by inserting nops in the fetch + path. In doing so, you can prevent the overflow and actually make + the code run faster. You can, of course, take advantage of the fact + that the processor can fetch at most 4 aligned instructions per cycle. + + I inserted enough nops to force it to take 10 cycles to fetch the + loop code. In theory, EV6 should be able to execute this loop in + 9 cycles but I was not able to get it to run that fast -- the initial + conditions were such that I could not reach this optimum rate on + (chaotic) EV6. 
I wrote the code such that everything would issue + in order. + + Second Problem: Dcache index matches. + ------------------------------------- + + If you are going to use this routine on random aligned pages, there + is a 25% chance that the pages will be at the same dcache indices. + This results in many nasty memory traps without care. + + The solution is to schedule the prefetches to avoid the memory + conflicts. I schedule the wh64 prefetches farther ahead of the + read prefetches to avoid this problem. + + Third Problem: Needs more prefetching. + -------------------------------------- + + In order to improve the code I added deeper prefetching to take the + most advantage of EV6's bandwidth. + + I also prefetched the read stream. Note that adding the read prefetch + forced me to add another cycle to the inner-most kernel - up to 11 + from the original 8 cycles per iteration. We could improve performance + further by unrolling the loop and doing multiple prefetches per cycle. + + I think that the code below will be very robust and fast code for the + purposes of copying aligned pages. It is slower when both source and + destination pages are in the dcache, but it is my guess that this is + less important than the dcache miss case. */ + + + .text + .align 4 + .global copy_page + .ent copy_page +copy_page: + .prologue 0 + + /* Prefetch 5 read cachelines; write-hint 10 cache lines. */ + wh64 ($16) + ldl $31,0($17) + ldl $31,64($17) + lda $1,1*64($16) + + wh64 ($1) + ldl $31,128($17) + ldl $31,192($17) + lda $1,2*64($16) + + wh64 ($1) + ldl $31,256($17) + lda $18,118 + lda $1,3*64($16) + + wh64 ($1) + nop + lda $1,4*64($16) + lda $2,5*64($16) + + wh64 ($1) + wh64 ($2) + lda $1,6*64($16) + lda $2,7*64($16) + + wh64 ($1) + wh64 ($2) + lda $1,8*64($16) + lda $2,9*64($16) + + wh64 ($1) + wh64 ($2) + lda $19,10*64($16) + nop + + /* Main prefetching/write-hinting loop. 
*/ +1: ldq $0,0($17) + ldq $1,8($17) + unop + unop + + unop + unop + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + unop + unop + + unop + unop + ldq $6,48($17) + ldq $7,56($17) + + ldl $31,320($17) + unop + unop + unop + + /* This gives the extra cycle of aeration above the minimum. */ + unop + unop + unop + unop + + wh64 ($19) + unop + unop + unop + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + unop + + unop + stq $2,16($16) + addq $17,64,$17 + stq $3,24($16) + + stq $4,32($16) + stq $5,40($16) + addq $19,64,$19 + unop + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 1b + + /* Prefetch the final 5 cache lines of the read stream. */ + lda $18,10 + ldl $31,320($17) + ldl $31,384($17) + ldl $31,448($17) + + ldl $31,512($17) + ldl $31,576($17) + nop + nop + + /* Non-prefetching, non-write-hinting cleanup loop for the + final 10 cache lines. */ +2: ldq $0,0($17) + ldq $1,8($17) + ldq $2,16($17) + ldq $3,24($17) + + ldq $4,32($17) + ldq $5,40($17) + ldq $6,48($17) + ldq $7,56($17) + + stq $0,0($16) + subq $18,1,$18 + stq $1,8($16) + addq $17,64,$17 + + stq $2,16($16) + stq $3,24($16) + stq $4,32($16) + stq $5,40($16) + + stq $6,48($16) + stq $7,56($16) + addq $16,64,$16 + bne $18, 2b + + ret + nop + unop + nop + + .end copy_page -- cgit v1.2.3