From b2ad5f821b1381492d792ca10b1eb7a107b48f14 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 10 Jan 2001 17:17:53 +0000 Subject: Merge with Linux 2.4.0-prerelease. Big Makefile rewrite, test your Makefiles. --- arch/alpha/config.in | 1 + arch/alpha/kernel/Makefile | 146 +++++++++++++++--------------------------- arch/alpha/kernel/smp.c | 63 ++++++++++++++---- arch/alpha/lib/Makefile | 2 +- arch/alpha/lib/ev6-memcpy.S | 2 +- arch/alpha/lib/ev67-strrchr.S | 109 +++++++++++++++++++++++++++++++ arch/alpha/lib/memmove.S | 99 ++++++++++++++++++++++++---- arch/alpha/math-emu/Makefile | 22 ++++--- arch/alpha/mm/fault.c | 2 +- 9 files changed, 312 insertions(+), 134 deletions(-) create mode 100644 arch/alpha/lib/ev67-strrchr.S (limited to 'arch/alpha') diff --git a/arch/alpha/config.in b/arch/alpha/config.in index 649047e66..fe0471a26 100644 --- a/arch/alpha/config.in +++ b/arch/alpha/config.in @@ -3,6 +3,7 @@ # see Documentation/kbuild/config-language.txt. # +define_bool CONFIG_ALPHA y define_bool CONFIG_UID16 n mainmenu_name "Kernel configuration of Linux for Alpha machines" diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 3b8d98bd2..85cc7ed48 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -13,29 +13,31 @@ $(CC) $(AFLAGS) -c -o $*.o $< O_TARGET := kernel.o -O_OBJS := entry.o traps.o process.o osf_sys.o irq.o irq_alpha.o \ - signal.o setup.o ptrace.o time.o semaphore.o -OX_OBJS := alpha_ksyms.o -L_TARGET := rest.a -L_OBJS := irq_i8259.o irq_srm.o irq_pyxis.o \ - es1888.o smc37c669.o smc37c93x.o ns87312.o +export-objs := alpha_ksyms.o -ifdef CONFIG_SMP -O_OBJS += smp.o irq_smp.o -endif +obj-y := entry.o traps.o process.o osf_sys.o irq.o irq_alpha.o \ + signal.o setup.o ptrace.o time.o semaphore.o alpha_ksyms.o -ifdef CONFIG_PCI -O_OBJS += pci.o pci_iommu.o -endif +# +# FIXME! +# These should be made conditional on the stuff that needs them! 
+# +obj-y += irq_i8259.o irq_srm.o irq_pyxis.o \ + es1888.o smc37c669.o smc37c93x.o ns87312.o ifdef CONFIG_VGA_HOSE -L_OBJS += console.o +obj-y += console.o endif + +obj-$(CONFIG_SMP) += smp.o irq_smp.o + +obj-$(CONFIG_PCI) += pci.o pci_iommu.o + ifdef CONFIG_ALPHA_GENERIC -O_OBJS += core_apecs.o core_cia.o core_irongate.o core_lca.o core_mcpcia.o \ +obj-y += core_apecs.o core_cia.o core_irongate.o core_lca.o core_mcpcia.o \ core_polaris.o core_t2.o core_tsunami.o core_titan.o \ sys_alcor.o sys_cabriolet.o sys_dp264.o sys_eb64p.o sys_eiger.o \ sys_jensen.o sys_miata.o sys_mikasa.o sys_nautilus.o sys_titan.o \ @@ -46,99 +48,53 @@ O_OBJS += core_apecs.o core_cia.o core_irongate.o core_lca.o core_mcpcia.o \ else # Core logic support -ifdef CONFIG_ALPHA_APECS -O_OBJS += core_apecs.o -endif -ifdef CONFIG_ALPHA_CIA -O_OBJS += core_cia.o -endif -ifdef CONFIG_ALPHA_IRONGATE -O_OBJS += core_irongate.o -endif -ifdef CONFIG_ALPHA_LCA -O_OBJS += core_lca.o -endif -ifdef CONFIG_ALPHA_MCPCIA -O_OBJS += core_mcpcia.o -endif -ifdef CONFIG_ALPHA_T2 -O_OBJS += core_t2.o -endif -ifdef CONFIG_ALPHA_TSUNAMI -O_OBJS += core_tsunami.o -endif -ifdef CONFIG_ALPHA_TITAN -O_OBJS += core_titan.o -endif -ifdef CONFIG_ALPHA_POLARIS -O_OBJS += core_polaris.o -endif -ifdef CONFIG_ALPHA_WILDFIRE -O_OBJS += core_wildfire.o -endif +obj-$(CONFIG_ALPHA_APECS) += core_apecs.o +obj-$(CONFIG_ALPHA_CIA) += core_cia.o + +obj-$(CONFIG_ALPHA_IRONGATE) += core_irongate.o +obj-$(CONFIG_ALPHA_LCA) += core_lca.o +obj-$(CONFIG_ALPHA_MCPCIA) += core_mcpcia.o +obj-$(CONFIG_ALPHA_T2) += core_t2.o +obj-$(CONFIG_ALPHA_TSUNAMI) += core_tsunami.o +obj-$(CONFIG_ALPHA_TITAN) += core_titan.o +obj-$(CONFIG_ALPHA_POLARIS) += core_polaris.o +obj-$(CONFIG_ALPHA_WILDFIRE) += core_wildfire.o # Board support ifneq ($(CONFIG_ALPHA_ALCOR)$(CONFIG_ALPHA_XLT),) -O_OBJS += sys_alcor.o +obj-y += sys_alcor.o endif ifneq ($(CONFIG_ALPHA_CABRIOLET)$(CONFIG_ALPHA_EB164)$(CONFIG_ALPHA_EB66P)$(CONFIG_ALPHA_LX164)$(CONFIG_ALPHA_PC164),) 
-O_OBJS += sys_cabriolet.o -endif -ifdef CONFIG_ALPHA_DP264 -O_OBJS += sys_dp264.o -endif -ifdef CONFIG_ALPHA_TITAN -O_OBJS += sys_titan.o +obj-y += sys_cabriolet.o endif + +obj-$(CONFIG_ALPHA_DP264) += sys_dp264.o +obj-$(CONFIG_ALPHA_TITAN) += sys_titan.o + ifneq ($(CONFIG_ALPHA_EB64P)$(CONFIG_ALPHA_EB66),) -O_OBJS += sys_eb64p.o -endif -ifdef CONFIG_ALPHA_EIGER -O_OBJS += sys_eiger.o -endif -ifdef CONFIG_ALPHA_JENSEN -O_OBJS += sys_jensen.o -endif -ifdef CONFIG_ALPHA_MIATA -O_OBJS += sys_miata.o -endif -ifdef CONFIG_ALPHA_MIKASA -O_OBJS += sys_mikasa.o -endif -ifdef CONFIG_ALPHA_NAUTILUS -O_OBJS += sys_nautilus.o -endif -ifdef CONFIG_ALPHA_NORITAKE -O_OBJS += sys_noritake.o -endif -ifdef CONFIG_ALPHA_RAWHIDE -O_OBJS += sys_rawhide.o -endif -ifdef CONFIG_ALPHA_RUFFIAN -O_OBJS += sys_ruffian.o -endif -ifdef CONFIG_ALPHA_RX164 -O_OBJS += sys_rx164.o -endif -ifdef CONFIG_ALPHA_SABLE -O_OBJS += sys_sable.o +obj-y += sys_eb64p.o endif + +obj-$(CONFIG_ALPHA_EIGER) += sys_eiger.o +obj-$(CONFIG_ALPHA_JENSEN) += sys_jensen.o +obj-$(CONFIG_ALPHA_MIATA) += sys_miata.o +obj-$(CONFIG_ALPHA_MIKASA) += sys_mikasa.o +obj-$(CONFIG_ALPHA_NAUTILUS) += sys_nautilus.o +obj-$(CONFIG_ALPHA_NORITAKE) += sys_noritake.o +obj-$(CONFIG_ALPHA_RAWHIDE) += sys_rawhide.o +obj-$(CONFIG_ALPHA_RUFFIAN) += sys_ruffian.o +obj-$(CONFIG_ALPHA_RX164) += sys_rx164.o +obj-$(CONFIG_ALPHA_SABLE) += sys_sable.o + ifneq ($(CONFIG_ALPHA_BOOK1)$(CONFIG_ALPHA_AVANTI)$(CONFIG_ALPHA_NONAME)$(CONFIG_ALPHA_P2K)$(CONFIG_ALPHA_XL),) -O_OBJS += sys_sio.o -endif -ifdef CONFIG_ALPHA_SX164 -O_OBJS += sys_sx164.o -endif -ifdef CONFIG_ALPHA_TAKARA -O_OBJS += sys_takara.o -endif -ifdef CONFIG_ALPHA_WILDFIRE -O_OBJS += sys_wildfire.o +obj-y += sys_sio.o endif -endif # GENERIC +obj-$(CONFIG_ALPHA_SX164) += sys_sx164.o +obj-$(CONFIG_ALPHA_TAKARA) += sys_takara.o +obj-$(CONFIG_ALPHA_WILDFIRE) += sys_wildfire.o -O_OBJS += $(L_TARGET) +endif # GENERIC all: kernel.o head.o diff --git a/arch/alpha/kernel/smp.c 
b/arch/alpha/kernel/smp.c index dd882dc14..94a3872c3 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -106,8 +106,9 @@ static inline void __init smp_store_cpu_info(int cpuid) { cpu_data[cpuid].loops_per_sec = loops_per_sec; - cpu_data[cpuid].last_asn - = (cpuid << WIDTH_HARDWARE_ASN) + ASN_FIRST_VERSION; + cpu_data[cpuid].last_asn = ASN_FIRST_VERSION; + cpu_data[cpuid].need_new_asn = 0; + cpu_data[cpuid].asn_lock = 0; local_irq_count(cpuid) = 0; local_bh_count(cpuid) = 0; } @@ -898,12 +899,16 @@ flush_tlb_all(void) tbia(); } +#define asn_locked() (cpu_data[smp_processor_id()].asn_lock) + static void ipi_flush_tlb_mm(void *x) { struct mm_struct *mm = (struct mm_struct *) x; - if (mm == current->active_mm) + if (mm == current->active_mm && !asn_locked()) flush_tlb_current(mm); + else + flush_tlb_other(mm); } void @@ -911,10 +916,18 @@ flush_tlb_mm(struct mm_struct *mm) { if (mm == current->active_mm) { flush_tlb_current(mm); - if (atomic_read(&mm->mm_users) <= 1) + if (atomic_read(&mm->mm_users) <= 1) { + int i, cpu, this_cpu = smp_processor_id(); + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); + if (cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } return; - } else - flush_tlb_other(mm); + } + } if (smp_call_function(ipi_flush_tlb_mm, mm, 1, 1)) { printk(KERN_CRIT "flush_tlb_mm: timed out\n"); @@ -931,8 +944,12 @@ static void ipi_flush_tlb_page(void *x) { struct flush_tlb_page_struct *data = (struct flush_tlb_page_struct *)x; - if (data->mm == current->active_mm) - flush_tlb_current_page(data->mm, data->vma, data->addr); + struct mm_struct * mm = data->mm; + + if (mm == current->active_mm && !asn_locked()) + flush_tlb_current_page(mm, data->vma, data->addr); + else + flush_tlb_other(mm); } void @@ -943,10 +960,18 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) if (mm == current->active_mm) { flush_tlb_current_page(mm, vma, addr); - if (atomic_read(&mm->mm_users) <= 1) + if 
(atomic_read(&mm->mm_users) <= 1) { + int i, cpu, this_cpu = smp_processor_id(); + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); + if (cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } return; - } else - flush_tlb_other(mm); + } + } data.vma = vma; data.mm = mm; @@ -968,8 +993,10 @@ static void ipi_flush_icache_page(void *x) { struct mm_struct *mm = (struct mm_struct *) x; - if (mm == current->active_mm) + if (mm == current->active_mm && !asn_locked()) __load_new_mm_context(mm); + else + flush_tlb_other(mm); } void @@ -980,11 +1007,19 @@ flush_icache_page(struct vm_area_struct *vma, struct page *page) if ((vma->vm_flags & VM_EXEC) == 0) return; - mm->context = 0; if (mm == current->active_mm) { __load_new_mm_context(mm); - if (atomic_read(&mm->mm_users) <= 1) + if (atomic_read(&mm->mm_users) <= 1) { + int i, cpu, this_cpu = smp_processor_id(); + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); + if (cpu == this_cpu) + continue; + if (mm->context[cpu]) + mm->context[cpu] = 0; + } return; + } } if (smp_call_function(ipi_flush_icache_page, mm, 1, 1)) { diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index cb23a987d..1e3e485b5 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -35,7 +35,7 @@ OBJS = __divqu.o __remqu.o __divlu.o __remlu.o \ $(ev6)stxcpy.o \ $(ev6)stxncpy.o \ $(ev67)strchr.o \ - strrchr.o \ + $(ev67)strrchr.o \ $(ev6)memchr.o \ $(ev6)copy_user.o \ $(ev6)clear_user.o \ diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S index 7ebcbc27b..c708a6fb9 100644 --- a/arch/alpha/lib/ev6-memcpy.S +++ b/arch/alpha/lib/ev6-memcpy.S @@ -76,7 +76,7 @@ $single_head_quad: $do_unroll: addq $16, 64, $7 # E : Initial (+1 trip) wh64 address - cmple $18, 63, $1 # E : Can we go through the unrolled loop? + cmple $18, 127, $1 # E : Can we go through the unrolled loop? 
bne $1, $tail_quads # U : Nope nop # E : diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S new file mode 100644 index 000000000..7fe1be0e5 --- /dev/null +++ b/arch/alpha/lib/ev67-strrchr.S @@ -0,0 +1,109 @@ +/* + * arch/alpha/lib/ev67-strrchr.S + * 21264 version by Rick Gorton + * + * Finds the last occurrence of a character in a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses cmpbge to compare 8 bytes in parallel + * + * Much of the information about 21264 scheduling/coding comes from: + * Compiler Writer's Guide for the Alpha 21264 + * abbreviated as 'CWG' in other comments here + * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html + * Scheduling notation: + * E - either cluster + * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 + * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 + */ + + +#include <alpha/regdef.h> + + .set noreorder + .set noat + + .align 4 + .ent strrchr + .globl strrchr +strrchr: + .frame sp, 0, ra + .prologue 0 + + and a1, 0xff, t2 # E : 00000000000000ch + insbl a1, 1, t4 # U : 000000000000ch00 + insbl a1, 2, t5 # U : 0000000000ch0000 + ldq_u t0, 0(a0) # L : load first quadword Latency=3 + + mov zero, t6 # E : t6 is last match aligned addr + or t2, t4, a1 # E : 000000000000chch + sll t5, 8, t3 # U : 00000000ch000000 + mov zero, t8 # E : t8 is last match byte compare mask + + andnot a0, 7, v0 # E : align source addr + or t5, t3, t3 # E : 00000000chch0000 + sll a1, 32, t2 # U : 0000chch00000000 + sll a1, 48, t4 # U : chch000000000000 + + or t4, a1, a1 # E : chch00000000chch + or t2, t3, t2 # E : 0000chchchch0000 + or a1, t2, a1 # E : chchchchchchchch + lda t5, -1 # E : build garbage mask + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + mskqh t5, a0, t4 # E : Complete garbage mask + xor t0, a1, t2 # E : make bytes == c zero + cmpbge zero, t4, t4 # E : bits set iff byte is garbage + + cmpbge zero, t2, t3 # E : bits set iff byte
== c + andnot t1, t4, t1 # E : clear garbage from null test + andnot t3, t4, t3 # E : clear garbage from char test + bne t1, $eos # U : did we already hit the terminator? + + /* Character search main loop */ +$loop: + ldq t0, 8(v0) # L : load next quadword + cmovne t3, v0, t6 # E : save previous comparisons match + nop # : Latency=2, extra map slot (keep nop with cmov) + nop + + cmovne t3, t3, t8 # E : Latency=2, extra map slot + nop # : keep with cmovne + addq v0, 8, v0 # E : + xor t0, a1, t2 # E : + + cmpbge zero, t0, t1 # E : bits set iff byte == zero + cmpbge zero, t2, t3 # E : bits set iff byte == c + beq t1, $loop # U : if we haven't seen a null, loop + nop + + /* Mask out character matches after terminator */ +$eos: + negq t1, t4 # E : isolate first null byte match + and t1, t4, t4 # E : + subq t4, 1, t5 # E : build a mask of the bytes up to... + or t4, t5, t4 # E : ... and including the null + + and t3, t4, t3 # E : mask out char matches after null + cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot + nop # : Keep with cmovne + nop + + cmovne t3, v0, t6 # E : + nop # : Keep with cmovne + /* Locate the address of the last matched character */ + ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0) + nop + + cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen + nop # E : hide the cmov latency (2) behind ctlz latency + lda t5, 0x3f($31) # E : + subq t5, t2, t5 # E : Normalize leading zero count + + addq t6, t5, v0 # E : and add to quadword address + ret # L0 : Latency=3 + nop + nop + + .end strrchr diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S index 3c8567e4e..73aed9253 100644 --- a/arch/alpha/lib/memmove.S +++ b/arch/alpha/lib/memmove.S @@ -26,12 +26,16 @@ memmove: bne $1,memcpy and $2,7,$2 /* Test for src/dest co-alignment.
*/ - bne $2,$misaligned + and $16,7,$1 + cmpule $16,$17,$3 + bne $3,$memmove_up /* dest < src */ and $4,7,$1 - beq $1,$skip_aligned_byte_loop_head + bne $2,$misaligned_dn + unop + beq $1,$skip_aligned_byte_loop_head_dn -$aligned_byte_loop_head: +$aligned_byte_loop_head_dn: lda $4,-1($4) lda $5,-1($5) unop @@ -48,13 +52,13 @@ $aligned_byte_loop_head: and $4,7,$6 stq_u $1,0($4) - bne $6,$aligned_byte_loop_head + bne $6,$aligned_byte_loop_head_dn -$skip_aligned_byte_loop_head: +$skip_aligned_byte_loop_head_dn: lda $18,-8($18) - blt $18,$skip_aligned_word_loop + blt $18,$skip_aligned_word_loop_dn -$aligned_word_loop: +$aligned_word_loop_dn: ldq $1,-8($5) nop lda $5,-8($5) @@ -63,22 +67,22 @@ $aligned_word_loop: stq $1,-8($4) nop lda $4,-8($4) - bge $18,$aligned_word_loop + bge $18,$aligned_word_loop_dn -$skip_aligned_word_loop: +$skip_aligned_word_loop_dn: lda $18,8($18) - bgt $18,$byte_loop_tail + bgt $18,$byte_loop_tail_dn unop ret $31,($26),1 .align 4 -$misaligned: +$misaligned_dn: nop fnop unop beq $18,$egress -$byte_loop_tail: +$byte_loop_tail_dn: ldq_u $3,-1($5) ldq_u $2,-1($4) lda $5,-1($5) @@ -91,8 +95,77 @@ $byte_loop_tail: bis $1,$2,$1 stq_u $1,0($4) + bgt $18,$byte_loop_tail_dn + br $egress + +$memmove_up: + mov $16,$4 + mov $17,$5 + bne $2,$misaligned_up + beq $1,$skip_aligned_byte_loop_head_up + +$aligned_byte_loop_head_up: + unop + ble $18,$egress + ldq_u $3,0($5) + ldq_u $2,0($4) + + lda $18,-1($18) + extbl $3,$5,$1 + insbl $1,$4,$1 + mskbl $2,$4,$2 + + bis $1,$2,$1 + lda $5,1($5) + stq_u $1,0($4) + lda $4,1($4) + + and $4,7,$6 + bne $6,$aligned_byte_loop_head_up + +$skip_aligned_byte_loop_head_up: + lda $18,-8($18) + blt $18,$skip_aligned_word_loop_up + +$aligned_word_loop_up: + ldq $1,0($5) + nop + lda $5,8($5) + lda $18,-8($18) + + stq $1,0($4) + nop + lda $4,8($4) + bge $18,$aligned_word_loop_up + +$skip_aligned_word_loop_up: + lda $18,8($18) + bgt $18,$byte_loop_tail_up + unop + ret $31,($26),1 + + .align 4 +$misaligned_up: + nop + fnop + unop + beq 
$18,$egress + +$byte_loop_tail_up: + ldq_u $3,0($5) + ldq_u $2,0($4) + lda $18,-1($18) + extbl $3,$5,$1 + + insbl $1,$4,$1 + mskbl $2,$4,$2 + bis $1,$2,$1 + stq_u $1,0($4) + + lda $5,1($5) + lda $4,1($4) nop - bgt $18,$byte_loop_tail + bgt $18,$byte_loop_tail_up $egress: ret $31,($26),1 diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile index 91e5ba660..4486b79e3 100644 --- a/arch/alpha/math-emu/Makefile +++ b/arch/alpha/math-emu/Makefile @@ -1,18 +1,22 @@ # # Makefile for the FPU instruction emulation. # -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now in the main makefile... -O_TARGET := math-emu.o -O_OBJS := math.o qrnnd.o CFLAGS += -I. -I$(TOPDIR)/include/math-emu -w -ifeq ($(CONFIG_MATHEMU),m) -M_OBJS := $(O_TARGET) +ifeq ($(CONFIG_MATHEMU),y) + +O_TARGET := math-emu.o +obj-y := math.o qrnnd.o + +else + +list-multi := math-emu.o +math-emu-objs := math.o qrnnd.o +obj-m := math-emu.o +math-emu.o: $(math-emu-objs) + $(LD) -r -o $@ $(math-emu-objs) + endif include $(TOPDIR)/Rules.make diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index a5140ce1c..ddc677339 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -45,7 +45,7 @@ __load_new_mm_context(struct mm_struct *next_mm) unsigned long mmc; mmc = __get_new_mm_context(next_mm, smp_processor_id()); - next_mm->context = mmc; + next_mm->context[smp_processor_id()] = mmc; current->thread.asn = mmc & HARDWARE_ASN_MASK; current->thread.ptbr = ((unsigned long) next_mm->pgd - IDENT_ADDR) >> PAGE_SHIFT; -- cgit v1.2.3