author     Ralf Baechle <ralf@linux-mips.org>   2000-08-28 22:00:09 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   2000-08-28 22:00:09 +0000
commit     1a1d77dd589de5a567fa95e36aa6999c704ceca4 (patch)
tree       141e31f89f18b9fe0831f31852e0435ceaccafc5 /arch/ia64
parent     fb9c690a18b3d66925a65b17441c37fa14d4370b (diff)
Merge with 2.4.0-test7.
Diffstat (limited to 'arch/ia64')
36 files changed, 1932 insertions, 698 deletions
diff --git a/arch/ia64/config.in b/arch/ia64/config.in index 8b8dd761c..0fdf86c19 100644 --- a/arch/ia64/config.in +++ b/arch/ia64/config.in @@ -18,15 +18,16 @@ mainmenu_option next_comment comment 'General setup' define_bool CONFIG_IA64 y +define_bool CONFIG_SWIOTLB y # for now... define_bool CONFIG_ISA n define_bool CONFIG_SBUS n choice 'IA-64 system type' \ - "Generic CONFIG_IA64_GENERIC \ + "generic CONFIG_IA64_GENERIC \ + DIG-compliant CONFIG_IA64_DIG \ HP-simulator CONFIG_IA64_HP_SIM \ - SN1-simulator CONFIG_IA64_SGI_SN1_SIM \ - DIG-compliant CONFIG_IA64_DIG" Generic + SN1-simulator CONFIG_IA64_SGI_SN1_SIM" generic choice 'Kernel page size' \ "4KB CONFIG_IA64_PAGE_SIZE_4KB \ @@ -38,16 +39,18 @@ if [ "$CONFIG_IA64_DIG" = "y" ]; then define_bool CONFIG_ITANIUM y define_bool CONFIG_IA64_BRL_EMU y bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC - bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC + if [ "$CONFIG_ITANIUM_ASTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC + fi + bool ' Enable Itanium B-step specific code' CONFIG_ITANIUM_BSTEP_SPECIFIC + if [ "$CONFIG_ITANIUM_BSTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC + fi + bool ' Force interrupt redirection' CONFIG_IA64_HAVE_IRQREDIR bool ' Enable use of global TLB purge instruction (ptc.g)' CONFIG_ITANIUM_PTCG bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS bool ' Enable AzusA hacks' CONFIG_IA64_AZUSA_HACKS - bool ' Emulate PAL/SAL/EFI firmware' CONFIG_IA64_FW_EMU - bool ' Enable IA64 Machine Check Abort' CONFIG_IA64_MCA -fi - -if [ "$CONFIG_IA64_GENERIC" = "y" ]; then - define_bool CONFIG_IA64_SOFTSDV_HACKS y + bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA fi if [ "$CONFIG_IA64_SGI_SN1_SIM" = "y" ]; then @@ -59,7 +62,7 @@ define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. bool 'SMP support' CONFIG_SMP bool 'Performance monitor support' CONFIG_PERFMON -bool '/proc/palinfo support' CONFIG_IA64_PALINFO +bool '/proc/pal support' CONFIG_IA64_PALINFO bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC @@ -161,9 +164,9 @@ source drivers/char/Config.in #source drivers/misc/Config.in -source fs/Config.in +source drivers/media/Config.in -source fs/nls/Config.in +source fs/Config.in if [ "$CONFIG_VT" = "y" ]; then mainmenu_option next_comment @@ -188,6 +191,7 @@ fi endmenu source drivers/usb/Config.in +source drivers/input/Config.in fi # !HP_SIM diff --git a/arch/ia64/dig/iosapic.c b/arch/ia64/dig/iosapic.c index 2426a0193..7d9a084fd 100644 --- a/arch/ia64/dig/iosapic.c +++ b/arch/ia64/dig/iosapic.c @@ -22,12 +22,14 @@ #include <linux/string.h> #include <linux/irq.h> +#include <asm/acpi-ext.h> +#include <asm/delay.h> #include <asm/io.h> #include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/processor.h> #include <asm/ptrace.h> #include <asm/system.h> -#include <asm/delay.h> -#include <asm/processor.h> #undef DEBUG_IRQ_ROUTING @@ -315,10 +317,6 @@ dig_irq_init (void) */ outb(0xff, 0xA1); outb(0xff, 0x21); - -#ifndef CONFIG_IA64_DIG - iosapic_init(IO_SAPIC_DEFAULT_ADDR); -#endif } void @@ -337,15 +335,23 @@ dig_pci_fixup (void) if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ struct pci_dev * bridge = dev->bus->self; - /* do the bridge swizzle... 
*/ - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = iosapic_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); + /* allow for multiple bridges on an adapter */ + do { + /* do the bridge swizzle... */ + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = iosapic_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + } while (irq < 0 && (bridge = bridge->bus->self)); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n", bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); + else + printk(KERN_WARNING + "PCI: Couldn't map irq for B%d,I%d,P%d\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin); } if (irq >= 0) { printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n", @@ -361,3 +367,34 @@ dig_pci_fixup (void) dev->irq = 15; /* Spurious interrupts */ } } + +/* + * Register an IOSAPIC discovered via ACPI. + */ +void __init +dig_register_iosapic (acpi_entry_iosapic_t *iosapic) +{ + unsigned int ver, v; + int l, max_pin; + + ver = iosapic_version(iosapic->address); + max_pin = (ver >> 16) & 0xff; + + printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, + iosapic->irq_base, iosapic->irq_base + max_pin); + + for (l = 0; l <= max_pin; l++) { + v = iosapic->irq_base + l; + if (v < 16) + v = isa_irq_to_vector(v); + if (v > IA64_MAX_VECTORED_IRQ) { + printk(" !!! bad IOSAPIC interrupt vector: %u\n", v); + continue; + } + /* XXX Check for IOSAPIC collisions */ + iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); + iosapic_baseirq(v) = iosapic->irq_base; + } + iosapic_init(iosapic->address, iosapic->irq_base); +} diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c index 640412d7e..4d2452745 100644 --- a/arch/ia64/dig/machvec.c +++ b/arch/ia64/dig/machvec.c @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME dig #include <asm/machvec_init.h> -#include <asm/machvec_dig.h> - -MACHVEC_DEFINE(dig) diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c index 45c1f96f5..fa48254cc 100644 --- a/arch/ia64/dig/setup.c +++ b/arch/ia64/dig/setup.c @@ -24,10 +24,6 @@ #include <asm/machvec.h> #include <asm/system.h> -#ifdef CONFIG_IA64_FW_EMU -# include "../../kernel/fw-emu.c" -#endif - /* * This is here so we can use the CMOS detection in ide-probe.c to * determine what drives are present. 
In theory, we don't need this diff --git a/arch/ia64/hp/hpsim_machvec.c b/arch/ia64/hp/hpsim_machvec.c index 7d78f4961..76af3b4e2 100644 --- a/arch/ia64/hp/hpsim_machvec.c +++ b/arch/ia64/hp/hpsim_machvec.c @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME hpsim #include <asm/machvec_init.h> -#include <asm/machvec_hpsim.h> - -MACHVEC_DEFINE(hpsim) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index a9dc378cf..ad963b92f 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -73,7 +73,7 @@ GLOBAL_ENTRY(ia32_trace_syscall) END(ia32_trace_syscall) GLOBAL_ENTRY(sys32_vfork) - alloc r16=ar.pfs,2,2,3,0;; + alloc r16=ar.pfs,2,2,4,0;; mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags br.cond.sptk.few .fork1 // do the work END(sys32_vfork) @@ -105,7 +105,7 @@ END(sys32_fork) .align 8 .globl ia32_syscall_table ia32_syscall_table: - data8 sys_ni_syscall /* 0 - old "setup(" system call*/ + data8 sys32_ni_syscall /* 0 - old "setup(" system call*/ data8 sys_exit data8 sys32_fork data8 sys_read @@ -122,25 +122,25 @@ ia32_syscall_table: data8 sys_mknod data8 sys_chmod /* 15 */ data8 sys_lchown - data8 sys_ni_syscall /* old break syscall holder */ - data8 sys_ni_syscall + data8 sys32_ni_syscall /* old break syscall holder */ + data8 sys32_ni_syscall data8 sys_lseek data8 sys_getpid /* 20 */ data8 sys_mount data8 sys_oldumount data8 sys_setuid data8 sys_getuid - data8 sys_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ + data8 sys32_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ data8 sys32_ptrace data8 sys32_alarm - data8 sys_ni_syscall - data8 sys_ni_syscall + data8 sys32_ni_syscall + data8 sys32_ni_syscall data8 ia32_utime /* 30 */ - data8 sys_ni_syscall /* old stty syscall holder */ - data8 sys_ni_syscall /* old gtty syscall holder */ + data8 sys32_ni_syscall /* old stty syscall holder */ + data8 sys32_ni_syscall /* old gtty syscall holder */ data8 sys_access data8 sys_nice - data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + data8 sys32_ni_syscall /* 35 */ /* old ftime syscall holder */ data8 sys_sync data8 sys_kill data8 sys_rename @@ -149,22 +149,22 @@ ia32_syscall_table: data8 sys_dup data8 sys32_pipe data8 sys32_times - data8 sys_ni_syscall /* old prof syscall holder */ + data8 sys32_ni_syscall /* old prof syscall holder */ data8 sys_brk /* 45 */ data8 sys_setgid data8 sys_getgid - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_geteuid data8 sys_getegid /* 50 */ data8 sys_acct data8 sys_umount /* recycled never used phys( */ - data8 sys_ni_syscall /* old lock syscall holder */ + data8 sys32_ni_syscall /* old lock syscall holder */ data8 ia32_ioctl - data8 sys_fcntl /* 55 */ - data8 sys_ni_syscall /* old mpx syscall holder */ + data8 sys32_fcntl /* 55 */ + data8 sys32_ni_syscall /* old mpx syscall holder */ data8 sys_setpgid - data8 sys_ni_syscall /* old ulimit syscall holder */ - data8 sys_ni_syscall + data8 sys32_ni_syscall /* old ulimit syscall holder */ + data8 sys32_ni_syscall data8 sys_umask /* 60 */ data8 sys_chroot data8 sys_ustat @@ -172,12 +172,12 @@ ia32_syscall_table: data8 sys_getppid data8 sys_getpgrp /* 65 */ data8 sys_setsid - data8 sys_ni_syscall - data8 sys_ni_syscall - data8 sys_ni_syscall + data8 sys32_sigaction + data8 sys32_ni_syscall + data8 sys32_ni_syscall data8 sys_setreuid /* 70 */ data8 sys_setregid - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_sigpending data8 sys_sethostname data8 sys32_setrlimit /* 75 */ @@ -189,7 +189,7 @@ ia32_syscall_table: 
data8 sys_setgroups data8 old_select data8 sys_symlink - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_readlink /* 85 */ data8 sys_uselib data8 sys_swapon @@ -203,7 +203,7 @@ ia32_syscall_table: data8 sys_fchown /* 95 */ data8 sys_getpriority data8 sys_setpriority - data8 sys_ni_syscall /* old profil syscall holder */ + data8 sys32_ni_syscall /* old profil syscall holder */ data8 sys32_statfs data8 sys32_fstatfs /* 100 */ data8 sys_ioperm @@ -214,11 +214,11 @@ ia32_syscall_table: data8 sys32_newstat data8 sys32_newlstat data8 sys32_newfstat - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_iopl /* 110 */ data8 sys_vhangup - data8 sys_ni_syscall // used to be sys_idle - data8 sys_ni_syscall + data8 sys32_ni_syscall // used to be sys_idle + data8 sys32_ni_syscall data8 sys32_wait4 data8 sys_swapoff /* 115 */ data8 sys_sysinfo @@ -242,7 +242,7 @@ ia32_syscall_table: data8 sys_bdflush data8 sys_sysfs /* 135 */ data8 sys_personality - data8 sys_ni_syscall /* for afs_syscall */ + data8 sys32_ni_syscall /* for afs_syscall */ data8 sys_setfsuid data8 sys_setfsgid data8 sys_llseek /* 140 */ @@ -293,8 +293,8 @@ ia32_syscall_table: data8 sys_capset /* 185 */ data8 sys_sigaltstack data8 sys_sendfile - data8 sys_ni_syscall /* streams1 */ - data8 sys_ni_syscall /* streams2 */ + data8 sys32_ni_syscall /* streams1 */ + data8 sys32_ni_syscall /* streams2 */ data8 sys32_vfork /* 190 */ /* * CAUTION: If any system calls are added beyond this point diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index cbec84f2b..a6bf4a8d8 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -74,10 +74,14 @@ nargs(unsigned int arg, char **ap) n = 0; do { - if ((err = get_user(addr, (int *)A(arg))) != 0) - return(err); - if (ap) - *ap++ = (char *)A(addr); + err = get_user(addr, (int *)A(arg)); + if (IS_ERR(err)) + return err; + if (ap) { /* no access_ok needed, we allocated */ + err = __put_user((char *)A(addr), ap++); + if (IS_ERR(err)) + return err; + } arg += sizeof(unsigned int); n++; } while (addr); @@ -101,7 +105,11 @@ int stack) int na, ne, r, len; na = nargs(argv, NULL); + if (IS_ERR(na)) + return(na); ne = nargs(envp, NULL); + if (IS_ERR(ne)) + return(ne); len = (na + ne + 2) * sizeof(*av); /* * kmalloc won't work because the `sys_exec' code will attempt @@ -121,12 +129,21 @@ int stack) if (IS_ERR(av)) return (long)av; ae = av + na + 1; - av[na] = (char *)0; - ae[ne] = (char *)0; - (void)nargs(argv, av); - (void)nargs(envp, ae); + r = __put_user(0, (av + na)); + if (IS_ERR(r)) + goto out; + r = __put_user(0, (ae + ne)); + if (IS_ERR(r)) + goto out; + r = nargs(argv, av); + if (IS_ERR(r)) + goto out; + r = nargs(envp, ae); + if (IS_ERR(r)) + goto out; r = sys_execve(filename, av, ae, regs); if (IS_ERR(r)) +out: sys_munmap((unsigned long) av, len); return(r); } @@ -711,7 +728,8 @@ struct readdir32_callback { }; static int -filldir32 (void *__buf, const char *name, int namlen, off_t offset, ino_t ino) +filldir32 (void *__buf, const char *name, int namlen, off_t offset, ino_t ino, + unsigned int d_type) { struct linux32_dirent * dirent; struct getdents32_callback * buf = (struct getdents32_callback *) __buf; @@ -959,150 +977,85 @@ sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp) } struct iovec32 { unsigned int iov_base; int iov_len; }; +asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long); +asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long); -typedef ssize_t (*IO_fn_t)(struct file *, char *, 
size_t, loff_t *); - -static long -do_readv_writev32(int type, struct file *file, const struct iovec32 *vector, - u32 count) +static struct iovec * +get_iovec32(struct iovec32 *iov32, struct iovec *iov_buf, u32 count, int type) { - unsigned long tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - IO_fn_t fn; + int i; + u32 buf, len; + struct iovec *ivp, *iov; + + /* Get the "struct iovec" from user memory */ - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ if (!count) return 0; - if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count)) - return -EFAULT; + if(verify_area(VERIFY_READ, iov32, sizeof(struct iovec32)*count)) + return(struct iovec *)0; if (count > UIO_MAXIOV) - return -EINVAL; + return(struct iovec *)0; if (count > UIO_FASTIOV) { iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); if (!iov) - return -ENOMEM; - } + return((struct iovec *)0); + } else + iov = iov_buf; - tot_len = 0; - i = count; ivp = iov; - while(i > 0) { - u32 len; - u32 buf; - - __get_user(len, &vector->iov_len); - __get_user(buf, &vector->iov_base); - tot_len += len; + for (i = 0; i < count; i++) { + if (__get_user(len, &iov32->iov_len) || + __get_user(buf, &iov32->iov_base)) { + if (iov != iov_buf) + kfree(iov); + return((struct iovec *)0); + } + if (verify_area(type, (void *)A(buf), len)) { + if (iov != iov_buf) + kfree(iov); + return((struct iovec *)0); + } ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == VERIFY_WRITE - ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) { - if (iov != iovstack) - kfree(iov); - return retval; - } - - /* Then do the actual IO. 
Note that sockets need to be handled - * specially as they have atomicity guarantees and can handle - * iovec's natively - */ - if (inode->i_sock) { - int err; - err = sock_readv_writev(type, inode, file, iov, count, tot_len); - if (iov != iovstack) - kfree(iov); - return err; - } - - if (!file->f_op) { - if (iov != iovstack) - kfree(iov); - return -EINVAL; - } - /* VERIFY_WRITE actually means a read, as we write to user space */ - fn = file->f_op->read; - if (type == VERIFY_READ) - fn = (IO_fn_t) file->f_op->write; - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; + ivp->iov_len = (__kernel_size_t)len; + iov32++; ivp++; - count--; - nr = fn(file, base, len, &file->f_pos); - if (nr < 0) { - if (retval) - break; - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; } - if (iov != iovstack) - kfree(iov); - return retval; + return(iov); } asmlinkage long sys32_readv(int fd, struct iovec32 *vector, u32 count) { - struct file *file; - long ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if(!(file->f_mode & 1)) - goto out; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov; + int ret; + mm_segment_t old_fs = get_fs(); - ret = do_readv_writev32(VERIFY_WRITE, file, - vector, count); -out: - fput(file); -bad_file: + if ((iov = get_iovec32(vector, iovstack, count, VERIFY_WRITE)) == (struct iovec *)0) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_readv(fd, iov, count); + set_fs(old_fs); + if (iov != iovstack) + kfree(iov); return ret; } asmlinkage long sys32_writev(int fd, struct iovec32 *vector, u32 count) { - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if(!(file->f_mode & 2)) - goto out; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov; + int ret; + mm_segment_t old_fs = get_fs(); - down(&file->f_dentry->d_inode->i_sem); - ret = do_readv_writev32(VERIFY_READ, file, - vector, count); - up(&file->f_dentry->d_inode->i_sem); -out: - fput(file); -bad_file: + if ((iov = get_iovec32(vector, iovstack, count, VERIFY_READ)) == (struct iovec *)0) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_writev(fd, iov, count); + set_fs(old_fs); + if (iov != iovstack) + kfree(iov); return ret; } @@ -1173,21 +1126,22 @@ struct msghdr32 { static inline int shape_msg(struct msghdr *mp, struct msghdr32 *mp32) { + int ret; unsigned int i; if (!access_ok(VERIFY_READ, mp32, sizeof(*mp32))) return(-EFAULT); - __get_user(i, &mp32->msg_name); + ret = __get_user(i, &mp32->msg_name); mp->msg_name = (void *)A(i); - __get_user(mp->msg_namelen, &mp32->msg_namelen); - __get_user(i, &mp32->msg_iov); + ret |= __get_user(mp->msg_namelen, &mp32->msg_namelen); + ret |= __get_user(i, &mp32->msg_iov); mp->msg_iov = (struct iovec *)A(i); - __get_user(mp->msg_iovlen, &mp32->msg_iovlen); - __get_user(i, &mp32->msg_control); + ret |= __get_user(mp->msg_iovlen, &mp32->msg_iovlen); + ret |= __get_user(i, &mp32->msg_control); mp->msg_control = (void *)A(i); - __get_user(mp->msg_controllen, &mp32->msg_controllen); - __get_user(mp->msg_flags, &mp32->msg_flags); - return(0); + ret |= __get_user(mp->msg_controllen, &mp32->msg_controllen); + ret |= __get_user(mp->msg_flags, &mp32->msg_flags); + return(ret ? 
-EFAULT : 0); } /* @@ -2341,17 +2295,17 @@ restore_ia32_fpstate(struct task_struct *tsk, struct _fpstate_ia32 *save) { struct switch_stack *swp; struct pt_regs *ptp; - int i, tos; + int i, tos, ret; int fsrlo, fsrhi; if (!access_ok(VERIFY_READ, save, sizeof(*save))) return(-EIO); - __get_user(tsk->thread.fcr, (unsigned int *)&save->cw); - __get_user(fsrlo, (unsigned int *)&save->sw); - __get_user(fsrhi, (unsigned int *)&save->tag); + ret = __get_user(tsk->thread.fcr, (unsigned int *)&save->cw); + ret |= __get_user(fsrlo, (unsigned int *)&save->sw); + ret |= __get_user(fsrhi, (unsigned int *)&save->tag); tsk->thread.fsr = ((long)fsrhi << 32) | (long)fsrlo; - __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff); - __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff); + ret |= __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff); + ret |= __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff); /* * Stack frames start with 16-bytes of temp space */ @@ -2360,7 +2314,7 @@ restore_ia32_fpstate(struct task_struct *tsk, struct _fpstate_ia32 *save) tos = (tsk->thread.fsr >> 11) & 3; for (i = 0; i < 8; i++) get_fpreg(i, &save->_st[i], ptp, swp, tos); - return(0); + return(ret ? -EFAULT : 0); } asmlinkage long sys_ptrace(long, pid_t, unsigned long, unsigned long, long, long, long, long, long); @@ -2492,6 +2446,105 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, return ret; } +static inline int +get_flock32(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = get_user(kfl->l_type, &ufl->l_type); + err |= __get_user(kfl->l_whence, &ufl->l_whence); + err |= __get_user(kfl->l_start, &ufl->l_start); + err |= __get_user(kfl->l_len, &ufl->l_len); + err |= __get_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +static inline int +put_flock32(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = __put_user(kfl->l_type, &ufl->l_type); + err |= __put_user(kfl->l_whence, &ufl->l_whence); + err |= __put_user(kfl->l_start, &ufl->l_start); + err |= __put_user(kfl->l_len, &ufl->l_len); + err |= __put_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, + unsigned long arg); + +asmlinkage long +sys32_fcntl(unsigned int fd, unsigned int cmd, int arg) +{ + struct flock f; + mm_segment_t old_fs; + long ret; + + switch (cmd) { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + if(cmd != F_GETLK && get_flock32(&f, (struct flock32 *)((long)arg))) + return -EFAULT; + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_fcntl(fd, cmd, (unsigned long)&f); + set_fs(old_fs); + if(cmd == F_GETLK && put_flock32(&f, (struct flock32 *)((long)arg))) + return -EFAULT; + return ret; + default: + /* + * `sys_fcntl' lies about arg, for the F_SETOWN + * sub-function arg can have a negative value. + */ + return sys_fcntl(fd, cmd, (unsigned long)((long)arg)); + } +} + +asmlinkage long +sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset32_t mask; + + ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(mask, &act->sa_mask); + if (ret) + return ret; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); + + if (!ret && oact) { + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage long sys_ni_syscall(void); + +asmlinkage long +sys32_ni_syscall(int dummy0, int dummy1, int dummy2, int dummy3, + int dummy4, int dummy5, int dummy6, int dummy7, int stack) +{ + struct pt_regs *regs = (struct pt_regs *)&stack; + + printk("IA32 syscall #%d issued, maybe we should implement it\n", + (int)regs->r1); + return(sys_ni_syscall()); +} + #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ /* In order to reduce some races, while at the same time doing additional @@ -2545,61 +2598,6 @@ sys32_ioperm(u32 from, u32 num, int on) return sys_ioperm((unsigned long)from, (unsigned long)num, on); } -static inline int -get_flock(struct flock *kfl, struct flock32 *ufl) -{ - int err; - - err = get_user(kfl->l_type, &ufl->l_type); - err |= __get_user(kfl->l_whence, &ufl->l_whence); - err |= __get_user(kfl->l_start, &ufl->l_start); - err |= __get_user(kfl->l_len, &ufl->l_len); - err |= __get_user(kfl->l_pid, &ufl->l_pid); - return err; -} - -static inline int -put_flock(struct flock *kfl, struct flock32 *ufl) -{ - int err; - - err = __put_user(kfl->l_type, &ufl->l_type); - err |= __put_user(kfl->l_whence, &ufl->l_whence); - err |= __put_user(kfl->l_start, &ufl->l_start); - err |= __put_user(kfl->l_len, &ufl->l_len); - err |= __put_user(kfl->l_pid, &ufl->l_pid); - return err; -} - -extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); - -asmlinkage long -sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case F_GETLK: - case F_SETLK: - case F_SETLKW: - { - struct flock f; - mm_segment_t old_fs; - long ret; - - if(get_flock(&f, (struct flock32 *)arg)) - return -EFAULT; - old_fs = get_fs(); set_fs (KERNEL_DS); - ret = sys_fcntl(fd, cmd, (unsigned long)&f); - set_fs (old_fs); - if(put_flock(&f, (struct flock32 *)arg)) - return -EFAULT; - return ret; - } - default: - return sys_fcntl(fd, cmd, (unsigned long)arg); - } -} - struct dqblk32 { __u32 dqb_bhardlimit; __u32 dqb_bsoftlimit; @@ -3862,40 +3860,6 @@ out: extern void check_pending(int signum); -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 *act, - struct old_sigaction32 *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if(sig < 0) { - current->tss.new_signal = 1; - sig = -sig; - } - - if (act) { - old_sigset_t32 mask; - - ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - ret |= __get_user(mask, &act->sa_mask); - if (ret) - return ret; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); - - if (!ret && oact) { - ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - #ifdef CONFIG_MODULES extern asmlinkage unsigned long sys_create_module(const char *name_user, diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index dfba2529a..563c308ea 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -9,8 +9,8 @@ all: kernel.o head.o init_task.o -obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ - pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ +obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ + machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 20521da36..4bba56e1d 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -19,10 +19,11 @@ #include <linux/irq.h> #include <asm/acpi-ext.h> -#include <asm/page.h> #include <asm/efi.h> #include <asm/io.h> #include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/page.h> #undef ACPI_DEBUG /* Guess what this does? */ @@ -75,47 +76,6 @@ acpi_lsapic(char *p) } /* - * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate - * base addresses. - */ -static void __init -acpi_iosapic(char *p) -{ - /* - * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet - * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be - * a means for platform_setup() to register ACPI handlers? - */ -#ifdef CONFIG_IA64_DIG - acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p; - unsigned int ver, v; - int l, max_pin; - - ver = iosapic_version(iosapic->address); - max_pin = (ver >> 16) & 0xff; - - printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", - (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, - iosapic->irq_base, iosapic->irq_base + max_pin); - - for (l = 0; l <= max_pin; l++) { - v = iosapic->irq_base + l; - if (v < 16) - v = isa_irq_to_vector(v); - if (v > IA64_MAX_VECTORED_IRQ) { - printk(" !!! 
bad IOSAPIC interrupt vector: %u\n", v); - continue; - } - /* XXX Check for IOSAPIC collisions */ - iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); - iosapic_baseirq(v) = iosapic->irq_base; - } - iosapic_init(iosapic->address, iosapic->irq_base); -#endif -} - - -/* * Configure legacy IRQ information in iosapic_vector */ static void __init @@ -227,7 +187,7 @@ acpi_parse_msapic(acpi_sapic_t *msapic) break; case ACPI_ENTRY_IO_SAPIC: - acpi_iosapic(p); + platform_register_iosapic((acpi_entry_iosapic_t *) p); break; case ACPI_ENTRY_INT_SRC_OVERRIDE: diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index c4383b97f..d55835df6 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -33,9 +33,10 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; - static efi_runtime_services_t *runtime; +static unsigned long mem_limit = ~0UL; + static efi_status_t phys_get_time (efi_time_t *tm, efi_time_cap_t *tc) { @@ -169,15 +170,13 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - if (md->phys_addr > 1024*1024*1024UL) { - printk("Warning: ignoring %luMB of memory above 1GB!\n", - md->num_pages >> 8); - md->type = EFI_UNUSABLE_MEMORY; - continue; - } - if (!(md->attribute & EFI_MEMORY_WB)) continue; + if (md->phys_addr + (md->num_pages << 12) > mem_limit) { + if (md->phys_addr > mem_limit) + continue; + md->num_pages = (mem_limit - md->phys_addr) >> 12; + } if (md->num_pages == 0) { printk("efi_memmap_walk: ignoring empty region at 0x%lx", md->phys_addr); @@ -224,8 +223,8 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor * Abstraction Layer chapter 11 in ADAG */ -static void -map_pal_code (void) +void +efi_map_pal_code (void) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -240,13 +239,14 @@ map_pal_code (void) for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - if (md->type != EFI_PAL_CODE) continue; + if (md->type != EFI_PAL_CODE) + continue; if (++pal_code_count > 1) { printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n", md->phys_addr); continue; - } + } mask = ~((1 << _PAGE_SIZE_4M)-1); /* XXX should be dynamic? */ vaddr = PAGE_OFFSET + md->phys_addr; @@ -281,9 +281,28 @@ efi_init (void) efi_config_table_t *config_tables; efi_char16_t *c16; u64 efi_desc_size; - char vendor[100] = "unknown"; + char *cp, *end, vendor[100] = "unknown"; + extern char saved_command_line[]; int i; + /* it's too early to be able to use the standard kernel command line support... 
*/ + for (cp = saved_command_line; *cp; ) { + if (memcmp(cp, "mem=", 4) == 0) { + cp += 4; + mem_limit = memparse(cp, &end) - 1; + if (end != cp) + break; + cp = end; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + if (mem_limit != ~0UL) + printk("Ignoring memory above %luMB\n", mem_limit >> 20); + efi.systab = __va(ia64_boot_param.efi_systab); /* @@ -359,7 +378,7 @@ efi_init (void) } #endif - map_pal_code(); + efi_map_pal_code(); } void diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index f9beac21d..e37bd0df8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -106,29 +106,19 @@ GLOBAL_ENTRY(ia64_switch_to) alloc r16=ar.pfs,1,0,0,0 DO_SAVE_SWITCH_STACK UNW(.body) - // disable interrupts to ensure atomicity for next few instructions: - mov r17=psr // M-unit - ;; - rsm psr.i // M-unit - dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff - ;; - srlz.d - ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 ;; st8 [r22]=sp // save kernel stack pointer of old task ld8 sp=[r21] // load kernel stack pointer of new task and r20=in0,r18 // physical address of "current" ;; + mov ar.k6=r20 // copy "current" into ar.k6 mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer - mov ar.k6=r20 // copy "current" into ar.k6 ;; - // restore interrupts - mov psr.l=r17 - ;; - srlz.d DO_LOAD_SWITCH_STACK( ) br.ret.sptk.few rp END(ia64_switch_to) @@ -1207,7 +1197,7 @@ sys_call_table: data8 sys_newlstat data8 sys_newfstat data8 sys_clone2 - data8 ia64_ni_syscall + data8 sys_getdents64 data8 ia64_ni_syscall // 1215 data8 ia64_ni_syscall data8 ia64_ni_syscall diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index b2abc48a4..bea14236d 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -181,7 +181,9 @@ END(ia64_save_debug_regs) GLOBAL_ENTRY(ia64_load_debug_regs) alloc r16=ar.pfs,1,0,0,0 +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) lfetch.nta [in0] +#endif mov r20=ar.lc // preserve ar.lc add r19=IA64_NUM_DBG_REGS*8,in0 mov ar.lc=IA64_NUM_DBG_REGS-1 @@ -702,3 +704,74 @@ SET_REG(b4); SET_REG(b5); #endif /* CONFIG_IA64_BRL_EMU */ + +#ifdef CONFIG_SMP + + /* + * This routine handles spinlock contention. It uses a simple exponential backoff + * algorithm to reduce unnecessary bus traffic. The initial delay is selected from + * the low-order bits of the cycle counter (a cheap "randomizer"). I'm sure this + * could use additional tuning, especially on systems with a large number of CPUs. + * Also, I think the maximum delay should be made a function of the number of CPUs in + * the system. --davidm 00/08/05 + * + * WARNING: This is not a normal procedure. It gets called from C code without + * the compiler knowing about it. Thus, we must not use any scratch registers + * beyond those that were declared "clobbered" at the call-site (see spin_lock() + * macro). We may not even use the stacked registers, because that could overwrite + * output registers. Similarly, we can't use the scratch stack area as it may be + * in use, too. 
+ * + * Inputs: + * ar.ccv = 0 (and available for use) + * r28 = available for use + * r29 = available for use + * r30 = non-zero (and available for use) + * r31 = address of lock we're trying to acquire + * p15 = available for use + */ + +# define delay r28 +# define timeout r29 +# define tmp r30 + +GLOBAL_ENTRY(ia64_spinlock_contention) + mov tmp=ar.itc + ;; + and delay=0x3f,tmp + ;; + +.retry: add timeout=tmp,delay + shl delay=delay,1 + ;; + dep delay=delay,r0,0,13 // limit delay to 8192 cycles + ;; + // delay a little... +.wait: sub tmp=tmp,timeout + or delay=0xf,delay // make sure delay is non-zero (otherwise we get stuck with 0) + ;; + cmp.lt p15,p0=tmp,r0 + mov tmp=ar.itc +(p15) br.cond.sptk .wait + ;; + ld1 tmp=[r31] + ;; + cmp.ne p15,p0=tmp,r0 + mov tmp=ar.itc +(p15) br.cond.sptk.few .retry // lock is still busy + ;; + // try acquiring lock (we know ar.ccv is still zero!): + mov tmp=1 + ;; + IA64_SEMFIX_INSN + cmpxchg1.acq tmp=[r31],tmp,ar.ccv + ;; + cmp.eq p15,p0=tmp,r0 + + mov tmp=ar.itc +(p15) br.ret.sptk.many b7 // got lock -> return + br .retry // still no luck, retry + +END(ia64_spinlock_contention) + +#endif diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 2e4ffe403..62e792612 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -18,6 +18,7 @@ EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strncat); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strtok); #include <linux/pci.h> @@ -37,6 +38,7 @@ EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(kernel_thread); #ifdef CONFIG_SMP +#include <asm/hardirq.h> EXPORT_SYMBOL(synchronize_irq); #include <asm/smplock.h> diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 1ee2974b5..fe686db0e 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -117,6 +117,13 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs) { unsigned long bsp, sp; + /* + * Note: if the interrupt happened while executing in + * the context switch routine (ia64_switch_to), we may + * get a spurious stack overflow here. This is + * because the register and the memory stack are not + * switched atomically. + */ asm ("mov %0=ar.bsp" : "=r"(bsp)); asm ("mov %0=sp" : "=r"(sp)); diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 9f611c5b0..d1b599f77 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -170,33 +170,27 @@ ia64_ivt: * The ITLB basically does the same as the VHPT handler except * that we always insert exactly one instruction TLB entry. */ -#if 1 /* * Attempt to lookup PTE through virtual linear page table. * The speculative access will fail if there is no TLB entry * for the L3 page table page we're trying to access. */ - mov r31=pr // save predicates - ;; - thash r17=r16 // compute virtual address of L3 PTE + mov r16=cr.iha // get virtual address of L3 PTE ;; - ld8.s r18=[r17] // try to read L3 PTE + ld8.s r16=[r16] // try to read L3 PTE + mov r31=pr // save predicates ;; - tnat.nz p6,p0=r18 // did read succeed? + tnat.nz p6,p0=r16 // did read succeed? 
(p6) br.cond.spnt.many 1f ;; - itc.i r18 + itc.i r16 ;; mov pr=r31,-1 rfi -1: rsm psr.dt // use physical addressing for data -#else - mov r16=cr.ifa // get address that caused the TLB miss +1: mov r16=cr.ifa // get address that caused the TLB miss ;; rsm psr.dt // use physical addressing for data -#endif - mov r31=pr // save the predicate registers mov r19=ar.k7 // get page table base address shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 @@ -244,33 +238,27 @@ ia64_ivt: * The DTLB basically does the same as the VHPT handler except * that we always insert exactly one data TLB entry. */ - mov r16=cr.ifa // get address that caused the TLB miss -#if 1 /* * Attempt to lookup PTE through virtual linear page table. * The speculative access will fail if there is no TLB entry * for the L3 page table page we're trying to access. */ - mov r31=pr // save predicates + mov r16=cr.iha // get virtual address of L3 PTE ;; - thash r17=r16 // compute virtual address of L3 PTE - ;; - ld8.s r18=[r17] // try to read L3 PTE + ld8.s r16=[r16] // try to read L3 PTE + mov r31=pr // save predicates ;; - tnat.nz p6,p0=r18 // did read succeed? + tnat.nz p6,p0=r16 // did read succeed? (p6) br.cond.spnt.many 1f ;; - itc.d r18 + itc.d r16 ;; mov pr=r31,-1 rfi -1: rsm psr.dt // use physical addressing for data -#else - rsm psr.dt // use physical addressing for data - mov r31=pr // save the predicate registers +1: mov r16=cr.ifa // get address that caused the TLB miss ;; -#endif + rsm psr.dt // use physical addressing for data mov r19=ar.k7 // get page table base address shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 @@ -504,7 +492,24 @@ page_fault: mov r29=b0 // save b0 in case of nested fault) ;; 1: ld8 r18=[r17] - ;; // avoid raw on r18 +#if defined(CONFIG_IA32_SUPPORT) && \ + (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) + // + // Erratum 85 (Access bit fault could be reported before page not present fault) + // If the PTE is indicates the page is not present, then just turn this into a + // page fault. + // + mov r31=pr // save predicates + ;; + tbit.nz p6,p0=r18,0 // page present bit set? +(p6) br.cond.sptk 1f + ;; // avoid WAW on p6 + mov pr=r31,-1 + br.cond.sptk page_fault // page wasn't present +1: mov pr=r31,-1 +#else + ;; // avoid RAW on r18 +#endif or r18=_PAGE_A,r18 // set the accessed bit mov b0=r29 // restore b0 ;; @@ -541,14 +546,6 @@ page_fault: ;; srlz.d // ensure everyone knows psr.dt is off... cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) -#if 1 - // Allow syscalls via the old system call number for the time being. This is - // so we can transition to the new syscall number in a relatively smooth - // fashion. - mov r17=0x80000 - ;; -(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number? 
-#endif (p7) br.cond.spnt.many non_syscall SAVE_MIN // uses r31; defines r2: diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 153fb5684..2afb5613e 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -1,14 +1,12 @@ +#include <linux/config.h> #include <linux/kernel.h> #include <asm/page.h> #include <asm/machvec.h> -struct ia64_machine_vector ia64_mv; +#ifdef CONFIG_IA64_GENERIC -void -machvec_noop (void) -{ -} +struct ia64_machine_vector ia64_mv; /* * Most platforms use this routine for mapping page frame addresses @@ -46,3 +44,10 @@ machvec_init (const char *name) ia64_mv = *mv; printk("booting generic kernel on platform %s\n", name); } + +#endif /* CONFIG_IA64_GENERIC */ + +void +machvec_noop (void) +{ +} diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index 147e2b8fe..0b07163dc 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -191,3 +191,57 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static) srlz.d // seralize restoration of psr.l br.ret.sptk.few b0 END(ia64_pal_call_phys_static) + +/* + * Make a PAL call using the stacked registers in physical mode. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaning PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_phys_stacked) + UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)) + alloc loc1 = ar.pfs,5,5,86,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out0 = in0 // first argument + mov out1 = in1 // copy arg2 + mov out2 = in2 // copy arg3 + mov out3 = in3 // copy arg3 + ;; + mov loc3 = psr // save psr + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + ;; + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + mov b7 = loc2 // install target to branch reg + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.few rp=ia64_switch_mode +.ret6: + br.call.sptk.many rp=b7 // now make the call +.ret7: + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + br.call.sptk.few rp=ia64_switch_mode // return to virtual mode + +.ret8: mov psr.l = loc3 // restore init PSR + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // seralize restoration of psr.l + br.ret.sptk.few b0 +END(ia64_pal_call_phys_stacked) + diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index ad40e911e..ecd7b0886 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -16,24 +16,41 @@ * are empty for now. * - remove hack to avoid problem with <= 256M RAM for itr. 
*/ +#include <linux/config.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/mm.h> +#include <linux/module.h> +#if defined(MODVERSIONS) +#include <linux/modversions.h> +#endif #include <asm/pal.h> #include <asm/sal.h> #include <asm/efi.h> #include <asm/page.h> #include <asm/processor.h> +#ifdef CONFIG_SMP +#include <linux/smp.h> +#endif + +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); +MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); /* - * Hope to get rid of these in a near future + * Hope to get rid of this one in a near future */ #define IA64_PAL_VERSION_BUG 1 -#define PALINFO_VERSION "0.1" +#define PALINFO_VERSION "0.3" + +#ifdef CONFIG_SMP +#define cpu_is_online(i) (cpu_online_map & (1UL << i)) +#else +#define cpu_is_online(i) 1 +#endif typedef int (*palinfo_func_t)(char*); @@ -43,7 +60,6 @@ typedef struct { struct proc_dir_entry *entry; /* registered entry (removal) */ } palinfo_entry_t; -static struct proc_dir_entry *palinfo_dir; /* * A bunch of string array to get pretty printing @@ -95,7 +111,7 @@ static const char *rse_hints[]={ #define RSE_HINTS_COUNT (sizeof(rse_hints)/sizeof(const char *)) /* - * The current resvision of the Volume 2 of + * The current revision of the Volume 2 of * IA-64 Architecture Software Developer's Manual is wrong. * Table 4-10 has invalid information concerning the ma field: * Correct table is: @@ -121,64 +137,31 @@ static const char *mem_attrib[]={ /* * Allocate a buffer suitable for calling PAL code in Virtual mode * - * The documentation (PAL2.6) requires thius buffer to have a pinned - * translation to avoid any DTLB faults. For this reason we allocate - * a page (large enough to hold any possible reply) and use a DTC - * to hold the translation during the call. A call the free_palbuffer() - * is required to release ALL resources (page + translation). - * - * The size of the page allocated is based on the PAGE_SIZE defined - * at compile time for the kernel, i.e. >= 4Kb. + * The documentation (PAL2.6) allows DTLB misses on the buffer. So + * using the TC is enough, no need to pin the entry. * - * Return: a pointer to the newly allocated page (virtual address) + * We allocate a kernel-sized page (at least 4KB). This is enough to + * hold any possible reply. */ -static void * +static inline void * get_palcall_buffer(void) { void *tmp; tmp = (void *)__get_free_page(GFP_KERNEL); if (tmp == 0) { - printk(KERN_ERR "%s: can't get a buffer page\n", __FUNCTION__); - } else if ( ((u64)tmp - PAGE_OFFSET) > (1<<_PAGE_SIZE_256M) ) { /* XXX: temporary hack */ - unsigned long flags; - - /* PSR.ic must be zero to insert new DTR */ - ia64_clear_ic(flags); - - /* - * we only insert of DTR - * - * XXX: we need to figure out a way to "allocate" TR(s) to avoid - * conflicts. Maybe something in an include file like pgtable.h - * page.h or processor.h - * - * ITR0/DTR0: used for kernel code/data - * ITR1/DTR1: used by HP simulator - * ITR2/DTR2: used to map PAL code - */ - ia64_itr(0x2, 3, (u64)tmp, - pte_val(mk_pte_phys(__pa(tmp), __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW))), PAGE_SHIFT); - - ia64_srlz_d (); - - __restore_flags(flags); - } - + printk(KERN_ERR __FUNCTION__" : can't get a buffer page\n"); + } return tmp; } /* * Free a palcall buffer allocated with the previous call - * - * The translation is also purged. 
*/ -static void +static inline void free_palcall_buffer(void *addr) { __free_page(addr); - ia64_ptr(0x2, (u64)addr, PAGE_SHIFT); - ia64_srlz_d (); } /* @@ -564,7 +547,6 @@ processor_info(char *page) int i; s64 ret; - /* must be in physical mode */ if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0; for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) { @@ -577,6 +559,57 @@ processor_info(char *page) return p - page; } +static const char *bus_features[]={ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL, + "Request Bus Parking", + "Bus Lock Mask", + "Enable Half Transfer", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, + "Disable Transaction Queuing", + "Disable Reponse Error Checking", + "Disable Bus Error Checking", + "Disable Bus Requester Internal Error Signalling", + "Disable Bus Requester Error Signalling", + "Disable Bus Initialization Event Checking", + "Disable Bus Initialization Event Signalling", + "Disable Bus Address Error Checking", + "Disable Bus Address Error Signalling", + "Disable Bus Data Error Checking" +}; + + +static int +bus_info(char *page) +{ + char *p = page; + const char **v = bus_features; + pal_bus_features_u_t av, st, ct; + u64 avail, status, control; + int i; + s64 ret; + + if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0; + + avail = av.pal_bus_features_val; + status = st.pal_bus_features_val; + control = ct.pal_bus_features_val; + + for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { + if ( ! *v ) continue; + p += sprintf(p, "%-48s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); + } + return p - page; +} + + /* * physical mode call for PAL_VERSION is working fine. * This function is meant to go away once PAL get fixed. 
@@ -613,21 +646,25 @@ version_info(char *page) #endif if (status != 0) return 0; - p += sprintf(p, "PAL_vendor : 0x%x (min=0x%x)\n" \ - "PAL_A revision : 0x%x (min=0x%x)\n" \ - "PAL_A model : 0x%x (min=0x%x)\n" \ - "PAL_B mode : 0x%x (min=0x%x)\n" \ - "PAL_B revision : 0x%x (min=0x%x)\n", + p += sprintf(p, "PAL_vendor : 0x%02x (min=0x%02x)\n" \ + "PAL_A : %x.%x.%x (min=%x.%x.%x)\n" \ + "PAL_B : %x.%x.%x (min=%x.%x.%x)\n", cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor, + + cur_ver.pal_version_s.pv_pal_a_model>>4, + cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_a_model, - min_ver.pal_version_s.pv_pal_a_model, + min_ver.pal_version_s.pv_pal_a_model>>4, + min_ver.pal_version_s.pv_pal_a_model&0xf, + min_ver.pal_version_s.pv_pal_a_rev, + + cur_ver.pal_version_s.pv_pal_b_model>>4, + cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev, - min_ver.pal_version_s.pv_pal_b_rev, - cur_ver.pal_version_s.pv_pal_b_model, - min_ver.pal_version_s.pv_pal_b_model); + min_ver.pal_version_s.pv_pal_b_model>>4, + min_ver.pal_version_s.pv_pal_b_model&0xf, + min_ver.pal_version_s.pv_pal_b_rev); return p - page; } @@ -648,6 +685,9 @@ perfmon_info(char *page) } #ifdef IA64_PAL_PERF_MON_INFO_BUG + /* + * This bug has been fixed in PAL 2.2.9 and higher + */ pm_buffer[5]=0x3; pm_info.pal_perf_mon_info_s.cycles = 0x12; pm_info.pal_perf_mon_info_s.retired = 0x08; @@ -708,30 +748,111 @@ frequency_info(char *page) return p - page; } - -/* - * Entry point routine: all calls go trhough this function - */ static int -palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +tr_info(char *page) { - palinfo_func_t info = (palinfo_func_t)data; - int len = info(page); + char *p = page; + s64 status; + pal_tr_valid_u_t tr_valid; + u64 tr_buffer[4]; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int i, j; + u64 max[3], pgm; + struct ifa_reg { + u64 valid:1; + u64 ig:11; + u64 vpn:52; + } *ifa_reg; + struct itir_reg { + u64 rv1:2; + u64 ps:6; + u64 key:24; + u64 rv2:32; + } *itir_reg; + struct gr_reg { + u64 p:1; + u64 rv1:1; + u64 ma:3; + u64 a:1; + u64 d:1; + u64 pl:2; + u64 ar:3; + u64 ppn:38; + u64 rv2:2; + u64 ed:1; + u64 ig:11; + } *gr_reg; + struct rid_reg { + u64 ig1:1; + u64 rv1:1; + u64 ig2:6; + u64 rid:24; + u64 rv2:32; + } *rid_reg; - if (len <= off+count) *eof = 1; + if ((status=ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk("ia64_pal_vm_summary=%ld\n", status); + return 0; + } + max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; - *start = page + off; - len -= off; + for (i=0; i < 2; i++ ) { + for (j=0; j < max[i]; j++) { - if (len>count) len = count; - if (len<0) len = 0; + status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); + if (status != 0) { + printk(__FUNCTION__ " pal call failed on tr[%d:%d]=%ld\n", i, j, status); + continue; + } - return len; + ifa_reg = (struct ifa_reg *)&tr_buffer[2]; + + if (ifa_reg->valid == 0) continue; + + gr_reg = (struct gr_reg *)tr_buffer; + itir_reg = (struct itir_reg *)&tr_buffer[1]; + rid_reg = (struct rid_reg *)&tr_buffer[3]; + + pgm = -1 << (itir_reg->ps - 12); + p += sprintf(p, "%cTR%d: av=%d pv=%d dv=%d mv=%d\n" \ + "\tppn : 0x%lx\n" \ + "\tvpn : 0x%lx\n" \ + "\tps : ", + + "ID"[i], + j, + tr_valid.pal_tr_valid_s.access_rights_valid, + tr_valid.pal_tr_valid_s.priv_level_valid, + 
tr_valid.pal_tr_valid_s.dirty_bit_valid, + tr_valid.pal_tr_valid_s.mem_attr_valid, + (gr_reg->ppn & pgm)<< 12, + (ifa_reg->vpn & pgm)<< 12); + + p = bitvector_process(p, 1<< itir_reg->ps); + + p += sprintf(p, "\n\tpl : %d\n" \ + "\tar : %d\n" \ + "\trid : %x\n" \ + "\tp : %d\n" \ + "\tma : %d\n" \ + "\td : %d\n", + gr_reg->pl, + gr_reg->ar, + rid_reg->rid, + gr_reg->p, + gr_reg->ma, + gr_reg->d); + } + } + return p - page; } + + /* - * List names,function pairs for every entry in /proc/palinfo - * Must be terminated with the NULL,NULL entry. + * List {name,function} pairs for every entry in /proc/palinfo/cpu* */ static palinfo_entry_t palinfo_entries[]={ { "version_info", version_info, }, @@ -742,38 +863,190 @@ static palinfo_entry_t palinfo_entries[]={ { "processor_info", processor_info, }, { "perfmon_info", perfmon_info, }, { "frequency_info", frequency_info, }, - { NULL, NULL,} + { "bus_info", bus_info }, + { "tr_info", tr_info, } }; +#define NR_PALINFO_ENTRIES (sizeof(palinfo_entries)/sizeof(palinfo_entry_t)) + +/* + * this array is used to keep track of the proc entries we create. This is + * required in the module mode when we need to remove all entries. The procfs code + * does not do recursion of deletion + * + * Notes: + * - first +1 accounts for the cpuN entry + * - second +1 account for toplevel palinfo + * + */ +#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)+1) + +static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; + +/* + * This data structure is used to pass which cpu,function is being requested + * It must fit in a 64bit quantity to be passed to the proc callback routine + * + * In SMP mode, when we get a request for another CPU, we must call that + * other CPU using IPI and wait for the result before returning. 
+ */ +typedef union { + u64 value; + struct { + unsigned req_cpu: 32; /* for which CPU this info is */ + unsigned func_id: 32; /* which function is requested */ + } pal_func_cpu; +} pal_func_cpu_u_t; + +#define req_cpu pal_func_cpu.req_cpu +#define func_id pal_func_cpu.func_id + +#ifdef CONFIG_SMP + +/* + * used to hold information about final function to call + */ +typedef struct { + palinfo_func_t func; /* pointer to function to call */ + char *page; /* buffer to store results */ + int ret; /* return value from call */ +} palinfo_smp_data_t; + + +/* + * this function does the actual final call and he called + * from the smp code, i.e., this is the palinfo callback routine + */ +static void +palinfo_smp_call(void *info) +{ + palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; + /* printk(__FUNCTION__" called on CPU %d\n", smp_processor_id());*/ + if (data == NULL) { + printk(KERN_ERR __FUNCTION__" data pointer is NULL\n"); + data->ret = 0; /* no output */ + return; + } + /* does this actual call */ + data->ret = (*data->func)(data->page); +} + +/* + * function called to trigger the IPI, we need to access a remote CPU + * Return: + * 0 : error or nothing to output + * otherwise how many bytes in the "page" buffer were written + */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + palinfo_smp_data_t ptr; + int ret; + + ptr.func = palinfo_entries[f->func_id].proc_read; + ptr.page = page; + ptr.ret = 0; /* just in case */ + + /*printk(__FUNCTION__" calling CPU %d from CPU %d for function %d\n", f->req_cpu,smp_processor_id(), f->func_id);*/ + + /* will send IPI to other CPU and wait for completion of remote call */ + if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) { + printk(__FUNCTION__" remote CPU call from %d to %d on function %d: error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); + return 0; + } + return ptr.ret; +} +#else /* ! CONFIG_SMP */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + printk(__FUNCTION__" should not be called with non SMP kernel\n"); + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * Entry point routine: all calls go through this function + */ +static int +palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len=0; + pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data; + + MOD_INC_USE_COUNT; + /* + * in SMP mode, we may need to call another CPU to get correct + * information. PAL, by definition, is processor specific + */ + if (f->req_cpu == smp_processor_id()) + len = (*palinfo_entries[f->func_id].proc_read)(page); + else + len = palinfo_handle_smp(f, page); + + if (len <= off+count) *eof = 1; + + *start = page + off; + len -= off; + + if (len>count) len = count; + if (len<0) len = 0; + + MOD_DEC_USE_COUNT; + + return len; +} static int __init palinfo_init(void) { - palinfo_entry_t *p; +# define CPUSTR "cpu%d" + + pal_func_cpu_u_t f; + struct proc_dir_entry **pdir = palinfo_proc_entries; + struct proc_dir_entry *palinfo_dir, *cpu_dir; + int i, j; + char cpustr[sizeof(CPUSTR)]; printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); - palinfo_dir = create_proc_entry("palinfo", S_IFDIR | S_IRUGO | S_IXUGO, NULL); + palinfo_dir = proc_mkdir("pal", NULL); + + /* + * we keep track of created entries in a depth-first order for + * cleanup purposes. 
Each entry is stored into palinfo_proc_entries + */ + for (i=0; i < NR_CPUS; i++) { + + if (!cpu_is_online(i)) continue; + + sprintf(cpustr,CPUSTR, i); + + cpu_dir = proc_mkdir(cpustr, palinfo_dir); - for (p = palinfo_entries; p->name ; p++){ - p->entry = create_proc_read_entry (p->name, 0, palinfo_dir, - palinfo_read_entry, p->proc_read); + f.req_cpu = i; + + for (j=0; j < NR_PALINFO_ENTRIES; j++) { + f.func_id = j; + *pdir++ = create_proc_read_entry (palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); + } + *pdir++ = cpu_dir; } + *pdir = palinfo_dir; return 0; } -static int __exit +static void __exit palinfo_exit(void) { - palinfo_entry_t *p; + int i = 0; - for (p = palinfo_entries; p->name ; p++){ - remove_proc_entry (p->name, palinfo_dir); + /* remove all nodes: depth first pass */ + for (i=0; i< NR_PALINFO_PROC_ENTRIES ; i++) { + remove_proc_entry (palinfo_proc_entries[i]->name, NULL); } - remove_proc_entry ("palinfo", 0); - - return 0; } module_init(palinfo_init); diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index ab86e69b3..80509c6a1 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -3,34 +3,509 @@ * * This implementation is for IA-64 platforms that do not support * I/O TLBs (aka DMA address translation hardware). - * - * XXX This doesn't do the right thing yet. It appears we would have - * to add additional zones so we can implement the various address - * mask constraints that we might encounter. A zone for memory < 32 - * bits is obviously necessary... + * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> */ -#include <linux/types.h> +#include <linux/config.h> + #include <linux/mm.h> -#include <linux/string.h> #include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/types.h> #include <asm/io.h> +#include <asm/pci.h> +#include <asm/dma.h> + +#ifdef CONFIG_SWIOTLB + +#include <linux/init.h> +#include <linux/bootmem.h> + +#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1))) + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +/* + * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the + * memory was in fact allocated by this API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and io_tlb_end. + * This is command line adjustable via setup_io_tlb_npages. + */ +unsigned long io_tlb_nslabs = 1024; + +/* + * This is a free list describing the number of free entries available from each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry for the sync + * operations. 
+ */ +static unsigned char **io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED; + +static int __init +setup_io_tlb_npages (char *str) +{ + io_tlb_nslabs = simple_strtoul(str, NULL, 0) << (PAGE_SHIFT - IO_TLB_SHIFT); + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); + +/* + * Statically reserve bounce buffer space and initialize bounce buffer + * data structures for the software IO TLB used to implement the PCI DMA API + */ +void +setup_swiotlb (void) +{ + int i; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + if (!io_tlb_start) + BUG(); + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size 2^IO_TLB_SHIFT between + * io_tlb_start and io_tlb_end. + */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = io_tlb_nslabs - i; + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + + printk("Placing software IO TLB between 0x%p - 0x%p\n", io_tlb_start, io_tlb_end); +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction) +{ + unsigned long flags; + char *dma_addr; + unsigned int i, nslots, stride, index, wrap; + + /* + * For mappings greater than a page size, we limit the stride (and hence alignment) + * to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > (1 << PAGE_SHIFT)) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = nslots; + + if (!nslots) + BUG(); + + /* + * Find suitable number of IO TLB entries size that will fit this request and allocate a buffer + * from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + do { + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot and mark the + * entries as '0' indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + for (i = index; i < index + nslots; i++) + io_tlb_list[i] = 0; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next round. + */ + io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0; + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + + /* + * XXX What is a suitable recovery mechanism here? We cannot + * sleep because we are called from with in interrupts! + */ + panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size); +found: + } + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. This is needed + * when we sync the memory. Then we sync the buffer if needed. + */ + io_tlb_orig_addr[index] = buffer; + if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL) + memcpy(dma_addr, buffer, size); + + return dma_addr; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. 
+ */ +static void +__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +{ + unsigned long flags; + int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL)) + /* + * bounce... copy the data back into the original buffer + * and delete the bounce buffer. + */ + memcpy(buffer, dma_addr, size); + + /* + * Return the buffer to the free list by setting the corresponding entries to indicate + * the number of contigous entries available. + * While returning the entries to the free list, we merge the entries with slots below + * and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + int count = ((index + nslots) < io_tlb_nslabs ? io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceeding slots, if available (non zero) + */ + for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--) + io_tlb_list[i] += io_tlb_list[index]; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * bounce... copy the data back into/from the original buffer + * XXX How do you handle PCI_DMA_BIDIRECTIONAL here ? + */ + if (direction == PCI_DMA_FROMDEVICE) + memcpy(buffer, dma_addr, size); + else if (direction == PCI_DMA_TODEVICE) + memcpy(dma_addr, buffer, size); + else + BUG(); +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. + * The PCI address to use is returned. + * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ +dma_addr_t +pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + unsigned long pci_addr = virt_to_phys(ptr); + + if (direction == PCI_DMA_NONE) + BUG(); + /* + * Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if ((pci_addr & ~hwdev->dma_mask) == 0) + /* + * Device is bit capable of DMA'ing to the + * buffer... just return the PCI address of ptr + */ + return pci_addr; + + /* + * get a bounce buffer: + */ + pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction)); + + /* + * Ensure that the address returned is DMA'ble: + */ + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("__pci_map_single: bounce buffer is not DMA'ble"); + + return pci_addr; +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guarenteed to see + * whatever the device wrote there. 
+ */ +void +pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +{ + char *dma_addr = phys_to_virt(pci_addr); + + if (direction == PCI_DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + __pci_unmap_single(hwdev, dma_addr, size, direction); +} + +/* + * Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +void +pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +{ + char *dma_addr = phys_to_virt(pci_addr); + + if (direction == PCI_DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + __pci_sync_single(hwdev, dma_addr, size, direction); +} + +/* + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scather-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +int +pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) { + sg->orig_address = sg->address; + if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) { + sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction); + } + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +void +pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) { + __pci_unmap_single(hwdev, sg->address, sg->length, direction); + sg->address = sg->orig_address; + } +} + +/* + * Make physical memory consistent for a set of streaming mode DMA + * translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +void +pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) + __pci_sync_single(hwdev, sg->address, sg->length, direction); +} + +#else +/* + * Map a single buffer of the indicated size for DMA in streaming mode. + * The 32-bit bus address to use is returned. + * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. 
+ */ +extern inline dma_addr_t +pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + return virt_to_bus(ptr); +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guarenteed to see + * whatever the device wrote there. + */ +extern inline void +pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} +/* + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scather-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +extern inline int +pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + return nents; +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +extern inline void +pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} +/* + * Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +extern inline void +pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +/* + * Make physical memory consistent for a set of streaming mode DMA + * translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. 
+ */ +extern inline void +pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +#endif /* CONFIG_SWIOTLB */ void * pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) { - void *ret; + unsigned long pci_addr; int gfp = GFP_ATOMIC; + void *ret; - if (!hwdev || hwdev->dma_mask == 0xffffffff) - gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ + if (!hwdev || hwdev->dma_mask <= 0xffffffff) + gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ ret = (void *)__get_free_pages(gfp, get_order(size)); + if (!ret) + return NULL; - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - } + memset(ret, 0, size); + pci_addr = virt_to_phys(ret); + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("pci_alloc_consistent: allocated memory is out of range for PCI device"); + *dma_handle = pci_addr; return ret; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 29291e1f9..752b2a9a1 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -11,6 +11,7 @@ #include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/interrupt.h> #include <linux/smp_lock.h> #include <asm/errno.h> @@ -55,24 +56,23 @@ #define WRITE_PMCS 0xa1 #define READ_PMDS 0xa2 #define STOP_PMCS 0xa3 -#define IA64_COUNTER_MASK 0xffffffffffffff6f -#define PERF_OVFL_VAL 0xffffffff +#define IA64_COUNTER_MASK 0xffffffffffffff6fL +#define PERF_OVFL_VAL 0xffffffffL + +volatile int used_by_system; struct perfmon_counter { unsigned long data; unsigned long counter_num; }; -unsigned long pmds[MAX_PERF_COUNTER]; -struct task_struct *perf_owner=NULL; +unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER]; asmlinkage unsigned long sys_perfmonctl (int cmd1, int cmd2, void *ptr) { struct perfmon_counter tmp, *cptr = ptr; - unsigned long pmd, cnum, dcr, flags; - struct task_struct *p; - struct pt_regs *regs; + unsigned long cnum, dcr, flags; struct perf_counter; int i; @@ -80,22 +80,24 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ case WRITE_PMCS_AND_START: /* Also starts counting */ - if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) - return -EFAULT; + if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system) + return -EINVAL; - if (cmd2 > MAX_PERF_COUNTER) + if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) return -EFAULT; - if (perf_owner && perf_owner != current) - return -EBUSY; - perf_owner = current; + current->thread.flags |= IA64_THREAD_PM_VALID; for (i = 0; i < cmd2; i++, cptr++) { copy_from_user(&tmp, cptr, sizeof(tmp)); /* XXX need to check validity of counter_num and perhaps data!! */ + if (tmp.counter_num < 4 + || tmp.counter_num >= 4 + MAX_PERF_COUNTER - used_by_system) + return -EFAULT; + ia64_set_pmc(tmp.counter_num, tmp.data); ia64_set_pmd(tmp.counter_num, 0); - pmds[tmp.counter_num - 4] = 0; + pmds[smp_processor_id()][tmp.counter_num - 4] = 0; } if (cmd1 == WRITE_PMCS_AND_START) { @@ -104,26 +106,13 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) dcr |= IA64_DCR_PP; ia64_set_dcr(dcr); local_irq_restore(flags); - - /* - * This is a no can do. It obviously wouldn't - * work on SMP where another process may not - * be blocked at all. We need to put in a perfmon - * IPI to take care of MP systems. See blurb above. 
- */ - lock_kernel(); - for_each_task(p) { - regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) -1 ; - ia64_psr(regs)->pp = 1; - } - unlock_kernel(); ia64_set_pmc(0, 0); } break; case READ_PMDS: - if (cmd2 > MAX_PERF_COUNTER) - return -EFAULT; + if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system) + return -EINVAL; if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2)) return -EFAULT; @@ -153,9 +142,13 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) * when we re-enabled interrupts. When I muck with dcr, * is the irq_save/restore needed? */ - for (i = 0, cnum = 4;i < MAX_PERF_COUNTER; i++, cnum++, cptr++){ - pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL); - put_user(pmd, &cptr->data); + for (i = 0, cnum = 4;i < cmd2; i++, cnum++, cptr++) { + tmp.data = (pmds[smp_processor_id()][i] + + (ia64_get_pmd(cnum) & PERF_OVFL_VAL)); + tmp.counter_num = cnum; + if (copy_to_user(cptr, &tmp, sizeof(tmp))) + return -EFAULT; + //put_user(pmd, &cptr->data); } local_irq_save(flags); __asm__ __volatile__("ssm psr.pp"); @@ -167,30 +160,22 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) case STOP_PMCS: ia64_set_pmc(0, 1); - for (i = 0; i < MAX_PERF_COUNTER; ++i) - ia64_set_pmc(i, 0); + ia64_srlz_d(); + for (i = 0; i < MAX_PERF_COUNTER - used_by_system; ++i) + ia64_set_pmc(4+i, 0); - local_irq_save(flags); - dcr = ia64_get_dcr(); - dcr &= ~IA64_DCR_PP; - ia64_set_dcr(dcr); - local_irq_restore(flags); - /* - * This is a no can do. It obviously wouldn't - * work on SMP where another process may not - * be blocked at all. We need to put in a perfmon - * IPI to take care of MP systems. See blurb above. - */ - lock_kernel(); - for_each_task(p) { - regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; - ia64_psr(regs)->pp = 0; + if (!used_by_system) { + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); } - unlock_kernel(); - perf_owner = NULL; + current->thread.flags &= ~(IA64_THREAD_PM_VALID); break; default: + return -EINVAL; break; } return 0; @@ -202,13 +187,13 @@ update_counters (void) unsigned long mask, i, cnum, val; mask = ia64_get_pmc(0) >> 4; - for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) { + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER - used_by_system; cnum++, i++, mask >>= 1) { + val = 0; if (mask & 0x1) - val = PERF_OVFL_VAL; - else + val += PERF_OVFL_VAL + 1; /* since we got an interrupt, might as well clear every pmd. 
*/ - val = ia64_get_pmd(cnum) & PERF_OVFL_VAL; - pmds[i] += val; + val += ia64_get_pmd(cnum) & PERF_OVFL_VAL; + pmds[smp_processor_id()][i] += val; ia64_set_pmd(cnum, 0); } } @@ -221,20 +206,61 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) ia64_srlz_d(); } +static struct irqaction perfmon_irqaction = { + handler: perfmon_interrupt, + flags: SA_INTERRUPT, + name: "perfmon" +}; + void perfmon_init (void) { - if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) { - printk("perfmon_init: could not allocate performance monitor vector %u\n", - PERFMON_IRQ); - return; - } + irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU; + irq_desc[PERFMON_IRQ].handler = &irq_type_ia64_sapic; + setup_irq(PERFMON_IRQ, &perfmon_irqaction); + ia64_set_pmv(PERFMON_IRQ); ia64_srlz_d(); printk("Initialized perfmon vector to %u\n",PERFMON_IRQ); } +void +perfmon_init_percpu (void) +{ + ia64_set_pmv(PERFMON_IRQ); + ia64_srlz_d(); +} + +void +ia64_save_pm_regs (struct thread_struct *t) +{ + int i; + + ia64_set_pmc(0, 1); + ia64_srlz_d(); + for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) { + t->pmd[i] = ia64_get_pmd(4+i); + t->pmod[i] = pmds[smp_processor_id()][i]; + t->pmc[i] = ia64_get_pmc(4+i); + } +} + +void +ia64_load_pm_regs (struct thread_struct *t) +{ + int i; + + for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) { + ia64_set_pmd(4+i, t->pmd[i]); + pmds[smp_processor_id()][i] = t->pmod[i]; + ia64_set_pmc(4+i, t->pmc[i]); + } + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + #else /* !CONFIG_PERFMON */ + asmlinkage unsigned long sys_perfmonctl (int cmd1, int cmd2, void *ptr) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3759e52f8..e586a4074 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -27,6 +27,8 @@ #include <asm/unwind.h> #include <asm/user.h> +#ifdef CONFIG_IA64_NEW_UNWIND + static void do_show_stack (struct unw_frame_info *info, void *arg) { @@ -44,6 +46,8 @@ do_show_stack (struct unw_frame_info *info, void *arg) } while (unw_unwind(info) >= 0); } +#endif + void show_stack (struct task_struct *task) { @@ -118,15 +122,14 @@ cpu_idle (void *unused) current->nice = 20; current->counter = -100; -#ifdef CONFIG_SMP - if (!current->need_resched) - min_xtp(); -#endif while (1) { - while (!current->need_resched) { +#ifdef CONFIG_SMP + if (!current->need_resched) + min_xtp(); +#endif + while (!current->need_resched) continue; - } #ifdef CONFIG_SMP normal_xtp(); #endif @@ -157,11 +160,12 @@ cpu_idle (void *unused) void ia64_save_extra (struct task_struct *task) { - extern void ia64_save_debug_regs (unsigned long *save_area); - extern void ia32_save_state (struct thread_struct *thread); - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_save_debug_regs(&task->thread.dbr[0]); +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + ia64_save_pm_regs(&task->thread); +#endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_save_state(&task->thread); } @@ -169,11 +173,12 @@ ia64_save_extra (struct task_struct *task) void ia64_load_extra (struct task_struct *task) { - extern void ia64_load_debug_regs (unsigned long *save_area); - extern void ia32_load_state (struct thread_struct *thread); - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_load_debug_regs(&task->thread.dbr[0]); +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + ia64_load_pm_regs(&task->thread); +#endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_load_state(&task->thread); } @@ -532,17 +537,6 
@@ exit_thread (void) } } -/* - * Free remaining state associated with DEAD_TASK. This is called - * after the parent of DEAD_TASK has collected the exist status of the - * task via wait(). - */ -void -release_thread (struct task_struct *dead_task) -{ - /* nothing to do */ -} - unsigned long get_wchan (struct task_struct *p) { diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index ae8991c51..10868ce41 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -549,6 +549,7 @@ void ia64_sync_fph (struct task_struct *child) { if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) { + ia64_psr(ia64_task_regs(child))->mfh = 0; ia64_set_fpu_owner(0); ia64_save_fpu(&child->thread.fph[0]); child->thread.flags |= IA64_THREAD_FPH_VALID; diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index f4b8ce9dd..f73cd8968 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -156,6 +156,14 @@ ia64_sal_init (struct ia64_sal_systab *systab) struct ia64_sal_desc_platform_feature *pf = (void *) p; printk("SAL: Platform features "); +#ifdef CONFIG_IA64_HAVE_IRQREDIR + /* + * Early versions of SAL say we don't have + * IRQ redirection, even though we do... + */ + pf->feature_mask |= (1 << 1); +#endif + if (pf->feature_mask & (1 << 0)) printk("BusLock "); diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c index bc55670bf..1bbe4feab 100644 --- a/arch/ia64/kernel/semaphore.c +++ b/arch/ia64/kernel/semaphore.c @@ -222,9 +222,6 @@ down_read_failed (struct rw_semaphore *sem) void __down_read_failed (struct rw_semaphore *sem, long count) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - while (1) { if (count == -1) { down_read_failed_biased(sem); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fcb4e6190..62e3e19ea 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -122,6 +122,10 @@ setup_arch (char **cmdline_p) */ memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param)); + *cmdline_p = __va(ia64_boot_param.command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + efi_init(); max_pfn = 0; @@ -133,19 +137,65 @@ setup_arch (char **cmdline_p) */ bootmap_start = PAGE_ALIGN(__pa(&_end)); if (ia64_boot_param.initrd_size) - bootmap_start = PAGE_ALIGN(bootmap_start + ia64_boot_param.initrd_size); + bootmap_start = PAGE_ALIGN(bootmap_start + + ia64_boot_param.initrd_size); bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); efi_memmap_walk(free_available_memory, 0); reserve_bootmem(bootmap_start, bootmap_size); + #ifdef CONFIG_BLK_DEV_INITRD initrd_start = ia64_boot_param.initrd_start; + if (initrd_start) { + u64 start, size; +# define is_same_page(a,b) (((a)&PAGE_MASK) == ((b)&PAGE_MASK)) + +#if 1 + /* XXX for now some backwards compatibility... 
*/ + if (initrd_start >= PAGE_OFFSET) + printk("Warning: boot loader passed virtual address " + "for initrd, please upgrade the loader\n"); + } else +#endif + /* + * The loader ONLY passes physical addresses + */ + initrd_start = (unsigned long)__va(initrd_start); initrd_end = initrd_start+ia64_boot_param.initrd_size; + start = initrd_start; + size = ia64_boot_param.initrd_size; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", (void *) initrd_start, ia64_boot_param.initrd_size); - reserve_bootmem(virt_to_phys(initrd_start), ia64_boot_param.initrd_size); + + /* + * The kernel end and the beginning of initrd can be + * on the same page. This would cause the page to be + * reserved twice. While not harmful, it does lead to + * a warning message which can cause confusion. Thus, + * we make sure that in this case we only reserve new + * pages, i.e., initrd only pages. We need to: + * + * - align up start + * - adjust size of reserved section accordingly + * + * It should be noted that this operation is only + * valid for the reserve_bootmem() call and does not + * affect the integrety of the initrd itself. + * + * reserve_bootmem() considers partial pages as reserved. + */ + if (is_same_page(initrd_start, (unsigned long)&_end)) { + start = PAGE_ALIGN(start); + size -= start-initrd_start; + + printk("Initial ramdisk & kernel on the same page: " + "reserving start=%lx size=%ld bytes\n", + start, size); + } + reserve_bootmem(__pa(start), size); } #endif #if 0 @@ -164,27 +214,21 @@ setup_arch (char **cmdline_p) /* process SAL system table: */ ia64_sal_init(efi.sal_systab); - *cmdline_p = __va(ia64_boot_param.command_line); - strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ - - printk("args to kernel: %s\n", *cmdline_p); - #ifdef CONFIG_SMP bootstrap_processor = hard_smp_processor_id(); current->processor = bootstrap_processor; #endif cpu_init(); /* initialize the bootstrap CPU */ +#ifdef CONFIG_IA64_GENERIC + machvec_init(acpi_get_sysname()); +#endif + if (efi.acpi) { /* Parse the ACPI tables */ acpi_parse(efi.acpi); } -#ifdef CONFIG_IA64_GENERIC - machvec_init(acpi_get_sysname()); -#endif - #ifdef CONFIG_VT # if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; @@ -197,8 +241,16 @@ setup_arch (char **cmdline_p) /* enable IA-64 Machine Check Abort Handling */ ia64_mca_init(); #endif + paging_init(); platform_setup(cmdline_p); + +#ifdef CONFIG_SWIOTLB + { + extern void setup_swiotlb (void); + setup_swiotlb(); + } +#endif } /* diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index b5153433f..d64305cf3 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -320,6 +320,58 @@ smp_send_flush_tlb(void) #endif /* !CONFIG_ITANIUM_PTCG */ /* + * Run a function on another CPU + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <retry> If true, keep retrying until ready. + * <wait> If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until the remote CPU is nearly ready to execute <func> + * or is or has executed. 
+ */ + +int +smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int retry, int wait) +{ + struct smp_call_struct data; + long timeout; + int cpus = 1; + + if (cpuid == smp_processor_id()) { + printk(__FUNCTION__" trying to call self\n"); + return -EBUSY; + } + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.unstarted_count, cpus); + atomic_set(&data.unfinished_count, cpus); + + if (pointer_lock(&smp_call_function_data, &data, retry)) + return -EBUSY; + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_single(cpuid, IPI_CALL_FUNC); + + /* Wait for response */ + timeout = jiffies + HZ; + while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) + barrier(); + if (atomic_read(&data.unstarted_count) > 0) { + smp_call_function_data = NULL; + return -ETIMEDOUT; + } + if (wait) + while (atomic_read(&data.unfinished_count) > 0) + barrier(); + /* unlock pointer */ + smp_call_function_data = NULL; + return 0; +} + +/* * Run a function on all other CPUs. * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. @@ -396,13 +448,19 @@ void smp_do_timer(struct pt_regs *regs) { int cpu = smp_processor_id(); + int user = user_mode(regs); struct cpuinfo_ia64 *data = &cpu_data[cpu]; - if (!--data->prof_counter) { - irq_enter(cpu, TIMER_IRQ); - update_process_times(user_mode(regs)); + if (--data->prof_counter <= 0) { data->prof_counter = data->prof_multiplier; - irq_exit(cpu, TIMER_IRQ); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + irq_enter(cpu, 0); + update_process_times(user); + irq_exit(cpu, 0); } } @@ -473,6 +531,11 @@ smp_callin(void) extern void ia64_rid_init(void); extern void ia64_init_itm(void); extern void ia64_cpu_local_tick(void); +#ifdef CONFIG_PERFMON + extern void perfmon_init_percpu(void); +#endif + + efi_map_pal_code(); cpu_init(); @@ -481,6 +544,10 @@ smp_callin(void) /* setup the CPU local timer tick */ ia64_init_itm(); +#ifdef CONFIG_PERFMON + perfmon_init_percpu(); +#endif + /* Disable all local interrupts */ ia64_set_lrr0(0, 1); ia64_set_lrr1(0, 1); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 95b2b3fc3..96ff76c01 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -150,11 +150,13 @@ do_gettimeofday (struct timeval *tv) static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - static unsigned long last_time; - static unsigned char count; int cpu = smp_processor_id(); unsigned long new_itm; +#if 0 + static unsigned long last_time; + static unsigned char count; int printed = 0; +#endif /* * Here we are in the timer irq handler. We have irqs locally @@ -192,7 +194,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) if (time_after(new_itm, ia64_get_itc())) break; -#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP)) +#if 0 /* * SoftSDV in SMP mode is _slow_, so we do "lose" ticks, * but it's really OK... 
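For reference, the palinfo rework earlier in this patch stops storing a pointer in each /proc entry and instead hands create_proc_read_entry() a single 64-bit cookie: pal_func_cpu_u_t packs the CPU the data should come from and the index into palinfo_entries[] into one value, and palinfo_read_entry() unpacks both from its data argument. A minimal user-space sketch of that packing (the table contents and field types below are invented for the example):

#include <stdio.h>
#include <stdint.h>

/* Same idea as pal_func_cpu_u_t: one 64-bit value carrying both fields. */
typedef union {
	uint64_t value;
	struct {
		uint32_t req_cpu;   /* CPU the information is requested from */
		uint32_t func_id;   /* index into the palinfo function table */
	} f;
} func_cpu_t;

/* Stand-ins for palinfo_entries[]; the real table points at PAL readers. */
static const char *fake_entries[] = { "version_info", "vm_info", "cache_info" };

/* The /proc read callback only ever gets the opaque cookie back. */
static void read_entry(uint64_t cookie)
{
	func_cpu_t f = { .value = cookie };

	printf("would run %s for CPU %u\n",
	       fake_entries[f.f.func_id], (unsigned) f.f.req_cpu);
}

int main(void)
{
	func_cpu_t f;

	/* what palinfo_init() does once per CPU and per table entry */
	f.f.req_cpu = 1;
	f.f.func_id = 2;
	read_entry(f.value);    /* passed where the kernel passes (void *)f.value */
	return 0;
}

The same cookie is what palinfo_read_entry() compares against smp_processor_id() to decide whether it can run the PAL call locally or must go through palinfo_handle_smp().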
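palinfo_handle_smp() in turn relies on the smp_call_function_single() primitive added above: the caller fills a call descriptor, sends a single IPI, spins on unstarted_count (with a timeout of HZ jiffies) until the target picks the call up, and, when wait is set, spins on unfinished_count until the callback has returned. A rough user-space analogue of that handshake, with a pthread standing in for the target CPU and an atomic flag for the IPI; every identifier below is made up for the illustration:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct call_data {
	void (*func)(void *info);   /* function to run on the remote CPU */
	void *info;                 /* argument handed to func */
	atomic_int unstarted;       /* CPUs that have not yet entered func */
	atomic_int unfinished;      /* CPUs that have not yet left func */
};

static struct call_data *pending;   /* "mailbox" the IPI handler reads */
static atomic_int ipi_raised;       /* stands in for send_IPI_single() */

/* What the interrupt handler on the target CPU would do. */
static void *remote_cpu(void *unused)
{
	(void) unused;
	while (!atomic_load(&ipi_raised))
		;                                   /* wait for the "IPI" */
	atomic_fetch_sub(&pending->unstarted, 1);   /* caller may stop spinning */
	pending->func(pending->info);
	atomic_fetch_sub(&pending->unfinished, 1);  /* completion, for wait == 1 */
	return NULL;
}

static void fill_buffer(void *info)     /* plays the palinfo proc_read hook */
{
	*(int *) info = 42;
}

int main(void)
{
	int result = 0;
	struct call_data data;
	pthread_t cpu1;

	data.func = fill_buffer;
	data.info = &result;
	atomic_init(&data.unstarted, 1);
	atomic_init(&data.unfinished, 1);
	pending = &data;

	pthread_create(&cpu1, NULL, remote_cpu, NULL);
	atomic_store(&ipi_raised, 1);       /* "send_IPI_single(cpu, IPI_CALL_FUNC)" */

	while (atomic_load(&data.unstarted) > 0)
		;                               /* kernel bounds this wait by a timeout */
	while (atomic_load(&data.unfinished) > 0)
		;                               /* only done when wait == 1 */

	printf("remote call wrote %d\n", result);
	pthread_join(cpu1, NULL);
	return 0;
}

The kernel version additionally takes retry and wait flags, serializes callers through smp_call_function_data, and gives up with -ETIMEDOUT if the target never starts the call; the sketch omits that error handling.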
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 4003b20f1..bf1abd839 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -204,11 +204,13 @@ disabled_fph_fault (struct pt_regs *regs) { struct task_struct *fpu_owner = ia64_get_fpu_owner(); + /* first, clear psr.dfh and psr.mfh: */ regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH); if (fpu_owner != current) { ia64_set_fpu_owner(current); if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) { + ia64_psr(ia64_task_regs(fpu_owner))->mfh = 0; fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID; __ia64_save_fpu(fpu_owner->thread.fph); } @@ -216,6 +218,11 @@ disabled_fph_fault (struct pt_regs *regs) __ia64_load_fpu(current->thread.fph); } else { __ia64_init_fpu(); + /* + * Set mfh because the state in thread.fph does not match + * the state in the fph partition. + */ + ia64_psr(regs)->mfh = 1; } } } diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 739007a96..5d0049f32 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -62,7 +62,7 @@ #define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1) #define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE) -#define UNW_DEBUG 1 +#define UNW_DEBUG 0 #define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! */ #if UNW_DEBUG diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S index 3b16916d0..350e66256 100644 --- a/arch/ia64/lib/memcpy.S +++ b/arch/ia64/lib/memcpy.S @@ -1,3 +1,20 @@ +/* + * + * Optimized version of the standard memcpy() function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * no return value + * + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> + #include <asm/asmmacro.h> GLOBAL_ENTRY(bcopy) @@ -10,77 +27,254 @@ END(bcopy) // FALL THROUGH GLOBAL_ENTRY(memcpy) -# define MEM_LAT 4 - -# define N MEM_LAT-1 -# define Nrot ((MEM_LAT + 7) & ~7) +# define MEM_LAT 2 /* latency to L1 cache */ # define dst r2 # define src r3 -# define len r9 -# define saved_pfs r10 -# define saved_lc r11 -# define saved_pr r16 -# define t0 r17 -# define cnt r18 +# define retval r8 +# define saved_pfs r9 +# define saved_lc r10 +# define saved_pr r11 +# define cnt r16 +# define src2 r17 +# define t0 r18 +# define t1 r19 +# define t2 r20 +# define t3 r21 +# define t4 r22 +# define src_end r23 +# define N (MEM_LAT + 4) +# define Nrot ((N + 7) & ~7) + + /* + * First, check if everything (src, dst, len) is a multiple of eight. If + * so, we handle everything with no taken branches (other than the loop + * itself) and a small icache footprint. Otherwise, we jump off to + * the more general copy routine handling arbitrary + * sizes/alignment etc. + */ UNW(.prologue) UNW(.save ar.pfs, saved_pfs) alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) lfetch [in1] +#else + nop.m 0 +#endif + or t0=in0,in1 + ;; - .rotr val[MEM_LAT] - .rotp p[MEM_LAT] - + or t0=t0,in2 UNW(.save ar.lc, saved_lc) mov saved_lc=ar.lc - - or t0=in0,in1 UNW(.save pr, saved_pr) mov saved_pr=pr - UNW(.body) - - mov ar.ec=MEM_LAT + cmp.eq p6,p0=in2,r0 // zero length? 
+ mov retval=in0 // return dst +(p6) br.ret.spnt.many rp // zero length, return immediately + ;; - mov r8=in0 // return dst - shr cnt=in2,3 // number of 8-byte words to copy + mov dst=in0 // copy because of rotation + shr.u cnt=in2,3 // number of 8-byte words to copy mov pr.rot=1<<16 ;; - cmp.eq p6,p0=in2,r0 // zero length? - or t0=t0,in2 -(p6) br.ret.spnt.many rp // yes, return immediately - mov dst=in0 // copy because of rotation - mov src=in1 // copy because of rotation adds cnt=-1,cnt // br.ctop is repeat/until + cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? + UNW(.body) + mov ar.ec=N ;; + and t0=0x7,t0 mov ar.lc=cnt ;; cmp.ne p6,p0=t0,r0 -(p6) br.cond.spnt.few slow_memcpy + mov src=in1 // copy because of rotation +(p7) br.cond.spnt.few memcpy_short +(p6) br.cond.spnt.few memcpy_long + ;; + .rotr val[N] + .rotp p[N] 1: (p[0]) ld8 val[0]=[src],8 -(p[N]) st8 [dst]=val[N],8 - br.ctop.sptk.few 1b +(p[N-1])st8 [dst]=val[N-1],8 + br.ctop.dptk.few 1b ;; -.exit: mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 + mov pr=saved_pr,-1 mov ar.pfs=saved_pfs br.ret.sptk.many rp -slow_memcpy: - adds cnt=-1,in2 + /* + * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time + * copy loop. This performs relatively poorly on Itanium, but it doesn't + * get used very often (gcc inlines small copies) and due to atomicity + * issues, we want to avoid read-modify-write of entire words. + */ + .align 32 +memcpy_short: + adds cnt=-1,in2 // br.ctop is repeat/until + mov ar.ec=MEM_LAT ;; mov ar.lc=cnt ;; + /* + * It is faster to put a stop bit in the loop here because it makes + * the pipeline shorter (and latency is what matters on short copies). + */ 1: (p[0]) ld1 val[0]=[src],1 -(p[N]) st1 [dst]=val[N],1 - br.ctop.sptk.few 1b - br.sptk.few .exit + ;; +(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't + * an overriding concern here, but throughput is. We first do + * sub-word copying until the destination is aligned, then we check + * if the source is also aligned. If so, we do a simple load/store-loop + * until there are less than 8 bytes left over and then we do the tail, + * by storing the last few bytes using sub-word copying. If the source + * is not aligned, we branch off to the non-congruent loop. + * + * stage: op: + * 0 ld + * : + * MEM_LAT+3 shrp + * MEM_LAT+4 st + * + * On Itanium, the pipeline itself runs without stalls. However, br.ctop + * seems to introduce an unavoidable bubble in the pipeline so the overall + * latency is 2 cycles/iteration. This gives us a _copy_ throughput + * of 4 byte/cycle. Still not bad. 
+ */ +# undef N +# undef Nrot +# define N (MEM_LAT + 5) /* number of stages */ +# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ + +#define LOG_LOOP_SIZE 6 + +memcpy_long: + alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame + and t0=-8,src // t0 = src & ~7 + and t2=7,src // t2 = src & 7 + ;; + ld8 t0=[t0] // t0 = 1st source word + adds src2=7,src // src2 = (src + 7) + sub t4=r0,dst // t4 = -dst + ;; + and src2=-8,src2 // src2 = (src + 7) & ~7 + shl t2=t2,3 // t2 = 8*(src & 7) + shl t4=t4,3 // t4 = 8*(dst & 7) + ;; + ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise + sub t3=64,t2 // t3 = 64-8*(src & 7) + shr.u t0=t0,t2 + ;; + add src_end=src,in2 + shl t1=t1,t3 + mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) + ;; + or t0=t0,t1 + mov cnt=r0 + adds src_end=-1,src_end + ;; +(p3) st1 [dst]=t0,1 +(p3) shr.u t0=t0,8 +(p3) adds cnt=1,cnt + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 +(p4) adds cnt=2,cnt + ;; +(p5) st4 [dst]=t0,4 +(p5) adds cnt=4,cnt + and src_end=-8,src_end // src_end = last word of source buffer + ;; + + // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: + +1:{ add src=cnt,src // make src point to remainder of source buffer + sub cnt=in2,cnt // cnt = number of bytes left to copy + mov t4=ip + } ;; + and src2=-8,src // align source pointer + adds t4=memcpy_loops-1b,t4 + mov ar.ec=N + + and t0=7,src // t0 = src & 7 + shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy + shl cnt=cnt,3 // move bits 0-2 to 3-5 + ;; + + .rotr val[N+1], w[2] + .rotp p[N] + + cmp.ne p6,p0=t0,r0 // is src aligned, too? + shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) + adds t2=-1,t2 // br.ctop is repeat/until + ;; + add t4=t0,t4 + mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy + mov ar.lc=t2 + ;; +(p6) ld8 val[1]=[src2],8 // prime the pump... + mov b6=t4 + br.sptk.few b6 + ;; + +memcpy_tail: + // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is + // less than 8) and t0 contains the last few bytes of the src buffer: +(p5) st4 [dst]=t0,4 +(p5) shr.u t0=t0,32 + mov ar.lc=saved_lc + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 + mov ar.pfs=saved_pfs + ;; +(p3) st1 [dst]=t0 + mov pr=saved_pr,-1 + br.ret.sptk.many rp + +/////////////////////////////////////////////////////// + .align 64 + +#define COPY(shift,index) \ + 1: \ + { .mfi \ + (p[0]) ld8 val[0]=[src2],8; \ + nop.f 0; \ + (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ + }; \ + { .mbb \ + (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ + nop.b 0; \ + br.ctop.dptk.few 1b; \ + }; \ + ;; \ + ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ + ;; \ + shrp t0=val[N-1],val[N-index],shift; \ + br memcpy_tail +memcpy_loops: + COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ + COPY(8, 0) + COPY(16, 0) + COPY(24, 0) + COPY(32, 0) + COPY(40, 0) + COPY(48, 0) + COPY(56, 0) END(memcpy) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8ddda7e11..3652cfc80 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -185,8 +185,42 @@ free_initmem (void) void free_initrd_mem(unsigned long start, unsigned long end) { + /* + * EFI uses 4KB pages while the kernel can use 4KB or bigger. + * Thus EFI and the kernel may have different page sizes. It is + * therefore possible to have the initrd share the same page as + * the end of the kernel (given current setup). 
+ * + * To avoid freeing/using the wrong page (kernel sized) we: + * - align up the beginning of initrd + * - keep the end untouched + * + * | | + * |=============| a000 + * | | + * | | + * | | 9000 + * |/////////////| + * |/////////////| + * |=============| 8000 + * |///INITRD////| + * |/////////////| + * |/////////////| 7000 + * | | + * |KKKKKKKKKKKKK| + * |=============| 6000 + * |KKKKKKKKKKKKK| + * |KKKKKKKKKKKKK| + * K=kernel using 8KB pages + * + * In this example, we must free page 8000 ONLY. So we must align up + * initrd_start and keep initrd_end as is. + */ + start = PAGE_ALIGN(start); + if (start < end) printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { clear_bit(PG_reserved, &virt_to_page(start)->flags); set_page_count(virt_to_page(start), 1); @@ -423,5 +457,4 @@ mem_init (void) #ifdef CONFIG_IA32_SUPPORT ia32_gdt_init(); #endif - return; } diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 026f88998..875ce446c 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -1,8 +1,11 @@ /* * TLB support routines. * - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 08/02/00 A. Mallick <asit.k.mallick@intel.com> + * Modified RID allocation for SMP */ #include <linux/config.h> #include <linux/init.h> @@ -27,9 +30,11 @@ 1 << _PAGE_SIZE_8K | \ 1 << _PAGE_SIZE_4K ) -static void wrap_context (struct mm_struct *mm); - -unsigned long ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; +struct ia64_ctx ia64_ctx = { + lock: SPIN_LOCK_UNLOCKED, + next: 1, + limit: (1UL << IA64_HW_CONTEXT_BITS) +}; /* * Put everything in a struct so we avoid the global offset table whenever @@ -106,49 +111,43 @@ flush_tlb_no_ptcg (unsigned long start, unsigned long end, unsigned long nbits) #endif /* CONFIG_SMP && !CONFIG_ITANIUM_PTCG */ -void -get_new_mmu_context (struct mm_struct *mm) -{ - if ((ia64_next_context & IA64_HW_CONTEXT_MASK) == 0) { - wrap_context(mm); - } - mm->context = ia64_next_context++; -} - /* - * This is where we handle the case where (ia64_next_context & - * IA64_HW_CONTEXT_MASK) == 0. Whenever this happens, we need to - * flush the entire TLB and skip over region id number 0, which is - * used by the kernel. + * Acquire the ia64_ctx.lock before calling this function! */ -static void -wrap_context (struct mm_struct *mm) +void +wrap_mmu_context (struct mm_struct *mm) { - struct task_struct *task; + struct task_struct *tsk; + unsigned long tsk_context; + + if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + ia64_ctx.next = 300; /* skip daemons */ + ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); /* - * We wrapped back to the first region id so we nuke the TLB - * so we can switch to the next generation of region ids. + * Scan all the task's mm->context and set proper safe range */ - __flush_tlb_all(); - if (ia64_next_context++ == 0) { - /* - * Oops, we've used up all 64 bits of the context - * space---walk through task table to ensure we don't - * get tricked into using an old context. If this - * happens, the machine has been running for a long, - * long time! 
- */ - ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; - read_lock(&tasklist_lock); - for_each_task (task) { - if (task->mm == mm) - continue; - flush_tlb_mm(mm); + read_lock(&tasklist_lock); + repeat: + for_each_task(tsk) { + if (!tsk->mm) + continue; + tsk_context = tsk->mm->context; + if (tsk_context == ia64_ctx.next) { + if (++ia64_ctx.next >= ia64_ctx.limit) { + /* empty range: reset the range limit and start over */ + if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + ia64_ctx.next = 300; + ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); + goto repeat; + } } - read_unlock(&tasklist_lock); + if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) + ia64_ctx.limit = tsk_context; } + read_unlock(&tasklist_lock); + flush_tlb_all(); } void diff --git a/arch/ia64/sn/sn1/irq.c b/arch/ia64/sn/sn1/irq.c index df8e56943..a8270fd2a 100644 --- a/arch/ia64/sn/sn1/irq.c +++ b/arch/ia64/sn/sn1/irq.c @@ -1,9 +1,10 @@ #include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/irq.h> -#include <asm/irq.h> #include <asm/ptrace.h> -static int +static unsigned int sn1_startup_irq(unsigned int irq) { return(0); @@ -24,23 +25,16 @@ sn1_enable_irq(unsigned int irq) { } -static int -sn1_handle_irq(unsigned int irq, struct pt_regs *regs) -{ - return(0); -} - struct hw_interrupt_type irq_type_sn1 = { "sn1_irq", sn1_startup_irq, sn1_shutdown_irq, - sn1_handle_irq, sn1_enable_irq, sn1_disable_irq }; void -sn1_irq_init (struct irq_desc desc[NR_IRQS]) +sn1_irq_init (void) { int i; diff --git a/arch/ia64/sn/sn1/machvec.c b/arch/ia64/sn/sn1/machvec.c index 2e36b2e08..409d9a2ea 100644 --- a/arch/ia64/sn/sn1/machvec.c +++ b/arch/ia64/sn/sn1/machvec.c @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME sn1 #include <asm/machvec_init.h> -#include <asm/machvec_sn1.h> - -MACHVEC_DEFINE(sn1) diff --git a/arch/ia64/sn/sn1/setup.c b/arch/ia64/sn/sn1/setup.c index 45242fc26..7b397bb6b 100644 --- a/arch/ia64/sn/sn1/setup.c +++ b/arch/ia64/sn/sn1/setup.c @@ -13,6 +13,7 @@ #include <linux/console.h> #include <linux/timex.h> #include <linux/sched.h> +#include <linux/ioport.h> #include <asm/io.h> #include <asm/machvec.h> diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S index 5c199bc1a..c8c13363c 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -46,6 +46,15 @@ SECTIONS { *(__ex_table) } __stop___ex_table = .; +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + machvec_start = .; + .machvec : AT(ADDR(.machvec) - PAGE_OFFSET) + { *(.machvec) } + machvec_end = .; +#endif + __start___ksymtab = .; /* Kernel symbol table */ __ksymtab : AT(ADDR(__ksymtab) - PAGE_OFFSET) { *(__ksymtab) } |
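The new region-ID allocator in arch/ia64/mm/tlb.c above replaces the single ia64_next_context counter with a [next, limit) window: contexts are handed out from next until limit is reached, and wrap_mmu_context() then rescans every live mm to find the next stretch of unused values (restarting at 300 to skip contexts still held by daemons, per the comment) before flushing the TLB. A small user-space model of that window recomputation, with a plain array standing in for the task-list scan (the array contents and the 24-bit context width are assumptions for the example):

#include <stdio.h>

#define CTX_FIRST  300UL             /* low context numbers are skipped ("daemons") */
#define CTX_MAX    (1UL << 24)       /* stands in for 1UL << IA64_HW_CONTEXT_BITS */

/* contexts currently assigned to some mm; replaces the for_each_task() walk */
static unsigned long live_ctx[] = { 300, 301, 500, 1000 };
#define NLIVE (sizeof(live_ctx) / sizeof(live_ctx[0]))

static unsigned long ctx_next = CTX_MAX;    /* force a wrap on first allocation */
static unsigned long ctx_limit = CTX_MAX;

/* Recompute [ctx_next, ctx_limit) so that no live mm owns a value in it. */
static void wrap_context(void)
{
	unsigned long i, c;

	if (ctx_next >= CTX_MAX)
		ctx_next = CTX_FIRST;
	ctx_limit = CTX_MAX;
repeat:
	for (i = 0; i < NLIVE; i++) {
		c = live_ctx[i];
		if (c == ctx_next) {
			if (++ctx_next >= ctx_limit) {
				/* window collapsed: reset it and rescan from the top */
				if (ctx_next >= CTX_MAX)
					ctx_next = CTX_FIRST;
				ctx_limit = CTX_MAX;
				goto repeat;
			}
		}
		if (c > ctx_next && c < ctx_limit)
			ctx_limit = c;       /* nearest in-use value above next */
	}
	/* the kernel flushes the whole TLB here before old values are recycled */
}

static unsigned long get_context(void)
{
	if (ctx_next >= ctx_limit)
		wrap_context();
	return ctx_next++;
}

int main(void)
{
	/* live contexts 300 and 301 push the window to [302, 500) */
	for (int i = 0; i < 3; i++)
		printf("new context: %lu\n", get_context());
	return 0;
}

Compared with the old wrap_context(), which walked the task list on every wrap, the window makes the common-path allocation a plain increment under ia64_ctx.lock; the task-list scan and TLB flush only happen when the window is exhausted.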