author     Ralf Baechle <ralf@linux-mips.org>    2000-08-28 22:00:09 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-08-28 22:00:09 +0000
commit     1a1d77dd589de5a567fa95e36aa6999c704ceca4 (patch)
tree       141e31f89f18b9fe0831f31852e0435ceaccafc5 /arch/ia64
parent     fb9c690a18b3d66925a65b17441c37fa14d4370b (diff)
Merge with 2.4.0-test7.
Diffstat (limited to 'arch/ia64')
-rw-r--r--  arch/ia64/config.in           |  30
-rw-r--r--  arch/ia64/dig/iosapic.c       |  57
-rw-r--r--  arch/ia64/dig/machvec.c       |   4
-rw-r--r--  arch/ia64/dig/setup.c         |   4
-rw-r--r--  arch/ia64/hp/hpsim_machvec.c  |   4
-rw-r--r--  arch/ia64/ia32/ia32_entry.S   |  58
-rw-r--r--  arch/ia64/ia32/sys_ia32.c     | 424
-rw-r--r--  arch/ia64/kernel/Makefile     |   4
-rw-r--r--  arch/ia64/kernel/acpi.c       |  46
-rw-r--r--  arch/ia64/kernel/efi.c        |  47
-rw-r--r--  arch/ia64/kernel/entry.S      |  18
-rw-r--r--  arch/ia64/kernel/head.S       |  73
-rw-r--r--  arch/ia64/kernel/ia64_ksyms.c |   2
-rw-r--r--  arch/ia64/kernel/irq_ia64.c   |   7
-rw-r--r--  arch/ia64/kernel/ivt.S        |  65
-rw-r--r--  arch/ia64/kernel/machvec.c    |  15
-rw-r--r--  arch/ia64/kernel/pal.S        |  54
-rw-r--r--  arch/ia64/kernel/palinfo.c    | 443
-rw-r--r--  arch/ia64/kernel/pci-dma.c    | 503
-rw-r--r--  arch/ia64/kernel/perfmon.c    | 148
-rw-r--r--  arch/ia64/kernel/process.c    |  40
-rw-r--r--  arch/ia64/kernel/ptrace.c     |   1
-rw-r--r--  arch/ia64/kernel/sal.c        |   8
-rw-r--r--  arch/ia64/kernel/semaphore.c  |   3
-rw-r--r--  arch/ia64/kernel/setup.c      |  76
-rw-r--r--  arch/ia64/kernel/smp.c        |  75
-rw-r--r--  arch/ia64/kernel/time.c       |   8
-rw-r--r--  arch/ia64/kernel/traps.c      |   7
-rw-r--r--  arch/ia64/kernel/unwind.c     |   2
-rw-r--r--  arch/ia64/lib/memcpy.S        | 264
-rw-r--r--  arch/ia64/mm/init.c           |  35
-rw-r--r--  arch/ia64/mm/tlb.c            |  77
-rw-r--r--  arch/ia64/sn/sn1/irq.c        |  14
-rw-r--r--  arch/ia64/sn/sn1/machvec.c    |   4
-rw-r--r--  arch/ia64/sn/sn1/setup.c      |   1
-rw-r--r--  arch/ia64/vmlinux.lds.S       |   9
36 files changed, 1932 insertions, 698 deletions
diff --git a/arch/ia64/config.in b/arch/ia64/config.in
index 8b8dd761c..0fdf86c19 100644
--- a/arch/ia64/config.in
+++ b/arch/ia64/config.in
@@ -18,15 +18,16 @@ mainmenu_option next_comment
comment 'General setup'
define_bool CONFIG_IA64 y
+define_bool CONFIG_SWIOTLB y # for now...
define_bool CONFIG_ISA n
define_bool CONFIG_SBUS n
choice 'IA-64 system type' \
- "Generic CONFIG_IA64_GENERIC \
+ "generic CONFIG_IA64_GENERIC \
+ DIG-compliant CONFIG_IA64_DIG \
HP-simulator CONFIG_IA64_HP_SIM \
- SN1-simulator CONFIG_IA64_SGI_SN1_SIM \
- DIG-compliant CONFIG_IA64_DIG" Generic
+ SN1-simulator CONFIG_IA64_SGI_SN1_SIM" generic
choice 'Kernel page size' \
"4KB CONFIG_IA64_PAGE_SIZE_4KB \
@@ -38,16 +39,18 @@ if [ "$CONFIG_IA64_DIG" = "y" ]; then
define_bool CONFIG_ITANIUM y
define_bool CONFIG_IA64_BRL_EMU y
bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC
- bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC
+ if [ "$CONFIG_ITANIUM_ASTEP_SPECIFIC" = "y" ]; then
+ bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC
+ fi
+ bool ' Enable Itanium B-step specific code' CONFIG_ITANIUM_BSTEP_SPECIFIC
+ if [ "$CONFIG_ITANIUM_BSTEP_SPECIFIC" = "y" ]; then
+ bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC
+ fi
+ bool ' Force interrupt redirection' CONFIG_IA64_HAVE_IRQREDIR
bool ' Enable use of global TLB purge instruction (ptc.g)' CONFIG_ITANIUM_PTCG
bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS
bool ' Enable AzusA hacks' CONFIG_IA64_AZUSA_HACKS
- bool ' Emulate PAL/SAL/EFI firmware' CONFIG_IA64_FW_EMU
- bool ' Enable IA64 Machine Check Abort' CONFIG_IA64_MCA
-fi
-
-if [ "$CONFIG_IA64_GENERIC" = "y" ]; then
- define_bool CONFIG_IA64_SOFTSDV_HACKS y
+ bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA
fi
if [ "$CONFIG_IA64_SGI_SN1_SIM" = "y" ]; then
@@ -59,7 +62,7 @@ define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore.
bool 'SMP support' CONFIG_SMP
bool 'Performance monitor support' CONFIG_PERFMON
-bool '/proc/palinfo support' CONFIG_IA64_PALINFO
+bool '/proc/pal support' CONFIG_IA64_PALINFO
bool 'Networking support' CONFIG_NET
bool 'System V IPC' CONFIG_SYSVIPC
@@ -161,9 +164,9 @@ source drivers/char/Config.in
#source drivers/misc/Config.in
-source fs/Config.in
+source drivers/media/Config.in
-source fs/nls/Config.in
+source fs/Config.in
if [ "$CONFIG_VT" = "y" ]; then
mainmenu_option next_comment
@@ -188,6 +191,7 @@ fi
endmenu
source drivers/usb/Config.in
+source drivers/input/Config.in
fi # !HP_SIM
diff --git a/arch/ia64/dig/iosapic.c b/arch/ia64/dig/iosapic.c
index 2426a0193..7d9a084fd 100644
--- a/arch/ia64/dig/iosapic.c
+++ b/arch/ia64/dig/iosapic.c
@@ -22,12 +22,14 @@
#include <linux/string.h>
#include <linux/irq.h>
+#include <asm/acpi-ext.h>
+#include <asm/delay.h>
#include <asm/io.h>
#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>
-#include <asm/delay.h>
-#include <asm/processor.h>
#undef DEBUG_IRQ_ROUTING
@@ -315,10 +317,6 @@ dig_irq_init (void)
*/
outb(0xff, 0xA1);
outb(0xff, 0x21);
-
-#ifndef CONFIG_IA64_DIG
- iosapic_init(IO_SAPIC_DEFAULT_ADDR);
-#endif
}
void
@@ -337,15 +335,23 @@ dig_pci_fixup (void)
if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
struct pci_dev * bridge = dev->bus->self;
- /* do the bridge swizzle... */
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- irq = iosapic_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin);
+ /* allow for multiple bridges on an adapter */
+ do {
+ /* do the bridge swizzle... */
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = iosapic_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ } while (irq < 0 && (bridge = bridge->bus->self));
if (irq >= 0)
printk(KERN_WARNING
"PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n",
bridge->bus->number, PCI_SLOT(bridge->devfn),
pin, irq);
+ else
+ printk(KERN_WARNING
+ "PCI: Couldn't map irq for B%d,I%d,P%d\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn),
+ pin);
}
if (irq >= 0) {
printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n",
@@ -361,3 +367,34 @@ dig_pci_fixup (void)
dev->irq = 15; /* Spurious interrupts */
}
}
+
+/*
+ * Register an IOSAPIC discovered via ACPI.
+ */
+void __init
+dig_register_iosapic (acpi_entry_iosapic_t *iosapic)
+{
+ unsigned int ver, v;
+ int l, max_pin;
+
+ ver = iosapic_version(iosapic->address);
+ max_pin = (ver >> 16) & 0xff;
+
+ printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n",
+ (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address,
+ iosapic->irq_base, iosapic->irq_base + max_pin);
+
+ for (l = 0; l <= max_pin; l++) {
+ v = iosapic->irq_base + l;
+ if (v < 16)
+ v = isa_irq_to_vector(v);
+ if (v > IA64_MAX_VECTORED_IRQ) {
+ printk(" !!! bad IOSAPIC interrupt vector: %u\n", v);
+ continue;
+ }
+ /* XXX Check for IOSAPIC collisions */
+ iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0);
+ iosapic_baseirq(v) = iosapic->irq_base;
+ }
+ iosapic_init(iosapic->address, iosapic->irq_base);
+}
diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c
index 640412d7e..4d2452745 100644
--- a/arch/ia64/dig/machvec.c
+++ b/arch/ia64/dig/machvec.c
@@ -1,4 +1,2 @@
+#define MACHVEC_PLATFORM_NAME dig
#include <asm/machvec_init.h>
-#include <asm/machvec_dig.h>
-
-MACHVEC_DEFINE(dig)
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
index 45c1f96f5..fa48254cc 100644
--- a/arch/ia64/dig/setup.c
+++ b/arch/ia64/dig/setup.c
@@ -24,10 +24,6 @@
#include <asm/machvec.h>
#include <asm/system.h>
-#ifdef CONFIG_IA64_FW_EMU
-# include "../../kernel/fw-emu.c"
-#endif
-
/*
* This is here so we can use the CMOS detection in ide-probe.c to
* determine what drives are present. In theory, we don't need this
diff --git a/arch/ia64/hp/hpsim_machvec.c b/arch/ia64/hp/hpsim_machvec.c
index 7d78f4961..76af3b4e2 100644
--- a/arch/ia64/hp/hpsim_machvec.c
+++ b/arch/ia64/hp/hpsim_machvec.c
@@ -1,4 +1,2 @@
+#define MACHVEC_PLATFORM_NAME hpsim
#include <asm/machvec_init.h>
-#include <asm/machvec_hpsim.h>
-
-MACHVEC_DEFINE(hpsim)
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index a9dc378cf..ad963b92f 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -73,7 +73,7 @@ GLOBAL_ENTRY(ia32_trace_syscall)
END(ia32_trace_syscall)
GLOBAL_ENTRY(sys32_vfork)
- alloc r16=ar.pfs,2,2,3,0;;
+ alloc r16=ar.pfs,2,2,4,0;;
mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags
br.cond.sptk.few .fork1 // do the work
END(sys32_vfork)
@@ -105,7 +105,7 @@ END(sys32_fork)
.align 8
.globl ia32_syscall_table
ia32_syscall_table:
- data8 sys_ni_syscall /* 0 - old "setup(" system call*/
+ data8 sys32_ni_syscall /* 0 - old "setup(" system call*/
data8 sys_exit
data8 sys32_fork
data8 sys_read
@@ -122,25 +122,25 @@ ia32_syscall_table:
data8 sys_mknod
data8 sys_chmod /* 15 */
data8 sys_lchown
- data8 sys_ni_syscall /* old break syscall holder */
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall /* old break syscall holder */
+ data8 sys32_ni_syscall
data8 sys_lseek
data8 sys_getpid /* 20 */
data8 sys_mount
data8 sys_oldumount
data8 sys_setuid
data8 sys_getuid
- data8 sys_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */
+ data8 sys32_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */
data8 sys32_ptrace
data8 sys32_alarm
- data8 sys_ni_syscall
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall
+ data8 sys32_ni_syscall
data8 ia32_utime /* 30 */
- data8 sys_ni_syscall /* old stty syscall holder */
- data8 sys_ni_syscall /* old gtty syscall holder */
+ data8 sys32_ni_syscall /* old stty syscall holder */
+ data8 sys32_ni_syscall /* old gtty syscall holder */
data8 sys_access
data8 sys_nice
- data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */
+ data8 sys32_ni_syscall /* 35 */ /* old ftime syscall holder */
data8 sys_sync
data8 sys_kill
data8 sys_rename
@@ -149,22 +149,22 @@ ia32_syscall_table:
data8 sys_dup
data8 sys32_pipe
data8 sys32_times
- data8 sys_ni_syscall /* old prof syscall holder */
+ data8 sys32_ni_syscall /* old prof syscall holder */
data8 sys_brk /* 45 */
data8 sys_setgid
data8 sys_getgid
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall
data8 sys_geteuid
data8 sys_getegid /* 50 */
data8 sys_acct
data8 sys_umount /* recycled never used phys( */
- data8 sys_ni_syscall /* old lock syscall holder */
+ data8 sys32_ni_syscall /* old lock syscall holder */
data8 ia32_ioctl
- data8 sys_fcntl /* 55 */
- data8 sys_ni_syscall /* old mpx syscall holder */
+ data8 sys32_fcntl /* 55 */
+ data8 sys32_ni_syscall /* old mpx syscall holder */
data8 sys_setpgid
- data8 sys_ni_syscall /* old ulimit syscall holder */
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall /* old ulimit syscall holder */
+ data8 sys32_ni_syscall
data8 sys_umask /* 60 */
data8 sys_chroot
data8 sys_ustat
@@ -172,12 +172,12 @@ ia32_syscall_table:
data8 sys_getppid
data8 sys_getpgrp /* 65 */
data8 sys_setsid
- data8 sys_ni_syscall
- data8 sys_ni_syscall
- data8 sys_ni_syscall
+ data8 sys32_sigaction
+ data8 sys32_ni_syscall
+ data8 sys32_ni_syscall
data8 sys_setreuid /* 70 */
data8 sys_setregid
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall
data8 sys_sigpending
data8 sys_sethostname
data8 sys32_setrlimit /* 75 */
@@ -189,7 +189,7 @@ ia32_syscall_table:
data8 sys_setgroups
data8 old_select
data8 sys_symlink
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall
data8 sys_readlink /* 85 */
data8 sys_uselib
data8 sys_swapon
@@ -203,7 +203,7 @@ ia32_syscall_table:
data8 sys_fchown /* 95 */
data8 sys_getpriority
data8 sys_setpriority
- data8 sys_ni_syscall /* old profil syscall holder */
+ data8 sys32_ni_syscall /* old profil syscall holder */
data8 sys32_statfs
data8 sys32_fstatfs /* 100 */
data8 sys_ioperm
@@ -214,11 +214,11 @@ ia32_syscall_table:
data8 sys32_newstat
data8 sys32_newlstat
data8 sys32_newfstat
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall
data8 sys_iopl /* 110 */
data8 sys_vhangup
- data8 sys_ni_syscall // used to be sys_idle
- data8 sys_ni_syscall
+ data8 sys32_ni_syscall // used to be sys_idle
+ data8 sys32_ni_syscall
data8 sys32_wait4
data8 sys_swapoff /* 115 */
data8 sys_sysinfo
@@ -242,7 +242,7 @@ ia32_syscall_table:
data8 sys_bdflush
data8 sys_sysfs /* 135 */
data8 sys_personality
- data8 sys_ni_syscall /* for afs_syscall */
+ data8 sys32_ni_syscall /* for afs_syscall */
data8 sys_setfsuid
data8 sys_setfsgid
data8 sys_llseek /* 140 */
@@ -293,8 +293,8 @@ ia32_syscall_table:
data8 sys_capset /* 185 */
data8 sys_sigaltstack
data8 sys_sendfile
- data8 sys_ni_syscall /* streams1 */
- data8 sys_ni_syscall /* streams2 */
+ data8 sys32_ni_syscall /* streams1 */
+ data8 sys32_ni_syscall /* streams2 */
data8 sys32_vfork /* 190 */
/*
* CAUTION: If any system calls are added beyond this point
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index cbec84f2b..a6bf4a8d8 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -74,10 +74,14 @@ nargs(unsigned int arg, char **ap)
n = 0;
do {
- if ((err = get_user(addr, (int *)A(arg))) != 0)
- return(err);
- if (ap)
- *ap++ = (char *)A(addr);
+ err = get_user(addr, (int *)A(arg));
+ if (IS_ERR(err))
+ return err;
+ if (ap) { /* no access_ok needed, we allocated */
+ err = __put_user((char *)A(addr), ap++);
+ if (IS_ERR(err))
+ return err;
+ }
arg += sizeof(unsigned int);
n++;
} while (addr);
@@ -101,7 +105,11 @@ int stack)
int na, ne, r, len;
na = nargs(argv, NULL);
+ if (IS_ERR(na))
+ return(na);
ne = nargs(envp, NULL);
+ if (IS_ERR(ne))
+ return(ne);
len = (na + ne + 2) * sizeof(*av);
/*
* kmalloc won't work because the `sys_exec' code will attempt
@@ -121,12 +129,21 @@ int stack)
if (IS_ERR(av))
return (long)av;
ae = av + na + 1;
- av[na] = (char *)0;
- ae[ne] = (char *)0;
- (void)nargs(argv, av);
- (void)nargs(envp, ae);
+ r = __put_user(0, (av + na));
+ if (IS_ERR(r))
+ goto out;
+ r = __put_user(0, (ae + ne));
+ if (IS_ERR(r))
+ goto out;
+ r = nargs(argv, av);
+ if (IS_ERR(r))
+ goto out;
+ r = nargs(envp, ae);
+ if (IS_ERR(r))
+ goto out;
r = sys_execve(filename, av, ae, regs);
if (IS_ERR(r))
+out:
sys_munmap((unsigned long) av, len);
return(r);
}
@@ -711,7 +728,8 @@ struct readdir32_callback {
};
static int
-filldir32 (void *__buf, const char *name, int namlen, off_t offset, ino_t ino)
+filldir32 (void *__buf, const char *name, int namlen, off_t offset, ino_t ino,
+ unsigned int d_type)
{
struct linux32_dirent * dirent;
struct getdents32_callback * buf = (struct getdents32_callback *) __buf;
@@ -959,150 +977,85 @@ sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp)
}
struct iovec32 { unsigned int iov_base; int iov_len; };
+asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long);
+asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long);
-typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *);
-
-static long
-do_readv_writev32(int type, struct file *file, const struct iovec32 *vector,
- u32 count)
+static struct iovec *
+get_iovec32(struct iovec32 *iov32, struct iovec *iov_buf, u32 count, int type)
{
- unsigned long tot_len;
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov=iovstack, *ivp;
- struct inode *inode;
- long retval, i;
- IO_fn_t fn;
+ int i;
+ u32 buf, len;
+ struct iovec *ivp, *iov;
+
+ /* Get the "struct iovec" from user memory */
- /* First get the "struct iovec" from user memory and
- * verify all the pointers
- */
if (!count)
return 0;
- if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count))
- return -EFAULT;
+ if(verify_area(VERIFY_READ, iov32, sizeof(struct iovec32)*count))
+ return(struct iovec *)0;
if (count > UIO_MAXIOV)
- return -EINVAL;
+ return(struct iovec *)0;
if (count > UIO_FASTIOV) {
iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
if (!iov)
- return -ENOMEM;
- }
+ return((struct iovec *)0);
+ } else
+ iov = iov_buf;
- tot_len = 0;
- i = count;
ivp = iov;
- while(i > 0) {
- u32 len;
- u32 buf;
-
- __get_user(len, &vector->iov_len);
- __get_user(buf, &vector->iov_base);
- tot_len += len;
+ for (i = 0; i < count; i++) {
+ if (__get_user(len, &iov32->iov_len) ||
+ __get_user(buf, &iov32->iov_base)) {
+ if (iov != iov_buf)
+ kfree(iov);
+ return((struct iovec *)0);
+ }
+ if (verify_area(type, (void *)A(buf), len)) {
+ if (iov != iov_buf)
+ kfree(iov);
+ return((struct iovec *)0);
+ }
ivp->iov_base = (void *)A(buf);
- ivp->iov_len = (__kernel_size_t) len;
- vector++;
- ivp++;
- i--;
- }
-
- inode = file->f_dentry->d_inode;
- /* VERIFY_WRITE actually means a read, as we write to user space */
- retval = locks_verify_area((type == VERIFY_WRITE
- ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE),
- inode, file, file->f_pos, tot_len);
- if (retval) {
- if (iov != iovstack)
- kfree(iov);
- return retval;
- }
-
- /* Then do the actual IO. Note that sockets need to be handled
- * specially as they have atomicity guarantees and can handle
- * iovec's natively
- */
- if (inode->i_sock) {
- int err;
- err = sock_readv_writev(type, inode, file, iov, count, tot_len);
- if (iov != iovstack)
- kfree(iov);
- return err;
- }
-
- if (!file->f_op) {
- if (iov != iovstack)
- kfree(iov);
- return -EINVAL;
- }
- /* VERIFY_WRITE actually means a read, as we write to user space */
- fn = file->f_op->read;
- if (type == VERIFY_READ)
- fn = (IO_fn_t) file->f_op->write;
- ivp = iov;
- while (count > 0) {
- void * base;
- int len, nr;
-
- base = ivp->iov_base;
- len = ivp->iov_len;
+ ivp->iov_len = (__kernel_size_t)len;
+ iov32++;
ivp++;
- count--;
- nr = fn(file, base, len, &file->f_pos);
- if (nr < 0) {
- if (retval)
- break;
- retval = nr;
- break;
- }
- retval += nr;
- if (nr != len)
- break;
}
- if (iov != iovstack)
- kfree(iov);
- return retval;
+ return(iov);
}
asmlinkage long
sys32_readv(int fd, struct iovec32 *vector, u32 count)
{
- struct file *file;
- long ret = -EBADF;
-
- file = fget(fd);
- if(!file)
- goto bad_file;
-
- if(!(file->f_mode & 1))
- goto out;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov;
+ int ret;
+ mm_segment_t old_fs = get_fs();
- ret = do_readv_writev32(VERIFY_WRITE, file,
- vector, count);
-out:
- fput(file);
-bad_file:
+ if ((iov = get_iovec32(vector, iovstack, count, VERIFY_WRITE)) == (struct iovec *)0)
+ return -EFAULT;
+ set_fs(KERNEL_DS);
+ ret = sys_readv(fd, iov, count);
+ set_fs(old_fs);
+ if (iov != iovstack)
+ kfree(iov);
return ret;
}
asmlinkage long
sys32_writev(int fd, struct iovec32 *vector, u32 count)
{
- struct file *file;
- int ret = -EBADF;
-
- file = fget(fd);
- if(!file)
- goto bad_file;
-
- if(!(file->f_mode & 2))
- goto out;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov;
+ int ret;
+ mm_segment_t old_fs = get_fs();
- down(&file->f_dentry->d_inode->i_sem);
- ret = do_readv_writev32(VERIFY_READ, file,
- vector, count);
- up(&file->f_dentry->d_inode->i_sem);
-out:
- fput(file);
-bad_file:
+ if ((iov = get_iovec32(vector, iovstack, count, VERIFY_READ)) == (struct iovec *)0)
+ return -EFAULT;
+ set_fs(KERNEL_DS);
+ ret = sys_writev(fd, iov, count);
+ set_fs(old_fs);
+ if (iov != iovstack)
+ kfree(iov);
return ret;
}
@@ -1173,21 +1126,22 @@ struct msghdr32 {
static inline int
shape_msg(struct msghdr *mp, struct msghdr32 *mp32)
{
+ int ret;
unsigned int i;
if (!access_ok(VERIFY_READ, mp32, sizeof(*mp32)))
return(-EFAULT);
- __get_user(i, &mp32->msg_name);
+ ret = __get_user(i, &mp32->msg_name);
mp->msg_name = (void *)A(i);
- __get_user(mp->msg_namelen, &mp32->msg_namelen);
- __get_user(i, &mp32->msg_iov);
+ ret |= __get_user(mp->msg_namelen, &mp32->msg_namelen);
+ ret |= __get_user(i, &mp32->msg_iov);
mp->msg_iov = (struct iovec *)A(i);
- __get_user(mp->msg_iovlen, &mp32->msg_iovlen);
- __get_user(i, &mp32->msg_control);
+ ret |= __get_user(mp->msg_iovlen, &mp32->msg_iovlen);
+ ret |= __get_user(i, &mp32->msg_control);
mp->msg_control = (void *)A(i);
- __get_user(mp->msg_controllen, &mp32->msg_controllen);
- __get_user(mp->msg_flags, &mp32->msg_flags);
- return(0);
+ ret |= __get_user(mp->msg_controllen, &mp32->msg_controllen);
+ ret |= __get_user(mp->msg_flags, &mp32->msg_flags);
+ return(ret ? -EFAULT : 0);
}
/*
@@ -2341,17 +2295,17 @@ restore_ia32_fpstate(struct task_struct *tsk, struct _fpstate_ia32 *save)
{
struct switch_stack *swp;
struct pt_regs *ptp;
- int i, tos;
+ int i, tos, ret;
int fsrlo, fsrhi;
if (!access_ok(VERIFY_READ, save, sizeof(*save)))
return(-EIO);
- __get_user(tsk->thread.fcr, (unsigned int *)&save->cw);
- __get_user(fsrlo, (unsigned int *)&save->sw);
- __get_user(fsrhi, (unsigned int *)&save->tag);
+ ret = __get_user(tsk->thread.fcr, (unsigned int *)&save->cw);
+ ret |= __get_user(fsrlo, (unsigned int *)&save->sw);
+ ret |= __get_user(fsrhi, (unsigned int *)&save->tag);
tsk->thread.fsr = ((long)fsrhi << 32) | (long)fsrlo;
- __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff);
- __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff);
+ ret |= __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff);
+ ret |= __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff);
/*
* Stack frames start with 16-bytes of temp space
*/
@@ -2360,7 +2314,7 @@ restore_ia32_fpstate(struct task_struct *tsk, struct _fpstate_ia32 *save)
tos = (tsk->thread.fsr >> 11) & 3;
for (i = 0; i < 8; i++)
get_fpreg(i, &save->_st[i], ptp, swp, tos);
- return(0);
+ return(ret ? -EFAULT : 0);
}
asmlinkage long sys_ptrace(long, pid_t, unsigned long, unsigned long, long, long, long, long, long);
@@ -2492,6 +2446,105 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data,
return ret;
}
+static inline int
+get_flock32(struct flock *kfl, struct flock32 *ufl)
+{
+ int err;
+
+ err = get_user(kfl->l_type, &ufl->l_type);
+ err |= __get_user(kfl->l_whence, &ufl->l_whence);
+ err |= __get_user(kfl->l_start, &ufl->l_start);
+ err |= __get_user(kfl->l_len, &ufl->l_len);
+ err |= __get_user(kfl->l_pid, &ufl->l_pid);
+ return err;
+}
+
+static inline int
+put_flock32(struct flock *kfl, struct flock32 *ufl)
+{
+ int err;
+
+ err = __put_user(kfl->l_type, &ufl->l_type);
+ err |= __put_user(kfl->l_whence, &ufl->l_whence);
+ err |= __put_user(kfl->l_start, &ufl->l_start);
+ err |= __put_user(kfl->l_len, &ufl->l_len);
+ err |= __put_user(kfl->l_pid, &ufl->l_pid);
+ return err;
+}
+
+extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+
+asmlinkage long
+sys32_fcntl(unsigned int fd, unsigned int cmd, int arg)
+{
+ struct flock f;
+ mm_segment_t old_fs;
+ long ret;
+
+ switch (cmd) {
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ if(cmd != F_GETLK && get_flock32(&f, (struct flock32 *)((long)arg)))
+ return -EFAULT;
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_fcntl(fd, cmd, (unsigned long)&f);
+ set_fs(old_fs);
+ if(cmd == F_GETLK && put_flock32(&f, (struct flock32 *)((long)arg)))
+ return -EFAULT;
+ return ret;
+ default:
+ /*
+ * `sys_fcntl' lies about arg, for the F_SETOWN
+ * sub-function arg can have a negative value.
+ */
+ return sys_fcntl(fd, cmd, (unsigned long)((long)arg));
+ }
+}
+
+asmlinkage long
+sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ if (act) {
+ old_sigset32_t mask;
+
+ ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler);
+ ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ ret |= __get_user(mask, &act->sa_mask);
+ if (ret)
+ return ret;
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler);
+ ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+ }
+
+ return ret;
+}
+
+asmlinkage long sys_ni_syscall(void);
+
+asmlinkage long
+sys32_ni_syscall(int dummy0, int dummy1, int dummy2, int dummy3,
+ int dummy4, int dummy5, int dummy6, int dummy7, int stack)
+{
+ struct pt_regs *regs = (struct pt_regs *)&stack;
+
+ printk("IA32 syscall #%d issued, maybe we should implement it\n",
+ (int)regs->r1);
+ return(sys_ni_syscall());
+}
+
#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */
/* In order to reduce some races, while at the same time doing additional
@@ -2545,61 +2598,6 @@ sys32_ioperm(u32 from, u32 num, int on)
return sys_ioperm((unsigned long)from, (unsigned long)num, on);
}
-static inline int
-get_flock(struct flock *kfl, struct flock32 *ufl)
-{
- int err;
-
- err = get_user(kfl->l_type, &ufl->l_type);
- err |= __get_user(kfl->l_whence, &ufl->l_whence);
- err |= __get_user(kfl->l_start, &ufl->l_start);
- err |= __get_user(kfl->l_len, &ufl->l_len);
- err |= __get_user(kfl->l_pid, &ufl->l_pid);
- return err;
-}
-
-static inline int
-put_flock(struct flock *kfl, struct flock32 *ufl)
-{
- int err;
-
- err = __put_user(kfl->l_type, &ufl->l_type);
- err |= __put_user(kfl->l_whence, &ufl->l_whence);
- err |= __put_user(kfl->l_start, &ufl->l_start);
- err |= __put_user(kfl->l_len, &ufl->l_len);
- err |= __put_user(kfl->l_pid, &ufl->l_pid);
- return err;
-}
-
-extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd,
- unsigned long arg);
-
-asmlinkage long
-sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- case F_GETLK:
- case F_SETLK:
- case F_SETLKW:
- {
- struct flock f;
- mm_segment_t old_fs;
- long ret;
-
- if(get_flock(&f, (struct flock32 *)arg))
- return -EFAULT;
- old_fs = get_fs(); set_fs (KERNEL_DS);
- ret = sys_fcntl(fd, cmd, (unsigned long)&f);
- set_fs (old_fs);
- if(put_flock(&f, (struct flock32 *)arg))
- return -EFAULT;
- return ret;
- }
- default:
- return sys_fcntl(fd, cmd, (unsigned long)arg);
- }
-}
-
struct dqblk32 {
__u32 dqb_bhardlimit;
__u32 dqb_bsoftlimit;
@@ -3862,40 +3860,6 @@ out:
extern void check_pending(int signum);
-asmlinkage long
-sys32_sigaction (int sig, struct old_sigaction32 *act,
- struct old_sigaction32 *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if(sig < 0) {
- current->tss.new_signal = 1;
- sig = -sig;
- }
-
- if (act) {
- old_sigset_t32 mask;
-
- ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler);
- ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
- ret |= __get_user(mask, &act->sa_mask);
- if (ret)
- return ret;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler);
- ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
- }
-
- return ret;
-}
-
#ifdef CONFIG_MODULES
extern asmlinkage unsigned long sys_create_module(const char *name_user,
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index dfba2529a..563c308ea 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -9,8 +9,8 @@
all: kernel.o head.o init_task.o
-obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \
- pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \
+obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \
+ machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \
signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o
obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 20521da36..4bba56e1d 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -19,10 +19,11 @@
#include <linux/irq.h>
#include <asm/acpi-ext.h>
-#include <asm/page.h>
#include <asm/efi.h>
#include <asm/io.h>
#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
#undef ACPI_DEBUG /* Guess what this does? */
@@ -75,47 +76,6 @@ acpi_lsapic(char *p)
}
/*
- * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate
- * base addresses.
- */
-static void __init
-acpi_iosapic(char *p)
-{
- /*
- * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet
- * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be
- * a means for platform_setup() to register ACPI handlers?
- */
-#ifdef CONFIG_IA64_DIG
- acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p;
- unsigned int ver, v;
- int l, max_pin;
-
- ver = iosapic_version(iosapic->address);
- max_pin = (ver >> 16) & 0xff;
-
- printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n",
- (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address,
- iosapic->irq_base, iosapic->irq_base + max_pin);
-
- for (l = 0; l <= max_pin; l++) {
- v = iosapic->irq_base + l;
- if (v < 16)
- v = isa_irq_to_vector(v);
- if (v > IA64_MAX_VECTORED_IRQ) {
- printk(" !!! bad IOSAPIC interrupt vector: %u\n", v);
- continue;
- }
- /* XXX Check for IOSAPIC collisions */
- iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0);
- iosapic_baseirq(v) = iosapic->irq_base;
- }
- iosapic_init(iosapic->address, iosapic->irq_base);
-#endif
-}
-
-
-/*
* Configure legacy IRQ information in iosapic_vector
*/
static void __init
@@ -227,7 +187,7 @@ acpi_parse_msapic(acpi_sapic_t *msapic)
break;
case ACPI_ENTRY_IO_SAPIC:
- acpi_iosapic(p);
+ platform_register_iosapic((acpi_entry_iosapic_t *) p);
break;
case ACPI_ENTRY_INT_SRC_OVERRIDE:
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index c4383b97f..d55835df6 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -33,9 +33,10 @@
extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
-
static efi_runtime_services_t *runtime;
+static unsigned long mem_limit = ~0UL;
+
static efi_status_t
phys_get_time (efi_time_t *tm, efi_time_cap_t *tc)
{
@@ -169,15 +170,13 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
- if (md->phys_addr > 1024*1024*1024UL) {
- printk("Warning: ignoring %luMB of memory above 1GB!\n",
- md->num_pages >> 8);
- md->type = EFI_UNUSABLE_MEMORY;
- continue;
- }
-
if (!(md->attribute & EFI_MEMORY_WB))
continue;
+ if (md->phys_addr + (md->num_pages << 12) > mem_limit) {
+ if (md->phys_addr > mem_limit)
+ continue;
+ md->num_pages = (mem_limit - md->phys_addr) >> 12;
+ }
if (md->num_pages == 0) {
printk("efi_memmap_walk: ignoring empty region at 0x%lx",
md->phys_addr);
@@ -224,8 +223,8 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
* ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
* Abstraction Layer chapter 11 in ADAG
*/
-static void
-map_pal_code (void)
+void
+efi_map_pal_code (void)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
@@ -240,13 +239,14 @@ map_pal_code (void)
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
- if (md->type != EFI_PAL_CODE) continue;
+ if (md->type != EFI_PAL_CODE)
+ continue;
if (++pal_code_count > 1) {
printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
md->phys_addr);
continue;
- }
+ }
mask = ~((1 << _PAGE_SIZE_4M)-1); /* XXX should be dynamic? */
vaddr = PAGE_OFFSET + md->phys_addr;
@@ -281,9 +281,28 @@ efi_init (void)
efi_config_table_t *config_tables;
efi_char16_t *c16;
u64 efi_desc_size;
- char vendor[100] = "unknown";
+ char *cp, *end, vendor[100] = "unknown";
+ extern char saved_command_line[];
int i;
+ /* it's too early to be able to use the standard kernel command line support... */
+ for (cp = saved_command_line; *cp; ) {
+ if (memcmp(cp, "mem=", 4) == 0) {
+ cp += 4;
+ mem_limit = memparse(cp, &end) - 1;
+ if (end != cp)
+ break;
+ cp = end;
+ } else {
+ while (*cp != ' ' && *cp)
+ ++cp;
+ while (*cp == ' ')
+ ++cp;
+ }
+ }
+ if (mem_limit != ~0UL)
+ printk("Ignoring memory above %luMB\n", mem_limit >> 20);
+
efi.systab = __va(ia64_boot_param.efi_systab);
/*
@@ -359,7 +378,7 @@ efi_init (void)
}
#endif
- map_pal_code();
+ efi_map_pal_code();
}
void
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index f9beac21d..e37bd0df8 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -106,29 +106,19 @@ GLOBAL_ENTRY(ia64_switch_to)
alloc r16=ar.pfs,1,0,0,0
DO_SAVE_SWITCH_STACK
UNW(.body)
- // disable interrupts to ensure atomicity for next few instructions:
- mov r17=psr // M-unit
- ;;
- rsm psr.i // M-unit
- dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff
- ;;
- srlz.d
- ;;
+
adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+ dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff
adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
;;
st8 [r22]=sp // save kernel stack pointer of old task
ld8 sp=[r21] // load kernel stack pointer of new task
and r20=in0,r18 // physical address of "current"
;;
+ mov ar.k6=r20 // copy "current" into ar.k6
mov r8=r13 // return pointer to previously running task
mov r13=in0 // set "current" pointer
- mov ar.k6=r20 // copy "current" into ar.k6
;;
- // restore interrupts
- mov psr.l=r17
- ;;
- srlz.d
DO_LOAD_SWITCH_STACK( )
br.ret.sptk.few rp
END(ia64_switch_to)
@@ -1207,7 +1197,7 @@ sys_call_table:
data8 sys_newlstat
data8 sys_newfstat
data8 sys_clone2
- data8 ia64_ni_syscall
+ data8 sys_getdents64
data8 ia64_ni_syscall // 1215
data8 ia64_ni_syscall
data8 ia64_ni_syscall
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index b2abc48a4..bea14236d 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -181,7 +181,9 @@ END(ia64_save_debug_regs)
GLOBAL_ENTRY(ia64_load_debug_regs)
alloc r16=ar.pfs,1,0,0,0
+#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
lfetch.nta [in0]
+#endif
mov r20=ar.lc // preserve ar.lc
add r19=IA64_NUM_DBG_REGS*8,in0
mov ar.lc=IA64_NUM_DBG_REGS-1
@@ -702,3 +704,74 @@ SET_REG(b4);
SET_REG(b5);
#endif /* CONFIG_IA64_BRL_EMU */
+
+#ifdef CONFIG_SMP
+
+ /*
+ * This routine handles spinlock contention. It uses a simple exponential backoff
+ * algorithm to reduce unnecessary bus traffic. The initial delay is selected from
+ * the low-order bits of the cycle counter (a cheap "randomizer"). I'm sure this
+ * could use additional tuning, especially on systems with a large number of CPUs.
+ * Also, I think the maximum delay should be made a function of the number of CPUs in
+ * the system. --davidm 00/08/05
+ *
+ * WARNING: This is not a normal procedure. It gets called from C code without
+ * the compiler knowing about it. Thus, we must not use any scratch registers
+ * beyond those that were declared "clobbered" at the call-site (see spin_lock()
+ * macro). We may not even use the stacked registers, because that could overwrite
+ * output registers. Similarly, we can't use the scratch stack area as it may be
+ * in use, too.
+ *
+ * Inputs:
+ * ar.ccv = 0 (and available for use)
+ * r28 = available for use
+ * r29 = available for use
+ * r30 = non-zero (and available for use)
+ * r31 = address of lock we're trying to acquire
+ * p15 = available for use
+ */
+
+# define delay r28
+# define timeout r29
+# define tmp r30
+
+GLOBAL_ENTRY(ia64_spinlock_contention)
+ mov tmp=ar.itc
+ ;;
+ and delay=0x3f,tmp
+ ;;
+
+.retry: add timeout=tmp,delay
+ shl delay=delay,1
+ ;;
+ dep delay=delay,r0,0,13 // limit delay to 8192 cycles
+ ;;
+ // delay a little...
+.wait: sub tmp=tmp,timeout
+ or delay=0xf,delay // make sure delay is non-zero (otherwise we get stuck with 0)
+ ;;
+ cmp.lt p15,p0=tmp,r0
+ mov tmp=ar.itc
+(p15) br.cond.sptk .wait
+ ;;
+ ld1 tmp=[r31]
+ ;;
+ cmp.ne p15,p0=tmp,r0
+ mov tmp=ar.itc
+(p15) br.cond.sptk.few .retry // lock is still busy
+ ;;
+ // try acquiring lock (we know ar.ccv is still zero!):
+ mov tmp=1
+ ;;
+ IA64_SEMFIX_INSN
+ cmpxchg1.acq tmp=[r31],tmp,ar.ccv
+ ;;
+ cmp.eq p15,p0=tmp,r0
+
+ mov tmp=ar.itc
+(p15) br.ret.sptk.many b7 // got lock -> return
+ br .retry // still no luck, retry
+
+END(ia64_spinlock_contention)
+
+#endif
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 2e4ffe403..62e792612 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -18,6 +18,7 @@ EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strncat);
EXPORT_SYMBOL(strncmp);
EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strtok);
#include <linux/pci.h>
@@ -37,6 +38,7 @@ EXPORT_SYMBOL(cpu_data);
EXPORT_SYMBOL(kernel_thread);
#ifdef CONFIG_SMP
+#include <asm/hardirq.h>
EXPORT_SYMBOL(synchronize_irq);
#include <asm/smplock.h>
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 1ee2974b5..fe686db0e 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -117,6 +117,13 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs)
{
unsigned long bsp, sp;
+ /*
+ * Note: if the interrupt happened while executing in
+ * the context switch routine (ia64_switch_to), we may
+ * get a spurious stack overflow here. This is
+ * because the register and the memory stack are not
+ * switched atomically.
+ */
asm ("mov %0=ar.bsp" : "=r"(bsp));
asm ("mov %0=sp" : "=r"(sp));
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 9f611c5b0..d1b599f77 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -170,33 +170,27 @@ ia64_ivt:
* The ITLB basically does the same as the VHPT handler except
* that we always insert exactly one instruction TLB entry.
*/
-#if 1
/*
* Attempt to lookup PTE through virtual linear page table.
* The speculative access will fail if there is no TLB entry
* for the L3 page table page we're trying to access.
*/
- mov r31=pr // save predicates
- ;;
- thash r17=r16 // compute virtual address of L3 PTE
+ mov r16=cr.iha // get virtual address of L3 PTE
;;
- ld8.s r18=[r17] // try to read L3 PTE
+ ld8.s r16=[r16] // try to read L3 PTE
+ mov r31=pr // save predicates
;;
- tnat.nz p6,p0=r18 // did read succeed?
+ tnat.nz p6,p0=r16 // did read succeed?
(p6) br.cond.spnt.many 1f
;;
- itc.i r18
+ itc.i r16
;;
mov pr=r31,-1
rfi
-1: rsm psr.dt // use physical addressing for data
-#else
- mov r16=cr.ifa // get address that caused the TLB miss
+1: mov r16=cr.ifa // get address that caused the TLB miss
;;
rsm psr.dt // use physical addressing for data
-#endif
- mov r31=pr // save the predicate registers
mov r19=ar.k7 // get page table base address
shl r21=r16,3 // shift bit 60 into sign bit
shr.u r17=r16,61 // get the region number into r17
@@ -244,33 +238,27 @@ ia64_ivt:
* The DTLB basically does the same as the VHPT handler except
* that we always insert exactly one data TLB entry.
*/
- mov r16=cr.ifa // get address that caused the TLB miss
-#if 1
/*
* Attempt to lookup PTE through virtual linear page table.
* The speculative access will fail if there is no TLB entry
* for the L3 page table page we're trying to access.
*/
- mov r31=pr // save predicates
+ mov r16=cr.iha // get virtual address of L3 PTE
;;
- thash r17=r16 // compute virtual address of L3 PTE
- ;;
- ld8.s r18=[r17] // try to read L3 PTE
+ ld8.s r16=[r16] // try to read L3 PTE
+ mov r31=pr // save predicates
;;
- tnat.nz p6,p0=r18 // did read succeed?
+ tnat.nz p6,p0=r16 // did read succeed?
(p6) br.cond.spnt.many 1f
;;
- itc.d r18
+ itc.d r16
;;
mov pr=r31,-1
rfi
-1: rsm psr.dt // use physical addressing for data
-#else
- rsm psr.dt // use physical addressing for data
- mov r31=pr // save the predicate registers
+1: mov r16=cr.ifa // get address that caused the TLB miss
;;
-#endif
+ rsm psr.dt // use physical addressing for data
mov r19=ar.k7 // get page table base address
shl r21=r16,3 // shift bit 60 into sign bit
shr.u r17=r16,61 // get the region number into r17
@@ -504,7 +492,24 @@ page_fault:
mov r29=b0 // save b0 in case of nested fault)
;;
1: ld8 r18=[r17]
- ;; // avoid raw on r18
+#if defined(CONFIG_IA32_SUPPORT) && \
+ (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC))
+ //
+ // Erratum 85 (Access bit fault could be reported before page not present fault)
+ // If the PTE is indicates the page is not present, then just turn this into a
+ // page fault.
+ //
+ mov r31=pr // save predicates
+ ;;
+ tbit.nz p6,p0=r18,0 // page present bit set?
+(p6) br.cond.sptk 1f
+ ;; // avoid WAW on p6
+ mov pr=r31,-1
+ br.cond.sptk page_fault // page wasn't present
+1: mov pr=r31,-1
+#else
+ ;; // avoid RAW on r18
+#endif
or r18=_PAGE_A,r18 // set the accessed bit
mov b0=r29 // restore b0
;;
@@ -541,14 +546,6 @@ page_fault:
;;
srlz.d // ensure everyone knows psr.dt is off...
cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so)
-#if 1
- // Allow syscalls via the old system call number for the time being. This is
- // so we can transition to the new syscall number in a relatively smooth
- // fashion.
- mov r17=0x80000
- ;;
-(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number?
-#endif
(p7) br.cond.spnt.many non_syscall
SAVE_MIN // uses r31; defines r2:
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 153fb5684..2afb5613e 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -1,14 +1,12 @@
+#include <linux/config.h>
#include <linux/kernel.h>
#include <asm/page.h>
#include <asm/machvec.h>
-struct ia64_machine_vector ia64_mv;
+#ifdef CONFIG_IA64_GENERIC
-void
-machvec_noop (void)
-{
-}
+struct ia64_machine_vector ia64_mv;
/*
* Most platforms use this routine for mapping page frame addresses
@@ -46,3 +44,10 @@ machvec_init (const char *name)
ia64_mv = *mv;
printk("booting generic kernel on platform %s\n", name);
}
+
+#endif /* CONFIG_IA64_GENERIC */
+
+void
+machvec_noop (void)
+{
+}
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
index 147e2b8fe..0b07163dc 100644
--- a/arch/ia64/kernel/pal.S
+++ b/arch/ia64/kernel/pal.S
@@ -191,3 +191,57 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static)
srlz.d // seralize restoration of psr.l
br.ret.sptk.few b0
END(ia64_pal_call_phys_static)
+
+/*
+ * Make a PAL call using the stacked registers in physical mode.
+ *
+ * Inputs:
+ * in0 Index of PAL service
+ * in2 - in3 Remaning PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
+ UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5))
+ alloc loc1 = ar.pfs,5,5,86,0
+ movl loc2 = pal_entry_point
+1: {
+ mov r28 = in0 // copy procedure index
+ mov loc0 = rp // save rp
+ }
+ .body
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ mov out0 = in0 // first argument
+ mov out1 = in1 // copy arg2
+ mov out2 = in2 // copy arg3
+ mov out3 = in3 // copy arg3
+ ;;
+ mov loc3 = psr // save psr
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
+ ;;
+ mov ar.rsc=r0 // put RSE in enforced lazy, LE mode
+ movl r16=PAL_PSR_BITS_TO_CLEAR
+ movl r17=PAL_PSR_BITS_TO_SET
+ ;;
+ or loc3=loc3,r17 // add in psr the bits to set
+ mov b7 = loc2 // install target to branch reg
+ ;;
+ andcm r16=loc3,r16 // removes bits to clear from psr
+ br.call.sptk.few rp=ia64_switch_mode
+.ret6:
+ br.call.sptk.many rp=b7 // now make the call
+.ret7:
+ mov ar.rsc=r0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3 // r16= original psr
+ br.call.sptk.few rp=ia64_switch_mode // return to virtual mode
+
+.ret8: mov psr.l = loc3 // restore init PSR
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+ mov ar.rsc=loc4 // restore RSE configuration
+ srlz.d // seralize restoration of psr.l
+ br.ret.sptk.few b0
+END(ia64_pal_call_phys_stacked)
+
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index ad40e911e..ecd7b0886 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -16,24 +16,41 @@
* are empty for now.
* - remove hack to avoid problem with <= 256M RAM for itr.
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/mm.h>
+#include <linux/module.h>
+#if defined(MODVERSIONS)
+#include <linux/modversions.h>
+#endif
#include <asm/pal.h>
#include <asm/sal.h>
#include <asm/efi.h>
#include <asm/page.h>
#include <asm/processor.h>
+#ifdef CONFIG_SMP
+#include <linux/smp.h>
+#endif
+
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
+MODULE_DESCRIPTION("/proc interface to IA-64 PAL");
/*
- * Hope to get rid of these in a near future
+ * Hope to get rid of this one in a near future
*/
#define IA64_PAL_VERSION_BUG 1
-#define PALINFO_VERSION "0.1"
+#define PALINFO_VERSION "0.3"
+
+#ifdef CONFIG_SMP
+#define cpu_is_online(i) (cpu_online_map & (1UL << i))
+#else
+#define cpu_is_online(i) 1
+#endif
typedef int (*palinfo_func_t)(char*);
@@ -43,7 +60,6 @@ typedef struct {
struct proc_dir_entry *entry; /* registered entry (removal) */
} palinfo_entry_t;
-static struct proc_dir_entry *palinfo_dir;
/*
* A bunch of string array to get pretty printing
@@ -95,7 +111,7 @@ static const char *rse_hints[]={
#define RSE_HINTS_COUNT (sizeof(rse_hints)/sizeof(const char *))
/*
- * The current resvision of the Volume 2 of
+ * The current revision of the Volume 2 of
* IA-64 Architecture Software Developer's Manual is wrong.
* Table 4-10 has invalid information concerning the ma field:
* Correct table is:
@@ -121,64 +137,31 @@ static const char *mem_attrib[]={
/*
* Allocate a buffer suitable for calling PAL code in Virtual mode
*
- * The documentation (PAL2.6) requires thius buffer to have a pinned
- * translation to avoid any DTLB faults. For this reason we allocate
- * a page (large enough to hold any possible reply) and use a DTC
- * to hold the translation during the call. A call the free_palbuffer()
- * is required to release ALL resources (page + translation).
- *
- * The size of the page allocated is based on the PAGE_SIZE defined
- * at compile time for the kernel, i.e. >= 4Kb.
+ * The documentation (PAL2.6) allows DTLB misses on the buffer. So
+ * using the TC is enough, no need to pin the entry.
*
- * Return: a pointer to the newly allocated page (virtual address)
+ * We allocate a kernel-sized page (at least 4KB). This is enough to
+ * hold any possible reply.
*/
-static void *
+static inline void *
get_palcall_buffer(void)
{
void *tmp;
tmp = (void *)__get_free_page(GFP_KERNEL);
if (tmp == 0) {
- printk(KERN_ERR "%s: can't get a buffer page\n", __FUNCTION__);
- } else if ( ((u64)tmp - PAGE_OFFSET) > (1<<_PAGE_SIZE_256M) ) { /* XXX: temporary hack */
- unsigned long flags;
-
- /* PSR.ic must be zero to insert new DTR */
- ia64_clear_ic(flags);
-
- /*
- * we only insert of DTR
- *
- * XXX: we need to figure out a way to "allocate" TR(s) to avoid
- * conflicts. Maybe something in an include file like pgtable.h
- * page.h or processor.h
- *
- * ITR0/DTR0: used for kernel code/data
- * ITR1/DTR1: used by HP simulator
- * ITR2/DTR2: used to map PAL code
- */
- ia64_itr(0x2, 3, (u64)tmp,
- pte_val(mk_pte_phys(__pa(tmp), __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW))), PAGE_SHIFT);
-
- ia64_srlz_d ();
-
- __restore_flags(flags);
- }
-
+ printk(KERN_ERR __FUNCTION__" : can't get a buffer page\n");
+ }
return tmp;
}
/*
* Free a palcall buffer allocated with the previous call
- *
- * The translation is also purged.
*/
-static void
+static inline void
free_palcall_buffer(void *addr)
{
__free_page(addr);
- ia64_ptr(0x2, (u64)addr, PAGE_SHIFT);
- ia64_srlz_d ();
}
/*
@@ -564,7 +547,6 @@ processor_info(char *page)
int i;
s64 ret;
- /* must be in physical mode */
if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0;
for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) {
@@ -577,6 +559,57 @@ processor_info(char *page)
return p - page;
}
+static const char *bus_features[]={
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+ NULL,NULL,
+ "Request Bus Parking",
+ "Bus Lock Mask",
+ "Enable Half Transfer",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL,
+ "Disable Transaction Queuing",
+ "Disable Reponse Error Checking",
+ "Disable Bus Error Checking",
+ "Disable Bus Requester Internal Error Signalling",
+ "Disable Bus Requester Error Signalling",
+ "Disable Bus Initialization Event Checking",
+ "Disable Bus Initialization Event Signalling",
+ "Disable Bus Address Error Checking",
+ "Disable Bus Address Error Signalling",
+ "Disable Bus Data Error Checking"
+};
+
+
+static int
+bus_info(char *page)
+{
+ char *p = page;
+ const char **v = bus_features;
+ pal_bus_features_u_t av, st, ct;
+ u64 avail, status, control;
+ int i;
+ s64 ret;
+
+ if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0;
+
+ avail = av.pal_bus_features_val;
+ status = st.pal_bus_features_val;
+ control = ct.pal_bus_features_val;
+
+ for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) {
+ if ( ! *v ) continue;
+ p += sprintf(p, "%-48s : %s%s %s\n", *v,
+ avail & 0x1 ? "" : "NotImpl",
+ avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
+ avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
+ }
+ return p - page;
+}
+
+
/*
* physical mode call for PAL_VERSION is working fine.
* This function is meant to go away once PAL get fixed.
@@ -613,21 +646,25 @@ version_info(char *page)
#endif
if (status != 0) return 0;
- p += sprintf(p, "PAL_vendor : 0x%x (min=0x%x)\n" \
- "PAL_A revision : 0x%x (min=0x%x)\n" \
- "PAL_A model : 0x%x (min=0x%x)\n" \
- "PAL_B mode : 0x%x (min=0x%x)\n" \
- "PAL_B revision : 0x%x (min=0x%x)\n",
+ p += sprintf(p, "PAL_vendor : 0x%02x (min=0x%02x)\n" \
+ "PAL_A : %x.%x.%x (min=%x.%x.%x)\n" \
+ "PAL_B : %x.%x.%x (min=%x.%x.%x)\n",
cur_ver.pal_version_s.pv_pal_vendor,
min_ver.pal_version_s.pv_pal_vendor,
+
+ cur_ver.pal_version_s.pv_pal_a_model>>4,
+ cur_ver.pal_version_s.pv_pal_a_model&0xf,
cur_ver.pal_version_s.pv_pal_a_rev,
- cur_ver.pal_version_s.pv_pal_a_rev,
- cur_ver.pal_version_s.pv_pal_a_model,
- min_ver.pal_version_s.pv_pal_a_model,
+ min_ver.pal_version_s.pv_pal_a_model>>4,
+ min_ver.pal_version_s.pv_pal_a_model&0xf,
+ min_ver.pal_version_s.pv_pal_a_rev,
+
+ cur_ver.pal_version_s.pv_pal_b_model>>4,
+ cur_ver.pal_version_s.pv_pal_b_model&0xf,
cur_ver.pal_version_s.pv_pal_b_rev,
- min_ver.pal_version_s.pv_pal_b_rev,
- cur_ver.pal_version_s.pv_pal_b_model,
- min_ver.pal_version_s.pv_pal_b_model);
+ min_ver.pal_version_s.pv_pal_b_model>>4,
+ min_ver.pal_version_s.pv_pal_b_model&0xf,
+ min_ver.pal_version_s.pv_pal_b_rev);
return p - page;
}
@@ -648,6 +685,9 @@ perfmon_info(char *page)
}
#ifdef IA64_PAL_PERF_MON_INFO_BUG
+ /*
+ * This bug has been fixed in PAL 2.2.9 and higher
+ */
pm_buffer[5]=0x3;
pm_info.pal_perf_mon_info_s.cycles = 0x12;
pm_info.pal_perf_mon_info_s.retired = 0x08;
@@ -708,30 +748,111 @@ frequency_info(char *page)
return p - page;
}
-
-/*
- * Entry point routine: all calls go trhough this function
- */
static int
-palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
+tr_info(char *page)
{
- palinfo_func_t info = (palinfo_func_t)data;
- int len = info(page);
+ char *p = page;
+ s64 status;
+ pal_tr_valid_u_t tr_valid;
+ u64 tr_buffer[4];
+ pal_vm_info_1_u_t vm_info_1;
+ pal_vm_info_2_u_t vm_info_2;
+ int i, j;
+ u64 max[3], pgm;
+ struct ifa_reg {
+ u64 valid:1;
+ u64 ig:11;
+ u64 vpn:52;
+ } *ifa_reg;
+ struct itir_reg {
+ u64 rv1:2;
+ u64 ps:6;
+ u64 key:24;
+ u64 rv2:32;
+ } *itir_reg;
+ struct gr_reg {
+ u64 p:1;
+ u64 rv1:1;
+ u64 ma:3;
+ u64 a:1;
+ u64 d:1;
+ u64 pl:2;
+ u64 ar:3;
+ u64 ppn:38;
+ u64 rv2:2;
+ u64 ed:1;
+ u64 ig:11;
+ } *gr_reg;
+ struct rid_reg {
+ u64 ig1:1;
+ u64 rv1:1;
+ u64 ig2:6;
+ u64 rid:24;
+ u64 rv2:32;
+ } *rid_reg;
- if (len <= off+count) *eof = 1;
+ if ((status=ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
+ printk("ia64_pal_vm_summary=%ld\n", status);
+ return 0;
+ }
+ max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+ max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
- *start = page + off;
- len -= off;
+ for (i=0; i < 2; i++ ) {
+ for (j=0; j < max[i]; j++) {
- if (len>count) len = count;
- if (len<0) len = 0;
+ status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid);
+ if (status != 0) {
+ printk(__FUNCTION__ " pal call failed on tr[%d:%d]=%ld\n", i, j, status);
+ continue;
+ }
- return len;
+ ifa_reg = (struct ifa_reg *)&tr_buffer[2];
+
+ if (ifa_reg->valid == 0) continue;
+
+ gr_reg = (struct gr_reg *)tr_buffer;
+ itir_reg = (struct itir_reg *)&tr_buffer[1];
+ rid_reg = (struct rid_reg *)&tr_buffer[3];
+
+ pgm = -1 << (itir_reg->ps - 12);
+ p += sprintf(p, "%cTR%d: av=%d pv=%d dv=%d mv=%d\n" \
+ "\tppn : 0x%lx\n" \
+ "\tvpn : 0x%lx\n" \
+ "\tps : ",
+
+ "ID"[i],
+ j,
+ tr_valid.pal_tr_valid_s.access_rights_valid,
+ tr_valid.pal_tr_valid_s.priv_level_valid,
+ tr_valid.pal_tr_valid_s.dirty_bit_valid,
+ tr_valid.pal_tr_valid_s.mem_attr_valid,
+ (gr_reg->ppn & pgm)<< 12,
+ (ifa_reg->vpn & pgm)<< 12);
+
+ p = bitvector_process(p, 1<< itir_reg->ps);
+
+ p += sprintf(p, "\n\tpl : %d\n" \
+ "\tar : %d\n" \
+ "\trid : %x\n" \
+ "\tp : %d\n" \
+ "\tma : %d\n" \
+ "\td : %d\n",
+ gr_reg->pl,
+ gr_reg->ar,
+ rid_reg->rid,
+ gr_reg->p,
+ gr_reg->ma,
+ gr_reg->d);
+ }
+ }
+ return p - page;
}
+
+
/*
- * List names,function pairs for every entry in /proc/palinfo
- * Must be terminated with the NULL,NULL entry.
+ * List {name,function} pairs for every entry in /proc/palinfo/cpu*
*/
static palinfo_entry_t palinfo_entries[]={
{ "version_info", version_info, },
@@ -742,38 +863,190 @@ static palinfo_entry_t palinfo_entries[]={
{ "processor_info", processor_info, },
{ "perfmon_info", perfmon_info, },
{ "frequency_info", frequency_info, },
- { NULL, NULL,}
+ { "bus_info", bus_info },
+ { "tr_info", tr_info, }
};
+#define NR_PALINFO_ENTRIES (sizeof(palinfo_entries)/sizeof(palinfo_entry_t))
+
+/*
+ * this array is used to keep track of the proc entries we create. This is
+ * required in the module mode when we need to remove all entries. The procfs code
+ * does not do recursion of deletion
+ *
+ * Notes:
+ * - first +1 accounts for the cpuN entry
+ * - second +1 account for toplevel palinfo
+ *
+ */
+#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)+1)
+
+static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES];
+
+/*
+ * This data structure is used to pass which cpu,function is being requested
+ * It must fit in a 64bit quantity to be passed to the proc callback routine
+ *
+ * In SMP mode, when we get a request for another CPU, we must call that
+ * other CPU using IPI and wait for the result before returning.
+ */
+typedef union {
+ u64 value;
+ struct {
+ unsigned req_cpu: 32; /* for which CPU this info is */
+ unsigned func_id: 32; /* which function is requested */
+ } pal_func_cpu;
+} pal_func_cpu_u_t;
+
+#define req_cpu pal_func_cpu.req_cpu
+#define func_id pal_func_cpu.func_id
+
+#ifdef CONFIG_SMP
+
+/*
+ * used to hold information about final function to call
+ */
+typedef struct {
+ palinfo_func_t func; /* pointer to function to call */
+ char *page; /* buffer to store results */
+ int ret; /* return value from call */
+} palinfo_smp_data_t;
+
+
+/*
+ * this function does the actual final call and he called
+ * from the smp code, i.e., this is the palinfo callback routine
+ */
+static void
+palinfo_smp_call(void *info)
+{
+ palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
+ /* printk(__FUNCTION__" called on CPU %d\n", smp_processor_id());*/
+ if (data == NULL) {
+ printk(KERN_ERR __FUNCTION__" data pointer is NULL\n");
+ data->ret = 0; /* no output */
+ return;
+ }
+ /* does this actual call */
+ data->ret = (*data->func)(data->page);
+}
+
+/*
+ * function called to trigger the IPI, we need to access a remote CPU
+ * Return:
+ * 0 : error or nothing to output
+ * otherwise how many bytes in the "page" buffer were written
+ */
+static
+int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+{
+ palinfo_smp_data_t ptr;
+ int ret;
+
+ ptr.func = palinfo_entries[f->func_id].proc_read;
+ ptr.page = page;
+ ptr.ret = 0; /* just in case */
+
+ /*printk(__FUNCTION__" calling CPU %d from CPU %d for function %d\n", f->req_cpu,smp_processor_id(), f->func_id);*/
+
+ /* will send IPI to other CPU and wait for completion of remote call */
+ if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) {
+ printk(__FUNCTION__" remote CPU call from %d to %d on function %d: error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret);
+ return 0;
+ }
+ return ptr.ret;
+}
+#else /* ! CONFIG_SMP */
+static
+int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+{
+ printk(__FUNCTION__" should not be called with non SMP kernel\n");
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Entry point routine: all calls go through this function
+ */
+static int
+palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ int len=0;
+ pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data;
+
+ MOD_INC_USE_COUNT;
+ /*
+ * in SMP mode, we may need to call another CPU to get correct
+ * information. PAL, by definition, is processor specific
+ */
+ if (f->req_cpu == smp_processor_id())
+ len = (*palinfo_entries[f->func_id].proc_read)(page);
+ else
+ len = palinfo_handle_smp(f, page);
+
+ if (len <= off+count) *eof = 1;
+
+ *start = page + off;
+ len -= off;
+
+ if (len>count) len = count;
+ if (len<0) len = 0;
+
+ MOD_DEC_USE_COUNT;
+
+ return len;
+}
static int __init
palinfo_init(void)
{
- palinfo_entry_t *p;
+# define CPUSTR "cpu%d"
+
+ pal_func_cpu_u_t f;
+ struct proc_dir_entry **pdir = palinfo_proc_entries;
+ struct proc_dir_entry *palinfo_dir, *cpu_dir;
+ int i, j;
+ char cpustr[sizeof(CPUSTR)];
printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
- palinfo_dir = create_proc_entry("palinfo", S_IFDIR | S_IRUGO | S_IXUGO, NULL);
+ palinfo_dir = proc_mkdir("pal", NULL);
+
+ /*
+ * we keep track of created entries in a depth-first order for
+ * cleanup purposes. Each entry is stored into palinfo_proc_entries
+ */
+ for (i=0; i < NR_CPUS; i++) {
+
+ if (!cpu_is_online(i)) continue;
+
+ sprintf(cpustr,CPUSTR, i);
+
+ cpu_dir = proc_mkdir(cpustr, palinfo_dir);
- for (p = palinfo_entries; p->name ; p++){
- p->entry = create_proc_read_entry (p->name, 0, palinfo_dir,
- palinfo_read_entry, p->proc_read);
+ f.req_cpu = i;
+
+ for (j=0; j < NR_PALINFO_ENTRIES; j++) {
+ f.func_id = j;
+ *pdir++ = create_proc_read_entry (palinfo_entries[j].name, 0, cpu_dir,
+ palinfo_read_entry, (void *)f.value);
+ }
+ *pdir++ = cpu_dir;
}
+ *pdir = palinfo_dir;
return 0;
}
-static int __exit
+static void __exit
palinfo_exit(void)
{
- palinfo_entry_t *p;
+ int i = 0;
- for (p = palinfo_entries; p->name ; p++){
- remove_proc_entry (p->name, palinfo_dir);
+ /* remove all nodes: depth first pass */
+ for (i=0; i< NR_PALINFO_PROC_ENTRIES ; i++) {
+ remove_proc_entry (palinfo_proc_entries[i]->name, NULL);
}
- remove_proc_entry ("palinfo", 0);
-
- return 0;
}
module_init(palinfo_init);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index ab86e69b3..80509c6a1 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -3,34 +3,509 @@
*
* This implementation is for IA-64 platforms that do not support
* I/O TLBs (aka DMA address translation hardware).
- *
- * XXX This doesn't do the right thing yet. It appears we would have
- * to add additional zones so we can implement the various address
- * mask constraints that we might encounter. A zone for memory < 32
- * bits is obviously necessary...
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
*/
-#include <linux/types.h>
+#include <linux/config.h>
+
#include <linux/mm.h>
-#include <linux/string.h>
#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
#include <asm/io.h>
+#include <asm/pci.h>
+#include <asm/dma.h>
+
+#ifdef CONFIG_SWIOTLB
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1)))
+
+/*
+ * log of the size of each IO TLB slab. The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
+/*
+ * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the
+ * memory was in fact allocated by this API.
+ */
+static char *io_tlb_start, *io_tlb_end;
+
+/*
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and io_tlb_end.
+ * This is command line adjustable via setup_io_tlb_npages.
+ */
+unsigned long io_tlb_nslabs = 1024;
+
+/*
+ * This is a free list describing the number of free entries available from each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry for the sync
+ * operations.
+ */
+static unsigned char **io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED;
+
+static int __init
+setup_io_tlb_npages (char *str)
+{
+ io_tlb_nslabs = simple_strtoul(str, NULL, 0) << (PAGE_SHIFT - IO_TLB_SHIFT);
+ return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
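
The unit conversion in setup_io_tlb_npages() is worth spelling out: the boot argument is given in pages, while the pool is managed in 2^IO_TLB_SHIFT-byte slabs, so the value is scaled by PAGE_SHIFT - IO_TLB_SHIFT. The stand-alone sketch below assumes 8KB pages purely for illustration and is not kernel code.

/* Hedged sketch of the "swiotlb=" scaling; the page size is an assumption. */
#include <stdio.h>
#include <stdlib.h>

#define IO_TLB_SHIFT	11		/* 2KB slabs, as above */
#define PAGE_SHIFT	13		/* assume 8KB kernel pages */

int main(int argc, char **argv)
{
	unsigned long pages = (argc > 1) ? strtoul(argv[1], NULL, 0) : 1024;
	unsigned long nslabs = pages << (PAGE_SHIFT - IO_TLB_SHIFT);

	printf("swiotlb=%lu pages -> %lu slabs -> %lu bytes of bounce space\n",
	       pages, nslabs, nslabs * (1UL << IO_TLB_SHIFT));
	return 0;
}
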
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer
+ * data structures for the software IO TLB used to implement the PCI DMA API
+ */
+void
+setup_swiotlb (void)
+{
+ int i;
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+ if (!io_tlb_start)
+ BUG();
+ io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+
+ /*
+ * Allocate and initialize the free list array. This array is used
+ * to find contiguous free memory regions of size 2^IO_TLB_SHIFT between
+ * io_tlb_start and io_tlb_end.
+ */
+ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ for (i = 0; i < io_tlb_nslabs; i++)
+ io_tlb_list[i] = io_tlb_nslabs - i;
+ io_tlb_index = 0;
+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+
+ printk("Placing software IO TLB between 0x%p - 0x%p\n", io_tlb_start, io_tlb_end);
+}
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+static void *
+__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction)
+{
+ unsigned long flags;
+ char *dma_addr;
+ unsigned int i, nslots, stride, index, wrap;
+
+ /*
+ * For mappings greater than a page size, we limit the stride (and hence alignment)
+ * to a page size.
+ */
+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ if (size > (1 << PAGE_SHIFT))
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ else
+ stride = nslots;
+
+ if (!nslots)
+ BUG();
+
+ /*
+	 * Find a suitable number of contiguous IO TLB entries that will fit this request and allocate a buffer
+ * from that IO TLB pool.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ wrap = index = ALIGN(io_tlb_index, stride);
+ do {
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot and mark the
+ * entries as '0' indicating unavailable.
+ */
+ if (io_tlb_list[index] >= nslots) {
+ for (i = index; i < index + nslots; i++)
+ io_tlb_list[i] = 0;
+ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+
+ /*
+ * Update the indices to avoid searching in the next round.
+ */
+ io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0;
+
+ goto found;
+ }
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ } while (index != wrap);
+
+ /*
+ * XXX What is a suitable recovery mechanism here? We cannot
+ * sleep because we are called from with in interrupts!
+		 * sleep because we are called from within interrupt context!
+ panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size);
+found:
+ }
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+ * Save away the mapping from the original address to the DMA address. This is needed
+ * when we sync the memory. Then we sync the buffer if needed.
+ */
+ io_tlb_orig_addr[index] = buffer;
+ if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL)
+ memcpy(dma_addr, buffer, size);
+
+ return dma_addr;
+}
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ */
+static void
+__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction)
+{
+ unsigned long flags;
+ int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ char *buffer = io_tlb_orig_addr[index];
+
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL))
+ /*
+ * bounce... copy the data back into the original buffer
+ * and delete the bounce buffer.
+ */
+ memcpy(buffer, dma_addr, size);
+
+ /*
+ * Return the buffer to the free list by setting the corresponding entries to indicate
+	 * the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries with slots below
+ * and above the pool being returned.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ int count = ((index + nslots) < io_tlb_nslabs ? io_tlb_list[index + nslots] : 0);
+ /*
+		 * Step 1: return the slots to the free list, merging them with the succeeding free slots
+ */
+ for (i = index + nslots - 1; i >= index; i--)
+ io_tlb_list[i] = ++count;
+ /*
+		 * Step 2: merge the returned slots with the preceding slots, if available (non-zero)
+ */
+ for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--)
+ io_tlb_list[i] += io_tlb_list[index];
+ }
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
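
The free-list bookkeeping used by the two routines above is easier to see in isolation: io_tlb_list[i] holds the number of contiguous free slabs starting at slot i, allocation zeroes the claimed range, and freeing rebuilds the counts while merging with the free run that follows and the one that precedes. The user-space model below is a sketch of that scheme only (the slab count, first-fit search and the example in main() are arbitrary), not the kernel code itself.

/* Hedged model of the io_tlb_list counting scheme. */
#include <stdio.h>

#define NSLABS	16
static unsigned int list[NSLABS];

static void init(void)
{
	for (int i = 0; i < NSLABS; i++)
		list[i] = NSLABS - i;		/* everything free */
}

static int alloc(int nslots)			/* simplified: first fit, stride 1 */
{
	for (int i = 0; i + nslots <= NSLABS; i++)
		if (list[i] >= (unsigned int) nslots) {
			for (int j = i; j < i + nslots; j++)
				list[j] = 0;	/* mark busy */
			return i;
		}
	return -1;
}

static void release(int index, int nslots)
{
	unsigned int count = (index + nslots) < NSLABS ? list[index + nslots] : 0;

	for (int i = index + nslots - 1; i >= index; i--)
		list[i] = ++count;		/* step 1: merge with the free run above */
	for (int i = index - 1; i >= 0 && list[i]; i--)
		list[i] += list[index];		/* step 2: merge with the free run below */
}

int main(void)
{
	init();
	int a = alloc(4);			/* slots 0..3 */
	int b = alloc(2);			/* slots 4..5 */
	release(a, 4);				/* 4..5 still busy, so list[0] becomes 4 */
	printf("a=%d b=%d list[0]=%u\n", a, b, list[0]);
	return 0;
}
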
+
+static void
+__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction)
+{
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ char *buffer = io_tlb_orig_addr[index];
+
+ /*
+ * bounce... copy the data back into/from the original buffer
+ * XXX How do you handle PCI_DMA_BIDIRECTIONAL here ?
+ */
+ if (direction == PCI_DMA_FROMDEVICE)
+ memcpy(buffer, dma_addr, size);
+ else if (direction == PCI_DMA_TODEVICE)
+ memcpy(dma_addr, buffer, size);
+ else
+ BUG();
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode.
+ * The PCI address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+dma_addr_t
+pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction)
+{
+ unsigned long pci_addr = virt_to_phys(ptr);
+
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ /*
+ * Check if the PCI device can DMA to ptr... if so, just return ptr
+ */
+ if ((pci_addr & ~hwdev->dma_mask) == 0)
+ /*
+		 * Device is capable of DMA'ing to the
+ * buffer... just return the PCI address of ptr
+ */
+ return pci_addr;
+
+ /*
+ * get a bounce buffer:
+ */
+ pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction));
+
+ /*
+ * Ensure that the address returned is DMA'ble:
+ */
+ if ((pci_addr & ~hwdev->dma_mask) != 0)
+ panic("__pci_map_single: bounce buffer is not DMA'ble");
+
+ return pci_addr;
+}
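
The decision of whether to bounce at all is a single mask test against the device's addressing capability, as in the two checks above. A stand-alone sketch of that predicate (the addresses and mask are made-up example values):

/* Hedged sketch of the dma_mask test; values are examples only. */
#include <stdio.h>
#include <stdint.h>

static int needs_bounce(uint64_t phys_addr, uint64_t dma_mask)
{
	/* any address bit outside the mask is unreachable by the device */
	return (phys_addr & ~dma_mask) != 0;
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;	/* a 32-bit-only PCI device */

	printf("%d\n", needs_bounce(0x00000000fee00000ULL, mask32));	/* 0: direct */
	printf("%d\n", needs_bounce(0x0000000400000000ULL, mask32));	/* 1: bounce */
	return 0;
}
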
+
+/*
+ * Unmap a single streaming mode DMA translation. The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+void
+pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction)
+{
+ char *dma_addr = phys_to_virt(pci_addr);
+
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+ __pci_unmap_single(hwdev, dma_addr, size, direction);
+}
+
+/*
+ * Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so. At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+void
+pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction)
+{
+ char *dma_addr = phys_to_virt(pci_addr);
+
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+ __pci_sync_single(hwdev, dma_addr, size, direction);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+int
+pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
+{
+ int i;
+
+ if (direction == PCI_DMA_NONE)
+ BUG();
+
+ for (i = 0; i < nelems; i++, sg++) {
+ sg->orig_address = sg->address;
+ if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) {
+ sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction);
+ }
+ }
+ return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+void
+pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
+{
+ int i;
+
+ if (direction == PCI_DMA_NONE)
+ BUG();
+
+ for (i = 0; i < nelems; i++, sg++)
+ if (sg->orig_address != sg->address) {
+ __pci_unmap_single(hwdev, sg->address, sg->length, direction);
+ sg->address = sg->orig_address;
+ }
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA
+ * translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+void
+pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
+{
+ int i;
+
+ if (direction == PCI_DMA_NONE)
+ BUG();
+
+ for (i = 0; i < nelems; i++, sg++)
+ if (sg->orig_address != sg->address)
+ __pci_sync_single(hwdev, sg->address, sg->length, direction);
+}
+
+#else
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+extern inline dma_addr_t
+pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ return virt_to_bus(ptr);
+}
+
+/*
+ * Unmap a single streaming mode DMA translation. The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+extern inline void
+pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ /* Nothing to do */
+}
+/*
+ * Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+extern inline int
+pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ return nents;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+extern inline void
+pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ /* Nothing to do */
+}
+/*
+ * Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so. At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+extern inline void
+pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ /* Nothing to do */
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA
+ * translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+extern inline void
+pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
+{
+ if (direction == PCI_DMA_NONE)
+ BUG();
+ /* Nothing to do */
+}
+
+#endif /* CONFIG_SWIOTLB */
void *
pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
{
- void *ret;
+ unsigned long pci_addr;
int gfp = GFP_ATOMIC;
+ void *ret;
- if (!hwdev || hwdev->dma_mask == 0xffffffff)
- gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */
+ if (!hwdev || hwdev->dma_mask <= 0xffffffff)
+ gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */
ret = (void *)__get_free_pages(gfp, get_order(size));
+ if (!ret)
+ return NULL;
- if (ret) {
- memset(ret, 0, size);
- *dma_handle = virt_to_bus(ret);
- }
+ memset(ret, 0, size);
+ pci_addr = virt_to_phys(ret);
+ if ((pci_addr & ~hwdev->dma_mask) != 0)
+ panic("pci_alloc_consistent: allocated memory is out of range for PCI device");
+ *dma_handle = pci_addr;
return ret;
}
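
The relaxed test in pci_alloc_consistent() (<= instead of ==) also covers devices whose mask is narrower than 32 bits; a trivial illustration of the predicate (the mask values are examples):

/* Hedged illustration of the dma_mask <= 0xffffffff test above. */
#include <stdio.h>

static int wants_low_memory(unsigned long long dma_mask)
{
	return dma_mask <= 0xffffffffULL;	/* 32-bit or narrower device */
}

int main(void)
{
	printf("%d\n", wants_low_memory(0x00ffffffULL));	/* 24-bit device: 1 */
	printf("%d\n", wants_low_memory(0xffffffffULL));	/* 32-bit device: 1 */
	printf("%d\n", wants_low_memory(~0ULL));		/* 64-bit capable: 0 */
	return 0;
}
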
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 29291e1f9..752b2a9a1 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -11,6 +11,7 @@
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <asm/errno.h>
@@ -55,24 +56,23 @@
#define WRITE_PMCS 0xa1
#define READ_PMDS 0xa2
#define STOP_PMCS 0xa3
-#define IA64_COUNTER_MASK 0xffffffffffffff6f
-#define PERF_OVFL_VAL 0xffffffff
+#define IA64_COUNTER_MASK 0xffffffffffffff6fL
+#define PERF_OVFL_VAL 0xffffffffL
+
+volatile int used_by_system;
struct perfmon_counter {
unsigned long data;
unsigned long counter_num;
};
-unsigned long pmds[MAX_PERF_COUNTER];
-struct task_struct *perf_owner=NULL;
+unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER];
asmlinkage unsigned long
sys_perfmonctl (int cmd1, int cmd2, void *ptr)
{
struct perfmon_counter tmp, *cptr = ptr;
- unsigned long pmd, cnum, dcr, flags;
- struct task_struct *p;
- struct pt_regs *regs;
+ unsigned long cnum, dcr, flags;
struct perf_counter;
int i;
@@ -80,22 +80,24 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr)
case WRITE_PMCS: /* Writes to PMC's and clears PMDs */
case WRITE_PMCS_AND_START: /* Also starts counting */
- if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2))
- return -EFAULT;
+ if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system)
+ return -EINVAL;
- if (cmd2 > MAX_PERF_COUNTER)
+ if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2))
return -EFAULT;
- if (perf_owner && perf_owner != current)
- return -EBUSY;
- perf_owner = current;
+ current->thread.flags |= IA64_THREAD_PM_VALID;
for (i = 0; i < cmd2; i++, cptr++) {
copy_from_user(&tmp, cptr, sizeof(tmp));
/* XXX need to check validity of counter_num and perhaps data!! */
+ if (tmp.counter_num < 4
+ || tmp.counter_num >= 4 + MAX_PERF_COUNTER - used_by_system)
+ return -EFAULT;
+
ia64_set_pmc(tmp.counter_num, tmp.data);
ia64_set_pmd(tmp.counter_num, 0);
- pmds[tmp.counter_num - 4] = 0;
+ pmds[smp_processor_id()][tmp.counter_num - 4] = 0;
}
if (cmd1 == WRITE_PMCS_AND_START) {
@@ -104,26 +106,13 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr)
dcr |= IA64_DCR_PP;
ia64_set_dcr(dcr);
local_irq_restore(flags);
-
- /*
- * This is a no can do. It obviously wouldn't
- * work on SMP where another process may not
- * be blocked at all. We need to put in a perfmon
- * IPI to take care of MP systems. See blurb above.
- */
- lock_kernel();
- for_each_task(p) {
- regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) -1 ;
- ia64_psr(regs)->pp = 1;
- }
- unlock_kernel();
ia64_set_pmc(0, 0);
}
break;
case READ_PMDS:
- if (cmd2 > MAX_PERF_COUNTER)
- return -EFAULT;
+ if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system)
+ return -EINVAL;
if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2))
return -EFAULT;
@@ -153,9 +142,13 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr)
* when we re-enabled interrupts. When I muck with dcr,
* is the irq_save/restore needed?
*/
- for (i = 0, cnum = 4;i < MAX_PERF_COUNTER; i++, cnum++, cptr++){
- pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL);
- put_user(pmd, &cptr->data);
+ for (i = 0, cnum = 4;i < cmd2; i++, cnum++, cptr++) {
+ tmp.data = (pmds[smp_processor_id()][i]
+ + (ia64_get_pmd(cnum) & PERF_OVFL_VAL));
+ tmp.counter_num = cnum;
+ if (copy_to_user(cptr, &tmp, sizeof(tmp)))
+ return -EFAULT;
+ //put_user(pmd, &cptr->data);
}
local_irq_save(flags);
__asm__ __volatile__("ssm psr.pp");
@@ -167,30 +160,22 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr)
case STOP_PMCS:
ia64_set_pmc(0, 1);
- for (i = 0; i < MAX_PERF_COUNTER; ++i)
- ia64_set_pmc(i, 0);
+ ia64_srlz_d();
+ for (i = 0; i < MAX_PERF_COUNTER - used_by_system; ++i)
+ ia64_set_pmc(4+i, 0);
- local_irq_save(flags);
- dcr = ia64_get_dcr();
- dcr &= ~IA64_DCR_PP;
- ia64_set_dcr(dcr);
- local_irq_restore(flags);
- /*
- * This is a no can do. It obviously wouldn't
- * work on SMP where another process may not
- * be blocked at all. We need to put in a perfmon
- * IPI to take care of MP systems. See blurb above.
- */
- lock_kernel();
- for_each_task(p) {
- regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1;
- ia64_psr(regs)->pp = 0;
+ if (!used_by_system) {
+ local_irq_save(flags);
+ dcr = ia64_get_dcr();
+ dcr &= ~IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
}
- unlock_kernel();
- perf_owner = NULL;
+ current->thread.flags &= ~(IA64_THREAD_PM_VALID);
break;
default:
+ return -EINVAL;
break;
}
return 0;
@@ -202,13 +187,13 @@ update_counters (void)
unsigned long mask, i, cnum, val;
mask = ia64_get_pmc(0) >> 4;
- for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) {
+ for (i = 0, cnum = 4; i < MAX_PERF_COUNTER - used_by_system; cnum++, i++, mask >>= 1) {
+ val = 0;
if (mask & 0x1)
- val = PERF_OVFL_VAL;
- else
+ val += PERF_OVFL_VAL + 1;
/* since we got an interrupt, might as well clear every pmd. */
- val = ia64_get_pmd(cnum) & PERF_OVFL_VAL;
- pmds[i] += val;
+ val += ia64_get_pmd(cnum) & PERF_OVFL_VAL;
+ pmds[smp_processor_id()][i] += val;
ia64_set_pmd(cnum, 0);
}
}
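
The accumulation above maintains a 64-bit software count per counter on top of the 32-bit hardware field: when the overflow bit for a counter is set, one full period (PERF_OVFL_VAL + 1) is credited, and the current hardware residue is always folded in before the pmd is cleared. The sketch below models only that arithmetic; the hardware reads are faked and it is not kernel code.

/* Hedged model of the 32-bit -> 64-bit counter extension. */
#include <stdio.h>
#include <stdint.h>

#define PERF_OVFL_VAL	0xffffffffULL

static uint64_t soft_pmd;			/* plays the role of pmds[cpu][i] */

static void on_overflow_interrupt(int overflowed, uint64_t hw_pmd)
{
	uint64_t val = 0;

	if (overflowed)
		val += PERF_OVFL_VAL + 1;	/* one full wrap of the 32-bit counter */
	val += hw_pmd & PERF_OVFL_VAL;		/* events since the last wrap */
	soft_pmd += val;
	/* the kernel then clears the hardware counter: ia64_set_pmd(cnum, 0) */
}

int main(void)
{
	on_overflow_interrupt(1, 0x12);		/* wrapped, plus 0x12 fresh events */
	on_overflow_interrupt(0, 0x400);	/* no wrap this time */
	printf("total events: 0x%llx\n", (unsigned long long) soft_pmd);
	return 0;
}
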
@@ -221,20 +206,61 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
ia64_srlz_d();
}
+static struct irqaction perfmon_irqaction = {
+ handler: perfmon_interrupt,
+ flags: SA_INTERRUPT,
+ name: "perfmon"
+};
+
void
perfmon_init (void)
{
- if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) {
- printk("perfmon_init: could not allocate performance monitor vector %u\n",
- PERFMON_IRQ);
- return;
- }
+ irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU;
+ irq_desc[PERFMON_IRQ].handler = &irq_type_ia64_sapic;
+ setup_irq(PERFMON_IRQ, &perfmon_irqaction);
+
ia64_set_pmv(PERFMON_IRQ);
ia64_srlz_d();
printk("Initialized perfmon vector to %u\n",PERFMON_IRQ);
}
+void
+perfmon_init_percpu (void)
+{
+ ia64_set_pmv(PERFMON_IRQ);
+ ia64_srlz_d();
+}
+
+void
+ia64_save_pm_regs (struct thread_struct *t)
+{
+ int i;
+
+ ia64_set_pmc(0, 1);
+ ia64_srlz_d();
+ for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) {
+ t->pmd[i] = ia64_get_pmd(4+i);
+ t->pmod[i] = pmds[smp_processor_id()][i];
+ t->pmc[i] = ia64_get_pmc(4+i);
+ }
+}
+
+void
+ia64_load_pm_regs (struct thread_struct *t)
+{
+ int i;
+
+ for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) {
+ ia64_set_pmd(4+i, t->pmd[i]);
+ pmds[smp_processor_id()][i] = t->pmod[i];
+ ia64_set_pmc(4+i, t->pmc[i]);
+ }
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+}
+
#else /* !CONFIG_PERFMON */
+
asmlinkage unsigned long
sys_perfmonctl (int cmd1, int cmd2, void *ptr)
{
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 3759e52f8..e586a4074 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -27,6 +27,8 @@
#include <asm/unwind.h>
#include <asm/user.h>
+#ifdef CONFIG_IA64_NEW_UNWIND
+
static void
do_show_stack (struct unw_frame_info *info, void *arg)
{
@@ -44,6 +46,8 @@ do_show_stack (struct unw_frame_info *info, void *arg)
} while (unw_unwind(info) >= 0);
}
+#endif
+
void
show_stack (struct task_struct *task)
{
@@ -118,15 +122,14 @@ cpu_idle (void *unused)
current->nice = 20;
current->counter = -100;
-#ifdef CONFIG_SMP
- if (!current->need_resched)
- min_xtp();
-#endif
while (1) {
- while (!current->need_resched) {
+#ifdef CONFIG_SMP
+ if (!current->need_resched)
+ min_xtp();
+#endif
+ while (!current->need_resched)
continue;
- }
#ifdef CONFIG_SMP
normal_xtp();
#endif
@@ -157,11 +160,12 @@ cpu_idle (void *unused)
void
ia64_save_extra (struct task_struct *task)
{
- extern void ia64_save_debug_regs (unsigned long *save_area);
- extern void ia32_save_state (struct thread_struct *thread);
-
if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
ia64_save_debug_regs(&task->thread.dbr[0]);
+#ifdef CONFIG_PERFMON
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+ ia64_save_pm_regs(&task->thread);
+#endif
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_save_state(&task->thread);
}
@@ -169,11 +173,12 @@ ia64_save_extra (struct task_struct *task)
void
ia64_load_extra (struct task_struct *task)
{
- extern void ia64_load_debug_regs (unsigned long *save_area);
- extern void ia32_load_state (struct thread_struct *thread);
-
if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
ia64_load_debug_regs(&task->thread.dbr[0]);
+#ifdef CONFIG_PERFMON
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
+ ia64_load_pm_regs(&task->thread);
+#endif
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_load_state(&task->thread);
}
@@ -532,17 +537,6 @@ exit_thread (void)
}
}
-/*
- * Free remaining state associated with DEAD_TASK. This is called
- * after the parent of DEAD_TASK has collected the exist status of the
- * task via wait().
- */
-void
-release_thread (struct task_struct *dead_task)
-{
- /* nothing to do */
-}
-
unsigned long
get_wchan (struct task_struct *p)
{
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index ae8991c51..10868ce41 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -549,6 +549,7 @@ void
ia64_sync_fph (struct task_struct *child)
{
if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) {
+ ia64_psr(ia64_task_regs(child))->mfh = 0;
ia64_set_fpu_owner(0);
ia64_save_fpu(&child->thread.fph[0]);
child->thread.flags |= IA64_THREAD_FPH_VALID;
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index f4b8ce9dd..f73cd8968 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -156,6 +156,14 @@ ia64_sal_init (struct ia64_sal_systab *systab)
struct ia64_sal_desc_platform_feature *pf = (void *) p;
printk("SAL: Platform features ");
+#ifdef CONFIG_IA64_HAVE_IRQREDIR
+ /*
+ * Early versions of SAL say we don't have
+ * IRQ redirection, even though we do...
+ */
+ pf->feature_mask |= (1 << 1);
+#endif
+
if (pf->feature_mask & (1 << 0))
printk("BusLock ");
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
index bc55670bf..1bbe4feab 100644
--- a/arch/ia64/kernel/semaphore.c
+++ b/arch/ia64/kernel/semaphore.c
@@ -222,9 +222,6 @@ down_read_failed (struct rw_semaphore *sem)
void
__down_read_failed (struct rw_semaphore *sem, long count)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
while (1) {
if (count == -1) {
down_read_failed_biased(sem);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index fcb4e6190..62e3e19ea 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -122,6 +122,10 @@ setup_arch (char **cmdline_p)
*/
memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param));
+ *cmdline_p = __va(ia64_boot_param.command_line);
+ strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
+ saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
+
efi_init();
max_pfn = 0;
@@ -133,19 +137,65 @@ setup_arch (char **cmdline_p)
*/
bootmap_start = PAGE_ALIGN(__pa(&_end));
if (ia64_boot_param.initrd_size)
- bootmap_start = PAGE_ALIGN(bootmap_start + ia64_boot_param.initrd_size);
+ bootmap_start = PAGE_ALIGN(bootmap_start
+ + ia64_boot_param.initrd_size);
bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
efi_memmap_walk(free_available_memory, 0);
reserve_bootmem(bootmap_start, bootmap_size);
+
#ifdef CONFIG_BLK_DEV_INITRD
initrd_start = ia64_boot_param.initrd_start;
+
if (initrd_start) {
+ u64 start, size;
+# define is_same_page(a,b) (((a)&PAGE_MASK) == ((b)&PAGE_MASK))
+
+#if 1
+ /* XXX for now some backwards compatibility... */
+ if (initrd_start >= PAGE_OFFSET)
+ printk("Warning: boot loader passed virtual address "
+ "for initrd, please upgrade the loader\n");
+		else
+#endif
+ /*
+ * The loader ONLY passes physical addresses
+ */
+ initrd_start = (unsigned long)__va(initrd_start);
initrd_end = initrd_start+ia64_boot_param.initrd_size;
+ start = initrd_start;
+ size = ia64_boot_param.initrd_size;
+
printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
(void *) initrd_start, ia64_boot_param.initrd_size);
- reserve_bootmem(virt_to_phys(initrd_start), ia64_boot_param.initrd_size);
+
+ /*
+ * The kernel end and the beginning of initrd can be
+ * on the same page. This would cause the page to be
+ * reserved twice. While not harmful, it does lead to
+ * a warning message which can cause confusion. Thus,
+ * we make sure that in this case we only reserve new
+ * pages, i.e., initrd only pages. We need to:
+ *
+ * - align up start
+ * - adjust size of reserved section accordingly
+ *
+ * It should be noted that this operation is only
+ * valid for the reserve_bootmem() call and does not
+		 * affect the integrity of the initrd itself.
+ *
+ * reserve_bootmem() considers partial pages as reserved.
+ */
+ if (is_same_page(initrd_start, (unsigned long)&_end)) {
+ start = PAGE_ALIGN(start);
+ size -= start-initrd_start;
+
+ printk("Initial ramdisk & kernel on the same page: "
+ "reserving start=%lx size=%ld bytes\n",
+ start, size);
+ }
+ reserve_bootmem(__pa(start), size);
}
#endif
#if 0
@@ -164,27 +214,21 @@ setup_arch (char **cmdline_p)
/* process SAL system table: */
ia64_sal_init(efi.sal_systab);
- *cmdline_p = __va(ia64_boot_param.command_line);
- strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
- saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
-
- printk("args to kernel: %s\n", *cmdline_p);
-
#ifdef CONFIG_SMP
bootstrap_processor = hard_smp_processor_id();
current->processor = bootstrap_processor;
#endif
cpu_init(); /* initialize the bootstrap CPU */
+#ifdef CONFIG_IA64_GENERIC
+ machvec_init(acpi_get_sysname());
+#endif
+
if (efi.acpi) {
/* Parse the ACPI tables */
acpi_parse(efi.acpi);
}
-#ifdef CONFIG_IA64_GENERIC
- machvec_init(acpi_get_sysname());
-#endif
-
#ifdef CONFIG_VT
# if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
@@ -197,8 +241,16 @@ setup_arch (char **cmdline_p)
/* enable IA-64 Machine Check Abort Handling */
ia64_mca_init();
#endif
+
paging_init();
platform_setup(cmdline_p);
+
+#ifdef CONFIG_SWIOTLB
+ {
+ extern void setup_swiotlb (void);
+ setup_swiotlb();
+ }
+#endif
}
/*
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index b5153433f..d64305cf3 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -320,6 +320,58 @@ smp_send_flush_tlb(void)
#endif /* !CONFIG_ITANIUM_PTCG */
/*
+ * Run a function on another CPU
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <retry> If true, keep retrying until ready.
+ * <wait> If true, wait until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute <func>
+ * or has already executed it.
+ */
+
+int
+smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int retry, int wait)
+{
+ struct smp_call_struct data;
+ long timeout;
+ int cpus = 1;
+
+ if (cpuid == smp_processor_id()) {
+ printk(__FUNCTION__" trying to call self\n");
+ return -EBUSY;
+ }
+
+ data.func = func;
+ data.info = info;
+ data.wait = wait;
+ atomic_set(&data.unstarted_count, cpus);
+ atomic_set(&data.unfinished_count, cpus);
+
+ if (pointer_lock(&smp_call_function_data, &data, retry))
+ return -EBUSY;
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_single(cpuid, IPI_CALL_FUNC);
+
+ /* Wait for response */
+ timeout = jiffies + HZ;
+ while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout))
+ barrier();
+ if (atomic_read(&data.unstarted_count) > 0) {
+ smp_call_function_data = NULL;
+ return -ETIMEDOUT;
+ }
+ if (wait)
+ while (atomic_read(&data.unfinished_count) > 0)
+ barrier();
+ /* unlock pointer */
+ smp_call_function_data = NULL;
+ return 0;
+}
+
+/*
* Run a function on all other CPUs.
* <func> The function to run. This must be fast and non-blocking.
* <info> An arbitrary pointer to pass to the function.
@@ -396,13 +448,19 @@ void
smp_do_timer(struct pt_regs *regs)
{
int cpu = smp_processor_id();
+ int user = user_mode(regs);
struct cpuinfo_ia64 *data = &cpu_data[cpu];
- if (!--data->prof_counter) {
- irq_enter(cpu, TIMER_IRQ);
- update_process_times(user_mode(regs));
+ if (--data->prof_counter <= 0) {
data->prof_counter = data->prof_multiplier;
- irq_exit(cpu, TIMER_IRQ);
+ /*
+ * update_process_times() expects us to have done irq_enter().
+		 * Besides, if we don't, timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
+ irq_enter(cpu, 0);
+ update_process_times(user);
+ irq_exit(cpu, 0);
}
}
@@ -473,6 +531,11 @@ smp_callin(void)
extern void ia64_rid_init(void);
extern void ia64_init_itm(void);
extern void ia64_cpu_local_tick(void);
+#ifdef CONFIG_PERFMON
+ extern void perfmon_init_percpu(void);
+#endif
+
+ efi_map_pal_code();
cpu_init();
@@ -481,6 +544,10 @@ smp_callin(void)
/* setup the CPU local timer tick */
ia64_init_itm();
+#ifdef CONFIG_PERFMON
+ perfmon_init_percpu();
+#endif
+
/* Disable all local interrupts */
ia64_set_lrr0(0, 1);
ia64_set_lrr1(0, 1);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 95b2b3fc3..96ff76c01 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -150,11 +150,13 @@ do_gettimeofday (struct timeval *tv)
static void
timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- static unsigned long last_time;
- static unsigned char count;
int cpu = smp_processor_id();
unsigned long new_itm;
+#if 0
+ static unsigned long last_time;
+ static unsigned char count;
int printed = 0;
+#endif
/*
* Here we are in the timer irq handler. We have irqs locally
@@ -192,7 +194,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
if (time_after(new_itm, ia64_get_itc()))
break;
-#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP))
+#if 0
/*
* SoftSDV in SMP mode is _slow_, so we do "lose" ticks,
* but it's really OK...
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 4003b20f1..bf1abd839 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -204,11 +204,13 @@ disabled_fph_fault (struct pt_regs *regs)
{
struct task_struct *fpu_owner = ia64_get_fpu_owner();
+ /* first, clear psr.dfh and psr.mfh: */
regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH);
if (fpu_owner != current) {
ia64_set_fpu_owner(current);
if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) {
+ ia64_psr(ia64_task_regs(fpu_owner))->mfh = 0;
fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID;
__ia64_save_fpu(fpu_owner->thread.fph);
}
@@ -216,6 +218,11 @@ disabled_fph_fault (struct pt_regs *regs)
__ia64_load_fpu(current->thread.fph);
} else {
__ia64_init_fpu();
+ /*
+ * Set mfh because the state in thread.fph does not match
+ * the state in the fph partition.
+ */
+ ia64_psr(regs)->mfh = 1;
}
}
}
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 739007a96..5d0049f32 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -62,7 +62,7 @@
#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1)
#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE)
-#define UNW_DEBUG 1
+#define UNW_DEBUG 0
#define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! */
#if UNW_DEBUG
diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S
index 3b16916d0..350e66256 100644
--- a/arch/ia64/lib/memcpy.S
+++ b/arch/ia64/lib/memcpy.S
@@ -1,3 +1,20 @@
+/*
+ *
+ * Optimized version of the standard memcpy() function
+ *
+ * Inputs:
+ * in0: destination address
+ * in1: source address
+ * in2: number of bytes to copy
+ * Output:
+ * no return value
+ *
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+
#include <asm/asmmacro.h>
GLOBAL_ENTRY(bcopy)
@@ -10,77 +27,254 @@ END(bcopy)
// FALL THROUGH
GLOBAL_ENTRY(memcpy)
-# define MEM_LAT 4
-
-# define N MEM_LAT-1
-# define Nrot ((MEM_LAT + 7) & ~7)
+# define MEM_LAT 2 /* latency to L1 cache */
# define dst r2
# define src r3
-# define len r9
-# define saved_pfs r10
-# define saved_lc r11
-# define saved_pr r16
-# define t0 r17
-# define cnt r18
+# define retval r8
+# define saved_pfs r9
+# define saved_lc r10
+# define saved_pr r11
+# define cnt r16
+# define src2 r17
+# define t0 r18
+# define t1 r19
+# define t2 r20
+# define t3 r21
+# define t4 r22
+# define src_end r23
+# define N (MEM_LAT + 4)
+# define Nrot ((N + 7) & ~7)
+
+ /*
+ * First, check if everything (src, dst, len) is a multiple of eight. If
+ * so, we handle everything with no taken branches (other than the loop
+ * itself) and a small icache footprint. Otherwise, we jump off to
+ * the more general copy routine handling arbitrary
+ * sizes/alignment etc.
+ */
UNW(.prologue)
UNW(.save ar.pfs, saved_pfs)
alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot
+#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
lfetch [in1]
+#else
+ nop.m 0
+#endif
+ or t0=in0,in1
+ ;;
- .rotr val[MEM_LAT]
- .rotp p[MEM_LAT]
-
+ or t0=t0,in2
UNW(.save ar.lc, saved_lc)
mov saved_lc=ar.lc
-
- or t0=in0,in1
UNW(.save pr, saved_pr)
mov saved_pr=pr
- UNW(.body)
-
- mov ar.ec=MEM_LAT
+ cmp.eq p6,p0=in2,r0 // zero length?
+ mov retval=in0 // return dst
+(p6) br.ret.spnt.many rp // zero length, return immediately
+ ;;
- mov r8=in0 // return dst
- shr cnt=in2,3 // number of 8-byte words to copy
+ mov dst=in0 // copy because of rotation
+ shr.u cnt=in2,3 // number of 8-byte words to copy
mov pr.rot=1<<16
;;
- cmp.eq p6,p0=in2,r0 // zero length?
- or t0=t0,in2
-(p6) br.ret.spnt.many rp // yes, return immediately
- mov dst=in0 // copy because of rotation
- mov src=in1 // copy because of rotation
adds cnt=-1,cnt // br.ctop is repeat/until
+ cmp.gtu p7,p0=16,in2 // copying less than 16 bytes?
+ UNW(.body)
+ mov ar.ec=N
;;
+
and t0=0x7,t0
mov ar.lc=cnt
;;
cmp.ne p6,p0=t0,r0
-(p6) br.cond.spnt.few slow_memcpy
+ mov src=in1 // copy because of rotation
+(p7) br.cond.spnt.few memcpy_short
+(p6) br.cond.spnt.few memcpy_long
+ ;;
+ .rotr val[N]
+ .rotp p[N]
1:
(p[0]) ld8 val[0]=[src],8
-(p[N]) st8 [dst]=val[N],8
- br.ctop.sptk.few 1b
+(p[N-1])st8 [dst]=val[N-1],8
+ br.ctop.dptk.few 1b
;;
-.exit:
mov ar.lc=saved_lc
- mov pr=saved_pr,0xffffffffffff0000
+ mov pr=saved_pr,-1
mov ar.pfs=saved_pfs
br.ret.sptk.many rp
-slow_memcpy:
- adds cnt=-1,in2
+ /*
+	 * Small (<16 bytes) unaligned copying is done via a simple byte-at-a-time
+ * copy loop. This performs relatively poorly on Itanium, but it doesn't
+ * get used very often (gcc inlines small copies) and due to atomicity
+ * issues, we want to avoid read-modify-write of entire words.
+ */
+ .align 32
+memcpy_short:
+ adds cnt=-1,in2 // br.ctop is repeat/until
+ mov ar.ec=MEM_LAT
;;
mov ar.lc=cnt
;;
+ /*
+ * It is faster to put a stop bit in the loop here because it makes
+ * the pipeline shorter (and latency is what matters on short copies).
+ */
1:
(p[0]) ld1 val[0]=[src],1
-(p[N]) st1 [dst]=val[N],1
- br.ctop.sptk.few 1b
- br.sptk.few .exit
+ ;;
+(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1
+ br.ctop.dptk.few 1b
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,-1
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.many rp
+
+ /*
+ * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't
+ * an overriding concern here, but throughput is. We first do
+ * sub-word copying until the destination is aligned, then we check
+ * if the source is also aligned. If so, we do a simple load/store-loop
+ * until there are less than 8 bytes left over and then we do the tail,
+ * by storing the last few bytes using sub-word copying. If the source
+ * is not aligned, we branch off to the non-congruent loop.
+ *
+ * stage: op:
+ * 0 ld
+ * :
+ * MEM_LAT+3 shrp
+ * MEM_LAT+4 st
+ *
+ * On Itanium, the pipeline itself runs without stalls. However, br.ctop
+ * seems to introduce an unavoidable bubble in the pipeline so the overall
+ * latency is 2 cycles/iteration. This gives us a _copy_ throughput
+	 * of 4 bytes/cycle.  Still not bad.
+ */
+# undef N
+# undef Nrot
+# define N (MEM_LAT + 5) /* number of stages */
+# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */
+
+#define LOG_LOOP_SIZE 6
+
+memcpy_long:
+ alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame
+ and t0=-8,src // t0 = src & ~7
+ and t2=7,src // t2 = src & 7
+ ;;
+ ld8 t0=[t0] // t0 = 1st source word
+ adds src2=7,src // src2 = (src + 7)
+ sub t4=r0,dst // t4 = -dst
+ ;;
+ and src2=-8,src2 // src2 = (src + 7) & ~7
+ shl t2=t2,3 // t2 = 8*(src & 7)
+ shl t4=t4,3 // t4 = 8*(dst & 7)
+ ;;
+ ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise
+ sub t3=64,t2 // t3 = 64-8*(src & 7)
+ shr.u t0=t0,t2
+ ;;
+ add src_end=src,in2
+ shl t1=t1,t3
+ mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7)
+ ;;
+ or t0=t0,t1
+ mov cnt=r0
+ adds src_end=-1,src_end
+ ;;
+(p3) st1 [dst]=t0,1
+(p3) shr.u t0=t0,8
+(p3) adds cnt=1,cnt
+ ;;
+(p4) st2 [dst]=t0,2
+(p4) shr.u t0=t0,16
+(p4) adds cnt=2,cnt
+ ;;
+(p5) st4 [dst]=t0,4
+(p5) adds cnt=4,cnt
+ and src_end=-8,src_end // src_end = last word of source buffer
+ ;;
+
+	// At this point, dst is aligned to 8 bytes and there are at least 16-7=9 bytes left to copy:
+
+1:{ add src=cnt,src // make src point to remainder of source buffer
+ sub cnt=in2,cnt // cnt = number of bytes left to copy
+ mov t4=ip
+ } ;;
+ and src2=-8,src // align source pointer
+ adds t4=memcpy_loops-1b,t4
+ mov ar.ec=N
+
+ and t0=7,src // t0 = src & 7
+ shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy
+ shl cnt=cnt,3 // move bits 0-2 to 3-5
+ ;;
+
+ .rotr val[N+1], w[2]
+ .rotp p[N]
+
+ cmp.ne p6,p0=t0,r0 // is src aligned, too?
+ shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7)
+ adds t2=-1,t2 // br.ctop is repeat/until
+ ;;
+ add t4=t0,t4
+	mov pr=cnt,0x38		// set (p5,p4,p3) to # of last-word bytes to copy
+ mov ar.lc=t2
+ ;;
+(p6) ld8 val[1]=[src2],8 // prime the pump...
+ mov b6=t4
+ br.sptk.few b6
+ ;;
+
+memcpy_tail:
+ // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is
+ // less than 8) and t0 contains the last few bytes of the src buffer:
+(p5) st4 [dst]=t0,4
+(p5) shr.u t0=t0,32
+ mov ar.lc=saved_lc
+ ;;
+(p4) st2 [dst]=t0,2
+(p4) shr.u t0=t0,16
+ mov ar.pfs=saved_pfs
+ ;;
+(p3) st1 [dst]=t0
+ mov pr=saved_pr,-1
+ br.ret.sptk.many rp
+
+///////////////////////////////////////////////////////
+ .align 64
+
+#define COPY(shift,index) \
+ 1: \
+ { .mfi \
+ (p[0]) ld8 val[0]=[src2],8; \
+ nop.f 0; \
+ (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \
+ }; \
+ { .mbb \
+ (p[MEM_LAT+4]) st8 [dst]=w[1],8; \
+ nop.b 0; \
+ br.ctop.dptk.few 1b; \
+ }; \
+ ;; \
+ ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \
+ ;; \
+ shrp t0=val[N-1],val[N-index],shift; \
+ br memcpy_tail
+memcpy_loops:
+ COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */
+ COPY(8, 0)
+ COPY(16, 0)
+ COPY(24, 0)
+ COPY(32, 0)
+ COPY(40, 0)
+ COPY(48, 0)
+ COPY(56, 0)
END(memcpy)
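
The heart of memcpy_long is the shrp (shift-right-pair) merge: two adjacent aligned source words are funnel-shifted so every store writes one fully-formed aligned destination word even though the source is misaligned. The C model below reproduces only that merge on a little-endian machine; the function names, the fixed word count, and the example in main() are made up for illustration, and the destination is assumed 8-byte aligned.

/* Hedged C model of the shrp-based misaligned copy; not the assembly above. */
#include <stdio.h>
#include <stdint.h>

/* funnel shift: 64 bits taken from the 128-bit pair {hi:lo}, starting at bit 'shift' */
static uint64_t shrp(uint64_t hi, uint64_t lo, unsigned int shift)
{
	return shift ? (lo >> shift) | (hi << (64 - shift)) : lo;
}

static void copy_words(uint64_t *dst, const unsigned char *src, int nwords)
{
	const uint64_t *src2 = (const uint64_t *) ((uintptr_t) src & ~7UL);
	unsigned int shift = ((uintptr_t) src & 7) * 8;	/* 8*(src & 7), as above */
	uint64_t prev = src2[0];

	for (int i = 0; i < nwords; i++) {
		uint64_t next = src2[i + 1];
		dst[i] = shrp(next, prev, shift);	/* one aligned 8-byte store */
		prev = next;
	}
}

int main(void)
{
	uint64_t srcbuf[9], dstbuf[4];
	unsigned char *src = (unsigned char *) srcbuf;
	unsigned char *out = (unsigned char *) dstbuf;

	for (int i = 0; i < 72; i++)
		src[i] = (unsigned char) i;

	copy_words(dstbuf, src + 3, 4);			/* source misaligned by 3 bytes */
	printf("%d %d\n", out[0], out[31]);		/* expect 3 and 34 */
	return 0;
}
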
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 8ddda7e11..3652cfc80 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -185,8 +185,42 @@ free_initmem (void)
void
free_initrd_mem(unsigned long start, unsigned long end)
{
+ /*
+ * EFI uses 4KB pages while the kernel can use 4KB or bigger.
+ * Thus EFI and the kernel may have different page sizes. It is
+ * therefore possible to have the initrd share the same page as
+ * the end of the kernel (given current setup).
+ *
+ * To avoid freeing/using the wrong page (kernel sized) we:
+ * - align up the beginning of initrd
+ * - keep the end untouched
+ *
+ * | |
+ * |=============| a000
+ * | |
+ * | |
+ * | | 9000
+ * |/////////////|
+ * |/////////////|
+ * |=============| 8000
+ * |///INITRD////|
+ * |/////////////|
+ * |/////////////| 7000
+ * | |
+ * |KKKKKKKKKKKKK|
+ * |=============| 6000
+ * |KKKKKKKKKKKKK|
+ * |KKKKKKKKKKKKK|
+ * K=kernel using 8KB pages
+ *
+ * In this example, we must free page 8000 ONLY. So we must align up
+ * initrd_start and keep initrd_end as is.
+ */
+ start = PAGE_ALIGN(start);
+
if (start < end)
printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);
+
for (; start < end; start += PAGE_SIZE) {
clear_bit(PG_reserved, &virt_to_page(start)->flags);
set_page_count(virt_to_page(start), 1);
@@ -423,5 +457,4 @@ mem_init (void)
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
#endif
- return;
}
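
Since the whole decision of which pages to free hinges on rounding the start up to a kernel page boundary, a tiny worked version of the diagram above may help (8KB kernel pages assumed, addresses taken from the diagram); this is illustrative user-space code, not the kernel routine:

/* Hedged illustration of the PAGE_ALIGN() rounding used by free_initrd_mem(). */
#include <stdio.h>

#define PAGE_SIZE	0x2000UL			/* assume 8KB kernel pages */
#define PAGE_ALIGN(a)	(((a) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long start = 0x7000, end = 0x9000;	/* initrd range from the diagram */

	start = PAGE_ALIGN(start);			/* 0x8000: skip the shared page */
	for (unsigned long p = start; p < end; p += PAGE_SIZE)
		printf("would free page at 0x%lx\n", p);	/* prints 0x8000 only */
	return 0;
}
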
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 026f88998..875ce446c 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -1,8 +1,11 @@
/*
* TLB support routines.
*
- * Copyright (C) 1998, 1999 Hewlett-Packard Co
- * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
+ * Modified RID allocation for SMP
*/
#include <linux/config.h>
#include <linux/init.h>
@@ -27,9 +30,11 @@
1 << _PAGE_SIZE_8K | \
1 << _PAGE_SIZE_4K )
-static void wrap_context (struct mm_struct *mm);
-
-unsigned long ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1;
+struct ia64_ctx ia64_ctx = {
+ lock: SPIN_LOCK_UNLOCKED,
+ next: 1,
+ limit: (1UL << IA64_HW_CONTEXT_BITS)
+};
/*
* Put everything in a struct so we avoid the global offset table whenever
@@ -106,49 +111,43 @@ flush_tlb_no_ptcg (unsigned long start, unsigned long end, unsigned long nbits)
#endif /* CONFIG_SMP && !CONFIG_ITANIUM_PTCG */
-void
-get_new_mmu_context (struct mm_struct *mm)
-{
- if ((ia64_next_context & IA64_HW_CONTEXT_MASK) == 0) {
- wrap_context(mm);
- }
- mm->context = ia64_next_context++;
-}
-
/*
- * This is where we handle the case where (ia64_next_context &
- * IA64_HW_CONTEXT_MASK) == 0. Whenever this happens, we need to
- * flush the entire TLB and skip over region id number 0, which is
- * used by the kernel.
+ * Acquire the ia64_ctx.lock before calling this function!
*/
-static void
-wrap_context (struct mm_struct *mm)
+void
+wrap_mmu_context (struct mm_struct *mm)
{
- struct task_struct *task;
+ struct task_struct *tsk;
+ unsigned long tsk_context;
+
+ if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS))
+ ia64_ctx.next = 300; /* skip daemons */
+ ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS);
/*
- * We wrapped back to the first region id so we nuke the TLB
- * so we can switch to the next generation of region ids.
+	 * Scan all the tasks' mm->context and set a proper safe range
*/
- __flush_tlb_all();
- if (ia64_next_context++ == 0) {
- /*
- * Oops, we've used up all 64 bits of the context
- * space---walk through task table to ensure we don't
- * get tricked into using an old context. If this
- * happens, the machine has been running for a long,
- * long time!
- */
- ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1;
- read_lock(&tasklist_lock);
- for_each_task (task) {
- if (task->mm == mm)
- continue;
- flush_tlb_mm(mm);
+ read_lock(&tasklist_lock);
+ repeat:
+ for_each_task(tsk) {
+ if (!tsk->mm)
+ continue;
+ tsk_context = tsk->mm->context;
+ if (tsk_context == ia64_ctx.next) {
+ if (++ia64_ctx.next >= ia64_ctx.limit) {
+ /* empty range: reset the range limit and start over */
+ if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS))
+ ia64_ctx.next = 300;
+ ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS);
+ goto repeat;
+ }
}
- read_unlock(&tasklist_lock);
+ if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
+ ia64_ctx.limit = tsk_context;
}
+ read_unlock(&tasklist_lock);
+ flush_tlb_all();
}
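
What wrap_mmu_context() computes is a half-open range [next, limit) of region IDs that no live mm is currently using, restarting the scan whenever the candidate range collapses. The small model below runs the same search over a fixed in-use set; the bit width, the starting value and the sample contexts are arbitrary stand-ins, not kernel data.

/* Hedged model of the wrap_mmu_context() range search. */
#include <stdio.h>

#define CTX_BITS	18			/* stand-in for IA64_HW_CONTEXT_BITS */
#define CTX_MAX		(1UL << CTX_BITS)
#define CTX_FIRST	300UL			/* skip daemons, as above */

static unsigned long in_use[] = { 300, 301, 5000, 70000 };	/* live mm->context values */
#define N_IN_USE	(sizeof(in_use) / sizeof(in_use[0]))

int main(void)
{
	unsigned long next = CTX_MAX;		/* pretend we just wrapped */
	unsigned long limit = CTX_MAX;

	if (next >= CTX_MAX)
		next = CTX_FIRST;
repeat:
	for (unsigned int i = 0; i < N_IN_USE; i++) {
		unsigned long ctx = in_use[i];

		if (ctx == next) {
			if (++next >= limit) {	/* empty range: reset and start over */
				next = CTX_FIRST;
				limit = CTX_MAX;
				goto repeat;
			}
		}
		if (ctx > next && ctx < limit)
			limit = ctx;		/* shrink the range to the next used ctx */
	}
	printf("free context range: [%lu, %lu)\n", next, limit);	/* [302, 5000) here */
	return 0;
}
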
void
diff --git a/arch/ia64/sn/sn1/irq.c b/arch/ia64/sn/sn1/irq.c
index df8e56943..a8270fd2a 100644
--- a/arch/ia64/sn/sn1/irq.c
+++ b/arch/ia64/sn/sn1/irq.c
@@ -1,9 +1,10 @@
#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
-#include <asm/irq.h>
#include <asm/ptrace.h>
-static int
+static unsigned int
sn1_startup_irq(unsigned int irq)
{
return(0);
@@ -24,23 +25,16 @@ sn1_enable_irq(unsigned int irq)
{
}
-static int
-sn1_handle_irq(unsigned int irq, struct pt_regs *regs)
-{
- return(0);
-}
-
struct hw_interrupt_type irq_type_sn1 = {
"sn1_irq",
sn1_startup_irq,
sn1_shutdown_irq,
- sn1_handle_irq,
sn1_enable_irq,
sn1_disable_irq
};
void
-sn1_irq_init (struct irq_desc desc[NR_IRQS])
+sn1_irq_init (void)
{
int i;
diff --git a/arch/ia64/sn/sn1/machvec.c b/arch/ia64/sn/sn1/machvec.c
index 2e36b2e08..409d9a2ea 100644
--- a/arch/ia64/sn/sn1/machvec.c
+++ b/arch/ia64/sn/sn1/machvec.c
@@ -1,4 +1,2 @@
+#define MACHVEC_PLATFORM_NAME sn1
#include <asm/machvec_init.h>
-#include <asm/machvec_sn1.h>
-
-MACHVEC_DEFINE(sn1)
diff --git a/arch/ia64/sn/sn1/setup.c b/arch/ia64/sn/sn1/setup.c
index 45242fc26..7b397bb6b 100644
--- a/arch/ia64/sn/sn1/setup.c
+++ b/arch/ia64/sn/sn1/setup.c
@@ -13,6 +13,7 @@
#include <linux/console.h>
#include <linux/timex.h>
#include <linux/sched.h>
+#include <linux/ioport.h>
#include <asm/io.h>
#include <asm/machvec.h>
diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S
index 5c199bc1a..c8c13363c 100644
--- a/arch/ia64/vmlinux.lds.S
+++ b/arch/ia64/vmlinux.lds.S
@@ -46,6 +46,15 @@ SECTIONS
{ *(__ex_table) }
__stop___ex_table = .;
+#if defined(CONFIG_IA64_GENERIC)
+ /* Machine Vector */
+ . = ALIGN(16);
+ machvec_start = .;
+ .machvec : AT(ADDR(.machvec) - PAGE_OFFSET)
+ { *(.machvec) }
+ machvec_end = .;
+#endif
+
__start___ksymtab = .; /* Kernel symbol table */
__ksymtab : AT(ADDR(__ksymtab) - PAGE_OFFSET)
{ *(__ksymtab) }