diff options
Diffstat (limited to 'arch/sparc64/kernel')
28 files changed, 2599 insertions, 852 deletions
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index fbeb83126..6934dda6e 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.41 1998/10/11 06:58:14 davem Exp $ +# $Id: Makefile,v 1.43 1999/01/02 16:45:53 davem Exp $ # Makefile for the linux kernel. # # Note! Dependencies are done automagically by 'make dep', which also @@ -20,7 +20,7 @@ O_OBJS := process.o setup.o cpu.o idprom.o \ traps.o devices.o auxio.o ioport.o \ irq.o ptrace.o time.o sys_sparc.o signal.o \ unaligned.o sys_sunos32.o sunos_ioctl32.o \ - central.o psycho.o + central.o psycho.o starfire.o OX_OBJS := sparc64_ksyms.o ifdef CONFIG_PCI diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index dc898e300..450451ac0 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -9,7 +9,6 @@ #include <linux/module.h> -#include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -18,9 +17,10 @@ #include <linux/errno.h> #include <linux/signal.h> #include <linux/string.h> +#include <linux/fs.h> +#include <linux/file.h> #include <linux/stat.h> #include <linux/fcntl.h> -#include <linux/file.h> #include <linux/ptrace.h> #include <linux/user.h> #include <linux/malloc.h> @@ -58,14 +58,13 @@ static void set_brk(unsigned long start, unsigned long end) * macros to write out all the necessary info. 
*/ #define DUMP_WRITE(addr,nr) \ -while (file.f_op->write(&file,(char *)(addr),(nr),&file.f_pos) != (nr)) \ - goto close_coredump +while (file->f_op->write(file,(char *)(addr),(nr),&file->f_pos) != (nr)) goto close_coredump #define DUMP_SEEK(offset) \ -if (file.f_op->llseek) { \ - if (file.f_op->llseek(&file,(offset),0) != (offset)) \ +if (file->f_op->llseek) { \ + if (file->f_op->llseek(file,(offset),0) != (offset)) \ goto close_coredump; \ -} else file.f_pos = (offset) +} else file->f_pos = (offset) /* * Routine writes a core dump image in the current directory. @@ -82,7 +81,7 @@ do_aout32_core_dump(long signr, struct pt_regs * regs) { struct dentry * dentry = NULL; struct inode * inode = NULL; - struct file file; + struct file * file; mm_segment_t fs; int has_dumped = 0; char corefile[6+sizeof(current->comm)]; @@ -106,29 +105,16 @@ do_aout32_core_dump(long signr, struct pt_regs * regs) #else corefile[4] = '\0'; #endif - dentry = open_namei(corefile,O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); - if (IS_ERR(dentry)) { - dentry = NULL; + file = filp_open(corefile,O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); + if (IS_ERR(file)) goto end_coredump; - } + dentry = file->f_dentry; inode = dentry->d_inode; if (!S_ISREG(inode->i_mode)) - goto end_coredump; + goto close_coredump; if (!inode->i_op || !inode->i_op->default_file_ops) - goto end_coredump; - if (get_write_access(inode)) - goto end_coredump; - file.f_mode = 3; - file.f_flags = 0; - file.f_count = 1; - file.f_dentry = dentry; - file.f_pos = 0; - file.f_reada = 0; - file.f_op = inode->i_op->default_file_ops; - if (file.f_op->open) - if (file.f_op->open(inode,&file)) - goto done_coredump; - if (!file.f_op->write) + goto close_coredump; + if (!file->f_op->write) goto close_coredump; has_dumped = 1; current->flags |= PF_DUMPCORE; @@ -175,13 +161,9 @@ do_aout32_core_dump(long signr, struct pt_regs * regs) set_fs(KERNEL_DS); DUMP_WRITE(current,sizeof(*current)); close_coredump: - if (file.f_op->release) - 
file.f_op->release(inode,&file); -done_coredump: - put_write_access(inode); + filp_close(file, NULL); end_coredump: set_fs(fs); - dput(dentry); return has_dumped; } @@ -269,7 +251,6 @@ static inline int do_load_aout32_binary(struct linux_binprm * bprm, return -ENOEXEC; } - current->personality = PER_LINUX; fd_offset = N_TXTOFF(ex); /* Check initial limits. This avoids letting people circumvent @@ -288,6 +269,8 @@ static inline int do_load_aout32_binary(struct linux_binprm * bprm, return retval; /* OK, This is the point of no return */ + current->personality = PER_LINUX; + current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + @@ -297,8 +280,7 @@ static inline int do_load_aout32_binary(struct linux_binprm * bprm, current->mm->rss = 0; current->mm->mmap = NULL; - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == NMAGIC) { /* Fuck me plenty... */ @@ -404,48 +386,44 @@ static inline int do_load_aout32_library(int fd) { struct file * file; - struct exec ex; - struct dentry * dentry; struct inode * inode; - unsigned int len; - unsigned int bss; - unsigned int start_addr; + unsigned long bss, start_addr, len; unsigned long error; + int retval; + loff_t offset = 0; + struct exec ex; - file = fcheck(fd); - - if (!file || !file->f_op) - return -EACCES; - - dentry = file->f_dentry; - inode = dentry->d_inode; - - /* Seek into the file */ - if (file->f_op->llseek) { - if ((error = file->f_op->llseek(file, 0, 0)) != 0) - return -ENOEXEC; - } else - file->f_pos = 0; + retval = -EACCES; + file = fget(fd); + if (!file) + goto out; + if (!file->f_op) + goto out_putf; + inode = file->f_dentry->d_inode; + retval = -ENOEXEC; + /* N.B. Save current fs? 
*/ set_fs(KERNEL_DS); - error = file->f_op->read(file, (char *) &ex, sizeof(ex), &file->f_pos); + error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset); set_fs(USER_DS); if (error != sizeof(ex)) - return -ENOEXEC; + goto out_putf; /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { - return -ENOEXEC; + goto out_putf; } + if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); - return -ENOEXEC; + goto out_putf; } - if (N_FLAGS(ex)) return -ENOEXEC; + if (N_FLAGS(ex)) + goto out_putf; /* For QMAGIC, the starting address is 0x20 into the page. We mask this off to get the starting address for the page */ @@ -457,18 +435,26 @@ do_load_aout32_library(int fd) PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); + retval = error; if (error != start_addr) - return error; + goto out_putf; + len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - error = do_mmap(NULL, start_addr + len, bss-len, - PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_PRIVATE|MAP_FIXED, 0); + error = do_mmap(NULL, start_addr + len, bss - len, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_FIXED, 0); + retval = error; if (error != start_addr + len) - return error; + goto out_putf; } - return 0; + retval = 0; + +out_putf: + fput(file); +out: + return retval; } static int diff --git a/arch/sparc64/kernel/central.c b/arch/sparc64/kernel/central.c index 1d1cb25e1..198841f89 100644 --- a/arch/sparc64/kernel/central.c +++ b/arch/sparc64/kernel/central.c @@ -1,4 +1,4 @@ -/* $Id: central.c,v 1.6 1998/05/14 13:35:45 jj Exp $ +/* $Id: central.c,v 1.11 1998/12/14 12:18:16 davem Exp $ * central.c: Central 
FHC driver for Sunfire/Starfire/Wildfire. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -7,11 +7,17 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/string.h> +#include <linux/timer.h> +#include <linux/sched.h> +#include <linux/delay.h> #include <asm/page.h> #include <asm/fhc.h> struct linux_central *central_bus = NULL; +struct linux_fhc *fhc_list = NULL; + +#define IS_CENTRAL_FHC(__fhc) ((__fhc) == central_bus->child) static inline unsigned long long_align(unsigned long addr) { @@ -22,6 +28,156 @@ static inline unsigned long long_align(unsigned long addr) extern void prom_central_ranges_init(int cnode, struct linux_central *central); extern void prom_fhc_ranges_init(int fnode, struct linux_fhc *fhc); +static unsigned long probe_other_fhcs(unsigned long memory_start) +{ + struct linux_prom64_registers fpregs[6]; + char namebuf[128]; + int node; + + node = prom_getchild(prom_root_node); + node = prom_searchsiblings(node, "fhc"); + if (node == 0) { + prom_printf("FHC: Cannot find any toplevel firehose controllers.\n"); + prom_halt(); + } + while(node) { + struct linux_fhc *fhc; + int board; + u32 tmp; + + fhc = (struct linux_fhc *)memory_start; + memory_start += sizeof(struct linux_fhc); + memory_start = long_align(memory_start); + + /* Link it into the FHC chain. */ + fhc->next = fhc_list; + fhc_list = fhc; + + /* Toplevel FHCs have no parent. */ + fhc->parent = NULL; + + fhc->prom_node = node; + prom_getstring(node, "name", namebuf, sizeof(namebuf)); + strcpy(fhc->prom_name, namebuf); + prom_fhc_ranges_init(node, fhc); + + /* Non-central FHC's have 64-bit OBP format registers. */ + if(prom_getproperty(node, "reg", + (char *)&fpregs[0], sizeof(fpregs)) == -1) { + prom_printf("FHC: Fatal error, cannot get fhc regs.\n"); + prom_halt(); + } + + /* Only central FHC needs special ranges applied. 
*/ + fhc->fhc_regs.pregs = (struct fhc_internal_regs *) + __va(fpregs[0].phys_addr); + fhc->fhc_regs.ireg = (struct fhc_ign_reg *) + __va(fpregs[1].phys_addr); + fhc->fhc_regs.ffregs = (struct fhc_fanfail_regs *) + __va(fpregs[2].phys_addr); + fhc->fhc_regs.sregs = (struct fhc_system_regs *) + __va(fpregs[3].phys_addr); + fhc->fhc_regs.uregs = (struct fhc_uart_regs *) + __va(fpregs[4].phys_addr); + fhc->fhc_regs.tregs = (struct fhc_tod_regs *) + __va(fpregs[5].phys_addr); + + board = prom_getintdefault(node, "board#", -1); + fhc->board = board; + + tmp = fhc->fhc_regs.pregs->fhc_jtag_ctrl; + if((tmp & FHC_JTAG_CTRL_MENAB) != 0) + fhc->jtag_master = 1; + else + fhc->jtag_master = 0; + + tmp = fhc->fhc_regs.pregs->fhc_id; + printk("FHC(board %d): Version[%x] PartID[%x] Manuf[%x] %s\n", + board, + (tmp & FHC_ID_VERS) >> 28, + (tmp & FHC_ID_PARTID) >> 12, + (tmp & FHC_ID_MANUF) >> 1, + (fhc->jtag_master ? "(JTAG Master)" : "")); + + /* This bit must be set in all non-central FHC's in + * the system. When it is clear, this identifies + * the central board. + */ + fhc->fhc_regs.pregs->fhc_control |= FHC_CONTROL_IXIST; + + /* Look for the next FHC. 
*/ + node = prom_getsibling(node); + if(node == 0) + break; + node = prom_searchsiblings(node, "fhc"); + if(node == 0) + break; + } + + return memory_start; +} + +static void probe_clock_board(struct linux_central *central, + struct linux_fhc *fhc, + int cnode, int fnode) +{ + struct linux_prom_registers cregs[3]; + int clknode, nslots, tmp, nregs; + + clknode = prom_searchsiblings(prom_getchild(fnode), "clock-board"); + if(clknode == 0 || clknode == -1) { + prom_printf("Critical error, central lacks clock-board.\n"); + prom_halt(); + } + nregs = prom_getproperty(clknode, "reg", (char *)&cregs[0], sizeof(cregs)); + if (nregs == -1) { + prom_printf("CENTRAL: Fatal error, cannot map clock-board regs.\n"); + prom_halt(); + } + nregs /= sizeof(struct linux_prom_registers); + prom_apply_fhc_ranges(fhc, &cregs[0], nregs); + prom_apply_central_ranges(central, &cregs[0], nregs); + central->cfreg = (volatile u8 *) + __va((((unsigned long)cregs[0].which_io) << 32) | + (((unsigned long)cregs[0].phys_addr)+0x02)); + central->clkregs = (struct clock_board_regs *) + __va((((unsigned long)cregs[1].which_io) << 32) | + (((unsigned long)cregs[1].phys_addr))); + if(nregs == 2) + central->clkver = NULL; + else + central->clkver = (volatile u8 *) + __va((((unsigned long)cregs[2].which_io) << 32) | + (((unsigned long)cregs[2].phys_addr))); + + tmp = central->clkregs->stat1; + tmp &= 0xc0; + switch(tmp) { + case 0x40: + nslots = 16; + break; + case 0xc0: + nslots = 8; + break; + case 0x80: + if(central->clkver != NULL && + *(central->clkver) != 0) { + if((*(central->clkver) & 0x80) != 0) + nslots = 4; + else + nslots = 5; + break; + } + default: + nslots = 4; + break; + }; + central->slots = nslots; + printk("CENTRAL: Detected %d slot Enterprise system. cfreg[%02x] cver[%02x]\n", + central->slots, *(central->cfreg), + (central->clkver ? 
*(central->clkver) : 0x00)); +} + unsigned long central_probe(unsigned long memory_start) { struct linux_prom_registers fpregs[6]; @@ -30,9 +186,12 @@ unsigned long central_probe(unsigned long memory_start) int cnode, fnode, err; cnode = prom_finddevice("/central"); - if(cnode == 0 || cnode == -1) + if(cnode == 0 || cnode == -1) { + extern void starfire_check(void); + + starfire_check(); return memory_start; - printk("CENTRAL: found central PROM node %08x.\n", cnode); + } /* Ok we got one, grab some memory for software state. */ memory_start = long_align(memory_start); @@ -54,7 +213,9 @@ unsigned long central_probe(unsigned long memory_start) prom_central_ranges_init(cnode, central_bus); /* And then central's FHC. */ - fhc->next = NULL; + fhc->next = fhc_list; + fhc_list = fhc; + fhc->parent = central_bus; fnode = prom_searchsiblings(prom_getchild(cnode), "fhc"); if(fnode == 0 || fnode == -1) { @@ -67,9 +228,9 @@ unsigned long central_probe(unsigned long memory_start) prom_fhc_ranges_init(fnode, fhc); - /* Finally, map in FHC register set. */ + /* Now, map in FHC register set. */ if (prom_getproperty(fnode, "reg", (char *)&fpregs[0], sizeof(fpregs)) == -1) { - prom_printf("CENTRAL: fatal error, cannot get fhc regs.\n"); + prom_printf("CENTRAL: Fatal error, cannot get fhc regs.\n"); prom_halt(); } prom_apply_central_ranges(central_bus, &fpregs[0], 6); @@ -93,11 +254,144 @@ unsigned long central_probe(unsigned long memory_start) __va((((unsigned long)fpregs[5].which_io)<<32) | (((unsigned long)fpregs[5].phys_addr))); + /* Obtain board number from board status register, Central's + * FHC lacks "board#" property. + */ + err = fhc->fhc_regs.pregs->fhc_bsr; + fhc->board = (((err >> 16) & 0x01) | + ((err >> 12) & 0x0e)); + + fhc->jtag_master = 0; + + /* Attach the clock board registers for CENTRAL. 
*/ + probe_clock_board(central_bus, fhc, cnode, fnode); + err = fhc->fhc_regs.pregs->fhc_id; - printk("FHC Version[%x] PartID[%x] Manufacturer[%x]\n", + printk("FHC(board %d): Version[%x] PartID[%x] Manuf[%x] (CENTRAL)\n", + fhc->board, ((err & FHC_ID_VERS) >> 28), ((err & FHC_ID_PARTID) >> 12), ((err & FHC_ID_MANUF) >> 1)); - return memory_start; + return probe_other_fhcs(memory_start); +} + +static __inline__ void fhc_ledblink(struct linux_fhc *fhc, int on) +{ + volatile u32 *ctrl = (volatile u32 *) + &fhc->fhc_regs.pregs->fhc_control; + u32 tmp; + + tmp = *ctrl; + + /* NOTE: reverse logic on this bit */ + if (on) + tmp &= ~(FHC_CONTROL_RLED); + else + tmp |= FHC_CONTROL_RLED; + tmp &= ~(FHC_CONTROL_AOFF | FHC_CONTROL_BOFF | FHC_CONTROL_SLINE); + + *ctrl = tmp; + tmp = *ctrl; +} + +static __inline__ void central_ledblink(struct linux_central *central, int on) +{ + volatile u8 *ctrl = (volatile u8 *) ¢ral->clkregs->control; + int tmp; + + tmp = *ctrl; + + /* NOTE: reverse logic on this bit */ + if(on) + tmp &= ~(CLOCK_CTRL_RLED); + else + tmp |= CLOCK_CTRL_RLED; + + *ctrl = tmp; + tmp = *ctrl; +} + +static struct timer_list sftimer; +static int led_state; + +static void sunfire_timer(unsigned long __ignored) +{ + struct linux_fhc *fhc; + + central_ledblink(central_bus, led_state); + for(fhc = fhc_list; fhc != NULL; fhc = fhc->next) + if(! IS_CENTRAL_FHC(fhc)) + fhc_ledblink(fhc, led_state); + led_state = ! led_state; + sftimer.expires = jiffies + (HZ >> 1); + add_timer(&sftimer); +} + +/* After PCI/SBUS busses have been probed, this is called to perform + * final initialization of all FireHose Controllers in the system. + */ +void firetruck_init(void) +{ + struct linux_central *central = central_bus; + struct linux_fhc *fhc; + + /* No central bus, nothing to do. 
*/ + if (central == NULL) + return; + + for(fhc = fhc_list; fhc != NULL; fhc = fhc->next) { + volatile u32 *ctrl = (volatile u32 *) + &fhc->fhc_regs.pregs->fhc_control; + u32 tmp; + + /* Clear all of the interrupt mapping registers + * just in case OBP left them in a foul state. + */ +#define ZAP(REG1, REG2) \ +do { volatile u32 *__iclr = (volatile u32 *)(&(REG1)); \ + volatile u32 *__imap = (volatile u32 *)(&(REG2)); \ + *(__iclr) = 0; \ + (void) *(__iclr); \ + *(__imap) &= ~(0x80000000); \ + (void) *(__imap); \ +} while(0) + + ZAP(fhc->fhc_regs.ffregs->fhc_ff_iclr, + fhc->fhc_regs.ffregs->fhc_ff_imap); + ZAP(fhc->fhc_regs.sregs->fhc_sys_iclr, + fhc->fhc_regs.sregs->fhc_sys_imap); + ZAP(fhc->fhc_regs.uregs->fhc_uart_iclr, + fhc->fhc_regs.uregs->fhc_uart_imap); + ZAP(fhc->fhc_regs.tregs->fhc_tod_iclr, + fhc->fhc_regs.tregs->fhc_tod_imap); + +#undef ZAP + + /* Setup FHC control register. */ + tmp = *ctrl; + + /* All non-central boards have this bit set. */ + if(! IS_CENTRAL_FHC(fhc)) + tmp |= FHC_CONTROL_IXIST; + + /* For all FHCs, clear the firmware synchronization + * line and both low power mode enables. + */ + tmp &= ~(FHC_CONTROL_AOFF | FHC_CONTROL_BOFF | FHC_CONTROL_SLINE); + *ctrl = tmp; + tmp = *ctrl; /* Ensure completion */ + } + + /* OBP leaves it on, turn it off so clock board timer LED + * is in sync with FHC ones. 
+ */ + central->clkregs->control &= ~(CLOCK_CTRL_RLED); + + led_state = 0; + init_timer(&sftimer); + sftimer.data = 0; + sftimer.function = &sunfire_timer; + sftimer.expires = jiffies + (HZ >> 1); + add_timer(&sftimer); } diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c index 86efc4bb7..86518e50e 100644 --- a/arch/sparc64/kernel/cpu.c +++ b/arch/sparc64/kernel/cpu.c @@ -49,11 +49,11 @@ struct cpu_iu_info linux_sparc_chips[] = { #define NSPARCCHIPS (sizeof(linux_sparc_chips)/sizeof(struct cpu_iu_info)) #ifdef __SMP__ -char *sparc_cpu_type[NR_CPUS] = { "cpu-oops", "cpu-oops1", "cpu-oops2", "cpu-oops3" }; -char *sparc_fpu_type[NR_CPUS] = { "fpu-oops", "fpu-oops1", "fpu-oops2", "fpu-oops3" }; +char *sparc_cpu_type[64] = { "cpu-oops", "cpu-oops1", "cpu-oops2", "cpu-oops3" }; +char *sparc_fpu_type[64] = { "fpu-oops", "fpu-oops1", "fpu-oops2", "fpu-oops3" }; #else -char *sparc_cpu_type[NR_CPUS] = { "cpu-oops", }; -char *sparc_fpu_type[NR_CPUS] = { "fpu-oops", }; +char *sparc_cpu_type[64] = { "cpu-oops", }; +char *sparc_fpu_type[64] = { "fpu-oops", }; #endif unsigned int fsr_storage; @@ -65,11 +65,11 @@ __initfunc(void cpu_probe(void)) long ver, fpu_vers; long fprs; - cpuid = smp_processor_id(); + cpuid = hard_smp_processor_id(); fprs = fprs_read (); fprs_write (FPRS_FEF); - __asm__ __volatile__ ("rdpr %%ver, %0; stx %%fsr, [%1]" : "=r" (ver) : "r" (&fpu_vers)); + __asm__ __volatile__ ("rdpr %%ver, %0; stx %%fsr, [%1]" : "=&r" (ver) : "r" (&fpu_vers)); fprs_write (fprs); manuf = ((ver >> 48)&0xffff); @@ -88,7 +88,7 @@ __initfunc(void cpu_probe(void)) if(i==NSPARCCHIPS) { printk("DEBUG: manuf = 0x%x impl = 0x%x\n", manuf, impl); - sparc_cpu_type[cpuid] = "Unknow CPU"; + sparc_cpu_type[cpuid] = "Unknown CPU"; } for(i = 0; i<NSPARCFPU; i++) { diff --git a/arch/sparc64/kernel/devices.c b/arch/sparc64/kernel/devices.c index 93ea8ca4f..0aef0b019 100644 --- a/arch/sparc64/kernel/devices.c +++ b/arch/sparc64/kernel/devices.c @@ -13,8 +13,8 @@ #include 
<asm/system.h> #include <asm/smp.h> -struct prom_cpuinfo linux_cpus[NR_CPUS] __initdata = { { 0 } }; -unsigned prom_cpu_nodes[NR_CPUS]; +struct prom_cpuinfo linux_cpus[64] __initdata = { { 0 } }; +unsigned prom_cpu_nodes[64]; int linux_num_cpus = 0; extern void cpu_probe(void); @@ -25,11 +25,12 @@ device_scan(unsigned long mem_start)) { char node_str[128]; int nd, prom_node_cpu, thismid; - int cpu_nds[NR_CPUS]; /* One node for each cpu */ + int cpu_nds[64]; /* One node for each cpu */ int cpu_ctr = 0; prom_getstring(prom_root_node, "device_type", node_str, sizeof(node_str)); + prom_printf("Booting Linux...\n"); if(strcmp(node_str, "cpu") == 0) { cpu_nds[0] = prom_root_node; linux_cpus[0].prom_node = prom_root_node; @@ -38,7 +39,7 @@ device_scan(unsigned long mem_start)) } else { int scan; scan = prom_getchild(prom_root_node); - prom_printf("root child is %08x\n", (unsigned) scan); + /* prom_printf("root child is %08x\n", (unsigned) scan); */ nd = 0; while((scan = prom_getsibling(scan)) != 0) { prom_getstring(scan, "device_type", node_str, sizeof(node_str)); @@ -49,11 +50,11 @@ device_scan(unsigned long mem_start)) (char *) &thismid, sizeof(thismid)); linux_cpus[cpu_ctr].mid = thismid; #ifdef __SMP__ - prom_printf("Found CPU %d (node=%08x,mid=%d)\n", - cpu_ctr, (unsigned) scan, - thismid); - printk("Found CPU %d (node=%08x,mid=%d)\n", - cpu_ctr, (unsigned) scan, thismid); + /* Don't pollute PROM screen with these messages. If the kernel is screwed enough + that console does not start up, then we don't care how many CPUs have been found, + if it starts up, the user can use console=prom to see it. 
*/ + /* prom_printf("Found CPU %d (node=%08x,mid=%d)\n", cpu_ctr, (unsigned) scan, thismid); */ + printk("Found CPU %d (node=%08x,mid=%d)\n", cpu_ctr, (unsigned) scan, thismid); #endif cpu_ctr++; } @@ -72,6 +73,9 @@ device_scan(unsigned long mem_start)) prom_cpu_nodes[0] = prom_node_cpu; + mem_start = central_probe(mem_start); + cpu_probe(); - return central_probe(mem_start); + + return mem_start; } diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S index 6207101fd..9fe613a51 100644 --- a/arch/sparc64/kernel/dtlb_backend.S +++ b/arch/sparc64/kernel/dtlb_backend.S @@ -1,4 +1,4 @@ -/* $Id: dtlb_backend.S,v 1.6 1998/09/24 03:21:32 davem Exp $ +/* $Id: dtlb_backend.S,v 1.7 1998/12/16 04:33:28 davem Exp $ * dtlb_backend.S: Back end to DTLB miss replacement strategy. * This is included directly into the trap table. * @@ -37,28 +37,30 @@ be,pn %xcc, sparc64_vpte_nucleus ! Is it from Nucleus? and %g1, 0xffe, %g1 ! Mask PMD offset bits brnz,pt %g5, sparc64_vpte_continue ! Yep, go like smoke - nop ! Pipe bubble... + add %g1, %g1, %g1 ! Position PMD offset some more srlx %g6, (PGD_SHIFT - 2), %g5 ! Position PGD offset and %g5, 0xffc, %g5 ! Mask PGD offset /* TLB1 ** ICACHE line 3: Quick VPTE miss */ lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD - brz,pn %g5, 2f ! Valid? + brz,pn %g5, vpte_noent ! Valid? +sparc64_kpte_continue: + sllx %g5, 11, %g5 ! Shift into place sparc64_vpte_continue: - add %g1, %g1, %g1 ! Position PMD offset once again lduwa [%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD - brz,pn %g5, 2f ! Valid? + sllx %g5, 11, %g5 ! Shift into place + brz,pn %g5, vpte_noent ! Valid? sllx %g2, 62, %g1 ! Put _PAGE_VALID into %g1 or %g5, VPTE_BITS, %g5 ! Prepare VPTE data - or %g5, %g1, %g5 ! ... /* TLB1 ** ICACHE line 4: Quick VPTE miss */ + or %g5, %g1, %g5 ! ... mov TLB_SFSR, %g1 ! Restore %g1 value stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB - membar #Sync ! Synchronize ASI stores stxa %g4, [%g1 + %g1] ASI_DMMU ! 
Restore previous TAG_ACCESS retry ! Load PTE once again -2: mov TLB_SFSR, %g1 ! Restore %g1 value +vpte_noent: + mov TLB_SFSR, %g1 ! Restore %g1 value stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS done ! Slick trick diff --git a/arch/sparc64/kernel/dtlb_prot.S b/arch/sparc64/kernel/dtlb_prot.S index c4ce36502..067a1d051 100644 --- a/arch/sparc64/kernel/dtlb_prot.S +++ b/arch/sparc64/kernel/dtlb_prot.S @@ -1,4 +1,4 @@ -/* $Id: dtlb_prot.S,v 1.17 1998/05/25 16:59:11 davem Exp $ +/* $Id: dtlb_prot.S,v 1.18 1999/03/02 15:42:14 jj Exp $ * dtlb_prot.S: DTLB protection trap strategy. * This is included directly into the trap table. * @@ -45,13 +45,13 @@ mov TLB_TAG_ACCESS, %g4 ! Prepare reload of vaddr bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 - sethi %hi(1f), %g7 ! Nope, normal fault + ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault /* PROT ** ICACHE line 4: More real fault processing */ - ba,pt %xcc, etrap ! Save state -1: or %g7, %lo(1b), %g7 ! ... - ba,pt %xcc, sparc64_realfault_continue! Now call the fault handler - mov 1, %o2 ! Indicate this was a write + mov 1, %g4 ! Indicate this was a write + nop + nop + nop nop nop nop diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c index e60a5dc62..e64e87299 100644 --- a/arch/sparc64/kernel/ebus.c +++ b/arch/sparc64/kernel/ebus.c @@ -1,4 +1,4 @@ -/* $Id: ebus.c,v 1.33 1998/09/21 05:06:03 jj Exp $ +/* $Id: ebus.c,v 1.36 1999/05/04 03:21:42 davem Exp $ * ebus.c: PCI to EBus bridge device. * * Copyright (C) 1997 Eddie C. 
Dost (ecd@skynet.be) @@ -35,9 +35,6 @@ extern void prom_ebus_intmap_init(struct linux_ebus *); #ifdef CONFIG_SUN_OPENPROMIO extern int openprom_init(void); #endif -#ifdef CONFIG_SPARCAUDIO -extern int sparcaudio_init(void); -#endif #ifdef CONFIG_SUN_AUXIO extern void auxio_probe(void); #endif @@ -263,6 +260,31 @@ __initfunc(void ebus_init(void)) ebus->next = 0; while (ebusnd) { + /* SUNW,pci-qfe uses four empty ebuses on it. + I think we should not consider them here, + as they have half of the properties this + code expects and once we do PCI hot-plug, + we'd have to tweak with the ebus_chain + in the runtime after initialization. -jj */ + if (!prom_getchild (ebusnd)) { + pdev = pci_find_device(PCI_VENDOR_ID_SUN, + PCI_DEVICE_ID_SUN_EBUS, pdev); + if (!pdev) { + if (ebus == ebus_chain) { + ebus_chain = NULL; + printk("ebus: No EBus's found.\n"); +#ifdef PROM_DEBUG + dprintf("ebus: No EBus's found.\n"); +#endif + return; + } + break; + } + + cookie = pdev->sysdata; + ebusnd = cookie->prom_node; + continue; + } printk("ebus%d:", num_ebus); #ifdef PROM_DEBUG dprintf("ebus%d:", num_ebus); @@ -280,6 +302,12 @@ __initfunc(void ebus_init(void)) pci_command |= PCI_COMMAND_MASTER; pci_write_config_word(pdev, PCI_COMMAND, pci_command); + /* Set reasonable cache line size and latency timer values. */ + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64); + + /* NOTE: Cache line size is in 32-bit word units. 
*/ + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x10); + len = prom_getproperty(ebusnd, "reg", (void *)regs, sizeof(regs)); if (len == 0 || len == -1) { @@ -368,9 +396,6 @@ __initfunc(void ebus_init(void)) #ifdef CONFIG_SUN_OPENPROMIO openprom_init(); #endif -#ifdef CONFIG_SPARCAUDIO - sparcaudio_init(); -#endif #ifdef CONFIG_SUN_BPP bpp_init(); #endif diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 7c0d80eb4..4134dcc3a 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1,10 +1,10 @@ -/* $Id: entry.S,v 1.91 1998/10/07 01:27:08 davem Exp $ +/* $Id: entry.S,v 1.103 1999/05/08 03:00:21 davem Exp $ * arch/sparc64/kernel/entry.S: Sparc64 trap low-level entry points. * * Copyright (C) 1995,1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx) - * Copyright (C) 1996,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ #include <linux/config.h> @@ -29,8 +29,17 @@ .text .align 32 + .globl sparc64_vpte_patchme1 + .globl sparc64_vpte_patchme2 +sparc64_vpte_nucleus: +sparc64_vpte_patchme1: + sethi %hi(0), %g5 ! This has to be patched +sparc64_vpte_patchme2: + or %g5, %lo(0), %g5 ! This is patched too + ba,pt %xcc, sparc64_kpte_continue ! Part of dtlb_backend + add %g1, %g1, %g1 ! Finish PMD offset adjustment + /* This is trivial with the new code... */ - .align 32 .globl do_fpdis do_fpdis: ldub [%g6 + AOFF_task_tss + AOFF_thread_fpsaved], %g5 ! Load Group @@ -155,6 +164,39 @@ fpdis_exit2: wr %g0, FPRS_FEF, %fprs ! 
clean DU/DL bits retry + .globl do_fptrap + .align 32 +do_fptrap: + ldub [%g6 + AOFF_task_tss + AOFF_thread_fpsaved], %g3 + stx %fsr, [%g6 + AOFF_task_tss + AOFF_thread_xfsr] + rd %fprs, %g1 + or %g3, %g1, %g3 + stb %g3, [%g6 + AOFF_task_tss + AOFF_thread_fpsaved] + rd %gsr, %g3 + stb %g3, [%g6 + AOFF_task_tss + AOFF_thread_gsr] + mov SECONDARY_CONTEXT, %g3 + add %g6, AOFF_task_fpregs, %g2 + ldxa [%g3] ASI_DMMU, %g5 + stxa %g0, [%g3] ASI_DMMU + flush %g6 + membar #StoreStore | #LoadStore + andcc %g1, FPRS_DL, %g0 + be,pn %icc, 4f + mov 0x40, %g3 + stda %f0, [%g2] ASI_BLK_S + stda %f16, [%g2 + %g3] ASI_BLK_S + andcc %g1, FPRS_DU, %g0 + be,pn %icc, 5f +4: add %g2, 128, %g2 + stda %f32, [%g2] ASI_BLK_S + stda %f48, [%g2 + %g3] ASI_BLK_S +5: mov SECONDARY_CONTEXT, %g1 + membar #Sync + stxa %g5, [%g1] ASI_DMMU + flush %g6 + ba,pt %xcc, etrap + wr %g0, 0, %fprs + /* The registers for cross calls will be: * * DATA 0: [low 32-bits] Address of function to call, jmp to this @@ -164,69 +206,57 @@ fpdis_exit2: * * With this method we can do most of the cross-call tlb/cache * flushing very quickly. + * + * Current CPU's IRQ worklist table is locked into %g1, + * don't touch. */ - .data - .align 8 - .globl ivec_spurious_cookie -ivec_spurious_cookie: .xword 0 - .text - .align 32 - .globl do_ivec + .align 32 + .globl do_ivec do_ivec: - ldxa [%g0] ASI_INTR_RECEIVE, %g5 - andcc %g5, 0x20, %g0 - be,pn %xcc, do_ivec_return - mov 0x40, %g2 - - /* Load up Interrupt Vector Data 0 register. 
*/ + wr %g0, ASI_UDB_INTR_R, %asi + ldxa [%g0 + 0x40] %asi, %g3 sethi %hi(KERNBASE), %g4 - ldxa [%g2] ASI_UDB_INTR_R, %g3 cmp %g3, %g4 bgeu,pn %xcc, do_ivec_xcall - nop - and %g3, 0x7ff, %g3 - sllx %g3, 3, %g3 - ldx [%g1 + %g3], %g2 - brz,pn %g2, do_ivec_spurious - sethi %hi(0x80000000), %g5 + srlx %g3, 32, %g5 + stxa %g0, [%g0] ASI_INTR_RECEIVE + membar #Sync - or %g2, %g5, %g2 - stx %g2, [%g1 + %g3] + sethi %hi(ivector_table), %g2 + sllx %g3, 5, %g3 + or %g2, %lo(ivector_table), %g2 + add %g2, %g3, %g3 + ldx [%g3 + 0x08], %g2 /* irq_info */ + ldub [%g3 + 0x04], %g4 /* pil */ + brz,pn %g2, do_ivec_spurious + mov 1, %g2 - /* No branches, worse case we don't know about this interrupt - * yet, so we would just write a zero into the softint register - * which is completely harmless. - */ + sllx %g2, %g4, %g2 + sllx %g4, 2, %g4 + lduw [%g1 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ + stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ + stw %g3, [%g1 + %g4] /* irq_work(cpu, pil) = bucket */ wr %g2, 0x0, %set_softint -do_ivec_return: - stxa %g0, [%g0] ASI_INTR_RECEIVE - membar #Sync retry do_ivec_xcall: - srlx %g3, 32, %g5 - add %g2, 0x10, %g2 + ldxa [%g0 + 0x50] %asi, %g6 + srl %g3, 0, %g3 - ldxa [%g2] ASI_UDB_INTR_R, %g6 - add %g2, 0x10, %g2 - ldxa [%g2] ASI_UDB_INTR_R, %g7 + ldxa [%g0 + 0x60] %asi, %g7 stxa %g0, [%g0] ASI_INTR_RECEIVE membar #Sync jmpl %g3, %g0 nop - do_ivec_spurious: - srl %g3, 3, %g3 - sethi %hi(ivec_spurious_cookie), %g2 - stx %g3, [%g2 + %lo(ivec_spurious_cookie)] - stxa %g0, [%g0] ASI_INTR_RECEIVE - membar #Sync + stw %g3, [%g1 + 0x00] /* irq_work(cpu, 0) = bucket */ rdpr %pstate, %g5 + wrpr %g5, PSTATE_IG | PSTATE_AG, %pstate sethi %hi(109f), %g7 ba,pt %xcc, etrap 109: or %g7, %lo(109b), %g7 - call report_spurious_ivec + call catch_disabled_ivec add %sp, STACK_BIAS + REGWIN_SZ, %o0 ba,pt %xcc, rtrap clr %l6 @@ -337,7 +367,7 @@ floppy_fifo_emptied: or %g1, %lo(irq_action), %g1 ldx [%g1 + (11 << 3)], %g3 ! 
irqaction[floppy_irq] ldx [%g3 + 0x10], %g4 ! action->mask == ino_bucket ptr - ldx [%g4 + 0x18], %g4 ! bucket->iclr + ldx [%g4 + 0x10], %g4 ! bucket->iclr stw %g0, [%g4] ! SYSIO_ICLR_IDLE membar #Sync ! probably not needed... retry @@ -588,12 +618,20 @@ sunos_getgid: /* SunOS's execv() call only specifies the argv argument, the * environment settings are the same as the calling processes. */ - .globl sunos_execv + .globl sunos_execv, sys_execve, sys32_execve +sys_execve: + sethi %hi(sparc_execve), %g1 + ba,pt %xcc, execve_merge + or %g1, %lo(sparc_execve), %g1 sunos_execv: - sethi %hi(sparc32_execve), %g1 - stx %g0, [%sp + STACK_BIAS + REGWIN_SZ + PT_V9_I2] - jmpl %g1 + %lo(sparc32_execve), %g0 - add %sp, STACK_BIAS + REGWIN_SZ, %o0 + stx %g0, [%sp + STACK_BIAS + REGWIN_SZ + PT_V9_I2] +sys32_execve: + sethi %hi(sparc32_execve), %g1 + or %g1, %lo(sparc32_execve), %g1 +execve_merge: + flushw + jmpl %g1, %g0 + add %sp, STACK_BIAS + REGWIN_SZ, %o0 .globl sys_pipe, sys_execve, sys_sigpause, sys_nis_syscall .globl sys_sigsuspend, sys_rt_sigsuspend, sys32_rt_sigsuspend @@ -612,14 +650,6 @@ sys_nis_syscall:sethi %hi(c_sys_nis_syscall), %g1 jmpl %g1 + %lo(c_sys_nis_syscall), %g0 nop -sys_execve: sethi %hi(sparc_execve), %g1 - add %sp, STACK_BIAS + REGWIN_SZ, %o0 - jmpl %g1 + %lo(sparc_execve), %g0 - nop -sys32_execve: sethi %hi(sparc32_execve), %g1 - add %sp, STACK_BIAS + REGWIN_SZ, %o0 - jmpl %g1 + %lo(sparc32_execve), %g0 - nop sys_memory_ordering: sethi %hi(sparc_memory_ordering), %g1 add %sp, STACK_BIAS + REGWIN_SZ, %o1 @@ -719,27 +749,31 @@ sys_ptrace: add %sp, STACK_BIAS + REGWIN_SZ, %o0 .globl sys_fork, sys_vfork, sys_clone, sparc_exit .globl ret_from_syscall .align 32 -sys_fork: -sys_vfork: mov SIGCHLD, %o0 - clr %o1 +sys_vfork: /* Under Linux, vfork and fork are just special cases of clone. 
*/ + sethi %hi(0x4000 | 0x0100 | SIGCHLD), %o0 + or %o0, %lo(0x4000 | 0x0100 | SIGCHLD), %o0 + ba,pt %xcc, sys_clone +sys_fork: clr %o1 + mov SIGCHLD, %o0 sys_clone: flushw - mov %o7, %l5 - add %sp, STACK_BIAS + REGWIN_SZ, %o2 movrz %o1, %fp, %o1 - call do_fork - mov %l5, %o7 + nop + ba,pt %xcc, do_fork + add %sp, STACK_BIAS + REGWIN_SZ, %o2 ret_from_syscall: /* Clear SPARC_FLAG_NEWCHILD, switch_to leaves tss.flags in * %o7 for us. Check performance counter stuff too. */ - andn %o7, 0x100, %o7 - sth %o7, [%g6 + AOFF_task_tss + AOFF_thread_flags] #ifdef __SMP__ - sethi %hi(scheduler_lock), %o4 - membar #StoreStore | #LoadStore - stb %g0, [%o4 + %lo(scheduler_lock)] + andn %o7, 0x100, %l0 + mov %g5, %o0 /* 'prev' */ + call schedule_tail + sth %l0, [%g6 + AOFF_task_tss + AOFF_thread_flags] +#else + andn %o7, 0x100, %l0 + sth %l0, [%g6 + AOFF_task_tss + AOFF_thread_flags] #endif - andcc %o7, 0x200, %g0 + andcc %l0, 0x200, %g0 be,pt %icc, 1f nop ldx [%g6 + AOFF_task_tss + AOFF_thread_pcr_reg], %o7 diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 8c92688f1..21c1872a8 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -1,4 +1,4 @@ -/* $Id: head.S,v 1.54 1998/10/06 20:48:30 ecd Exp $ +/* $Id: head.S,v 1.60 1999/04/12 08:08:21 davem Exp $ * head.S: Initial boot code for the Sparc64 port of Linux. * * Copyright (C) 1996,1997 David S. Miller (davem@caip.rutgers.edu) @@ -7,6 +7,7 @@ * Copyright (C) 1997 Miguel de Icaza (miguel@nuclecu.unam.mx) */ +#include <linux/config.h> #include <linux/version.h> #include <linux/errno.h> #include <asm/asm_offsets.h> @@ -46,7 +47,7 @@ bootup_user_stack: * HdrS version should be incremented. 
*/ .global root_flags, ram_flags, root_dev - .global ramdisk_image, ramdisk_size + .global sparc_ramdisk_image, sparc_ramdisk_size .globl silo_args .ascii "HdrS" @@ -58,9 +59,9 @@ root_dev: .half 0 ram_flags: .half 0 -ramdisk_image: +sparc_ramdisk_image: .word 0 -ramdisk_size: +sparc_ramdisk_size: .word 0 .xword reboot_command .xword bootstr_len @@ -330,7 +331,7 @@ sun4u_init: /* IMPORTANT NOTE: Whenever making changes here, check * trampoline.S as well. -jj */ .globl setup_tba -setup_tba: +setup_tba: /* i0 = is_starfire */ save %sp, -160, %sp rdpr %tba, %g7 @@ -376,9 +377,34 @@ setup_tba: /* Setup Interrupt globals */ wrpr %o1, (PSTATE_IG|PSTATE_IE), %pstate - sethi %hi(ivector_to_mask), %g5 - or %g5, %lo(ivector_to_mask), %g1 /* IVECTOR table */ - mov 0x40, %g2 /* INTR data 0 register */ +#ifndef __SMP__ + sethi %hi(__up_workvec), %g5 + or %g5, %lo(__up_workvec), %g1 +#else + /* By definition of where we are, this is boot_cpu. */ + sethi %hi(cpu_data), %g5 + or %g5, %lo(cpu_data), %g5 + + brz,pt %i0, not_starfire + sethi %hi(0x1fff4000), %g1 + or %g1, %lo(0x1fff4000), %g1 + sllx %g1, 12, %g1 + or %g1, 0xd0, %g1 + lduwa [%g1] ASI_PHYS_BYPASS_EC_E, %g1 + b,pt %xcc, set_worklist + nop + +not_starfire: + ldxa [%g0] ASI_UPA_CONFIG, %g1 + srlx %g1, 17, %g1 + and %g1, 0x1f, %g1 + + /* In theory this is: &(cpu_data[boot_cpu_id].irq_worklists[0]) */ +set_worklist: + sllx %g1, 7, %g1 + add %g5, %g1, %g5 + add %g5, 64, %g1 +#endif /* Kill PROM timer */ wr %g0, 0, %tick_cmpr @@ -408,6 +434,13 @@ bootup_user_stack_end: empty_bad_page: .skip 0x2000 +#ifdef CONFIG_SBUS +/* This is just a hack to fool make depend config.h discovering + strategy: As the .S files below need config.h, but + make depend does not find it for them, we include config.h + in head.S */ +#endif + ! 
0x0000000000408000 #include "ttable.S" diff --git a/arch/sparc64/kernel/ioctl32.c b/arch/sparc64/kernel/ioctl32.c index 42f3de3b8..84d4de363 100644 --- a/arch/sparc64/kernel/ioctl32.c +++ b/arch/sparc64/kernel/ioctl32.c @@ -1,4 +1,4 @@ -/* $Id: ioctl32.c,v 1.55 1998/11/17 07:43:17 davem Exp $ +/* $Id: ioctl32.c,v 1.62 1999/05/01 09:17:44 davem Exp $ * ioctl32.c: Conversion between 32bit and 64bit native ioctls. * * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -26,6 +26,7 @@ #include <linux/fs.h> #include <linux/file.h> #include <linux/fd.h> +#include <linux/ppp_defs.h> #include <linux/if_ppp.h> #include <linux/mtio.h> #include <linux/cdrom.h> @@ -35,6 +36,7 @@ #include <linux/vt_kern.h> #include <linux/fb.h> #include <linux/ext2_fs.h> +#include <linux/videodev.h> #include <scsi/scsi.h> /* Ugly hack. */ @@ -52,6 +54,7 @@ #include <asm/openpromio.h> #include <asm/envctrl.h> #include <asm/audioio.h> +#include <asm/ethtool.h> #include <linux/soundcard.h> @@ -116,6 +119,247 @@ static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, arg); } +struct video_tuner32 { + s32 tuner; + u8 name[32]; + u32 rangelow, rangehigh; + u32 flags; + u16 mode, signal; +}; + +static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up) +{ + int i; + + if(get_user(kp->tuner, &up->tuner)) + return -EFAULT; + for(i = 0; i < 32; i++) + __get_user(kp->name[i], &up->name[i]); + __get_user(kp->rangelow, &up->rangelow); + __get_user(kp->rangehigh, &up->rangehigh); + __get_user(kp->flags, &up->flags); + __get_user(kp->mode, &up->mode); + __get_user(kp->signal, &up->signal); + return 0; +} + +static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up) +{ + int i; + + if(put_user(kp->tuner, &up->tuner)) + return -EFAULT; + for(i = 0; i < 32; i++) + __put_user(kp->name[i], &up->name[i]); + __put_user(kp->rangelow, &up->rangelow); + __put_user(kp->rangehigh, &up->rangehigh); + __put_user(kp->flags, 
&up->flags); + __put_user(kp->mode, &up->mode); + __put_user(kp->signal, &up->signal); + return 0; +} + +struct video_buffer32 { + /* void * */ u32 base; + s32 height, width, depth, bytesperline; +}; + +static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up) +{ + u32 tmp; + + if(get_user(tmp, &up->base)) + return -EFAULT; + kp->base = (void *) ((unsigned long)tmp); + __get_user(kp->height, &up->height); + __get_user(kp->width, &up->width); + __get_user(kp->depth, &up->depth); + __get_user(kp->bytesperline, &up->bytesperline); + return 0; +} + +static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up) +{ + u32 tmp = (u32)((unsigned long)kp->base); + + if(put_user(tmp, &up->base)) + return -EFAULT; + __put_user(kp->height, &up->height); + __put_user(kp->width, &up->width); + __put_user(kp->depth, &up->depth); + __put_user(kp->bytesperline, &up->bytesperline); + return 0; +} + +struct video_clip32 { + s32 x, y, width, height; + /* struct video_clip32 * */ u32 next; +}; + +struct video_window32 { + u32 x, y, width, height, chromakey, flags; + /* struct video_clip32 * */ u32 clips; + s32 clipcount; +}; + +static void free_kvideo_clips(struct video_window *kp) +{ + struct video_clip *cp; + + cp = kp->clips; + if(cp != NULL) + kfree(cp); +} + +static int get_video_window32(struct video_window *kp, struct video_window32 *up) +{ + struct video_clip32 *ucp; + struct video_clip *kcp; + int nclips, err, i; + u32 tmp; + + if(get_user(kp->x, &up->x)) + return -EFAULT; + __get_user(kp->y, &up->y); + __get_user(kp->width, &up->width); + __get_user(kp->height, &up->height); + __get_user(kp->chromakey, &up->chromakey); + __get_user(kp->flags, &up->flags); + __get_user(kp->clipcount, &up->clipcount); + __get_user(tmp, &up->clips); + ucp = (struct video_clip32 *)A(tmp); + kp->clips = NULL; + + nclips = kp->clipcount; + if(nclips == 0) + return 0; + + if(ucp == 0) + return -EINVAL; + + /* Peculiar interface... 
*/ + if(nclips < 0) + nclips = VIDEO_CLIPMAP_SIZE; + + kcp = kmalloc(nclips * sizeof(struct video_clip), GFP_KERNEL); + err = -ENOMEM; + if(kcp == NULL) + goto cleanup_and_err; + + kp->clips = kcp; + for(i = 0; i < nclips; i++) { + __get_user(kcp[i].x, &ucp[i].x); + __get_user(kcp[i].y, &ucp[i].y); + __get_user(kcp[i].width, &ucp[i].width); + __get_user(kcp[i].height, &ucp[i].height); + kcp[nclips].next = NULL; + } + + return 0; + +cleanup_and_err: + free_kvideo_clips(kp); + return err; +} + +/* You get back everything except the clips... */ +static int put_video_window32(struct video_window *kp, struct video_window32 *up) +{ + if(put_user(kp->x, &up->x)) + return -EFAULT; + __put_user(kp->y, &up->y); + __put_user(kp->width, &up->width); + __put_user(kp->height, &up->height); + __put_user(kp->chromakey, &up->chromakey); + __put_user(kp->flags, &up->flags); + __put_user(kp->clipcount, &up->clipcount); + return 0; +} + +#define VIDIOCGTUNER32 _IOWR('v',4, struct video_tuner32) +#define VIDIOCSTUNER32 _IOW('v',5, struct video_tuner32) +#define VIDIOCGWIN32 _IOR('v',9, struct video_window32) +#define VIDIOCSWIN32 _IOW('v',10, struct video_window32) +#define VIDIOCGFBUF32 _IOR('v',11, struct video_buffer32) +#define VIDIOCSFBUF32 _IOW('v',12, struct video_buffer32) +#define VIDIOCGFREQ32 _IOR('v',14, u32) +#define VIDIOCSFREQ32 _IOW('v',15, u32) + +static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + union { + struct video_tuner vt; + struct video_buffer vb; + struct video_window vw; + unsigned long vx; + } karg; + mm_segment_t old_fs = get_fs(); + void *up = (void *)arg; + int err = 0; + + /* First, convert the command. 
*/ + switch(cmd) { + case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break; + case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break; + case VIDIOCGWIN32: cmd = VIDIOCGWIN; break; + case VIDIOCSWIN32: cmd = VIDIOCSWIN; break; + case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break; + case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break; + case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break; + case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break; + }; + + switch(cmd) { + case VIDIOCSTUNER: + case VIDIOCGTUNER: + err = get_video_tuner32(&karg.vt, up); + break; + + case VIDIOCSWIN: + err = get_video_window32(&karg.vw, up); + break; + + case VIDIOCSFBUF: + err = get_video_buffer32(&karg.vb, up); + break; + + case VIDIOCSFREQ: + err = get_user(karg.vx, (u32 *)up); + break; + }; + if(err) + goto out; + + set_fs(KERNEL_DS); + err = sys_ioctl(fd, cmd, (unsigned long)&karg); + set_fs(old_fs); + + if(cmd == VIDIOCSWIN) + free_kvideo_clips(&karg.vw); + + if(err == 0) { + switch(cmd) { + case VIDIOCGTUNER: + err = put_video_tuner32(&karg.vt, up); + break; + + case VIDIOCGWIN: + err = put_video_window32(&karg.vw, up); + break; + + case VIDIOCGFBUF: + err = put_video_buffer32(&karg.vb, up); + break; + + case VIDIOCGFREQ: + err = put_user(((u32)karg.vx), (u32 *)up); + break; + }; + } +out: + return err; +} + struct timeval32 { int tv_sec; int tv_usec; @@ -253,11 +497,23 @@ static inline int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long ar case SIOCGPPPSTATS: case SIOCGPPPCSTATS: case SIOCGPPPVER: + case SIOCETHTOOL: if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32))) return -EFAULT; ifr.ifr_data = (__kernel_caddr_t)get_free_page(GFP_KERNEL); if (!ifr.ifr_data) return -EAGAIN; + if(cmd == SIOCETHTOOL) { + u32 data; + + __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); + if(copy_from_user(ifr.ifr_data, + (char *)A(data), + sizeof(struct ethtool_cmd))) { + free_page((unsigned long)ifr.ifr_data); + return -EFAULT; + } + } break; default: if (copy_from_user(&ifr, (struct ifreq32 
*)arg, sizeof(struct ifreq32))) @@ -280,17 +536,21 @@ static inline int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long ar case SIOCGIFBRDADDR: case SIOCGIFDSTADDR: case SIOCGIFNETMASK: + case SIOCGIFTXQLEN: if (copy_to_user((struct ifreq32 *)arg, &ifr, sizeof(struct ifreq32))) return -EFAULT; break; case SIOCGPPPSTATS: case SIOCGPPPCSTATS: case SIOCGPPPVER: + case SIOCETHTOOL: { u32 data; int len; __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data)); + if(cmd == SIOCETHTOOL) + len = sizeof(struct ethtool_cmd); if(cmd == SIOCGPPPVER) len = strlen(PPP_VERSION) + 1; else if(cmd == SIOCGPPPCSTATS) @@ -298,7 +558,9 @@ static inline int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long ar else len = sizeof(struct ppp_stats); - if (copy_to_user((char *)A(data), ifr.ifr_data, len)) + len = copy_to_user((char *)A(data), ifr.ifr_data, len); + free_page((unsigned long)ifr.ifr_data); + if(len) return -EFAULT; break; } @@ -558,8 +820,7 @@ static int fb_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) cmap.transp = kmalloc(cmap.len * sizeof(__u16), GFP_KERNEL); if (!cmap.transp) goto out; - } else - cmap.transp = NULL; + } if (cmd == FBIOGETCMAP) break; @@ -1458,6 +1719,9 @@ asmlinkage int sys32_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) case SIOCGPPPSTATS: case SIOCGPPPCSTATS: case SIOCGPPPVER: + case SIOCGIFTXQLEN: + case SIOCSIFTXQLEN: + case SIOCETHTOOL: error = dev_ifsioc(fd, cmd, arg); goto out; @@ -1579,11 +1843,22 @@ asmlinkage int sys32_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) error = do_ext2_ioctl(fd, cmd, arg); goto out; + case VIDIOCGTUNER32: + case VIDIOCSTUNER32: + case VIDIOCGWIN32: + case VIDIOCSWIN32: + case VIDIOCGFBUF32: + case VIDIOCSFBUF32: + case VIDIOCGFREQ32: + case VIDIOCSFREQ32: + error = do_video_ioctl(fd, cmd, arg); + goto out; + /* List here exlicitly which ioctl's are known to have * compatable types passed or none at all... 
*/ - /* Bit T */ + /* Big T */ case TCGETA: case TCSETA: case TCSETAW: @@ -1618,6 +1893,8 @@ asmlinkage int sys32_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) case TIOCSPGRP: case TIOCGPGRP: case TIOCSCTTY: + case TIOCGPTN: + case TIOCSPTLCK: /* Big F */ case FBIOGTYPE: @@ -1770,6 +2047,33 @@ asmlinkage int sys32_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) case VUIDSFORMAT: case VUIDGFORMAT: + /* Little v, the video4linux ioctls */ + case VIDIOCGCAP: + case VIDIOCGCHAN: + case VIDIOCSCHAN: + case VIDIOCGPICT: + case VIDIOCSPICT: + case VIDIOCCAPTURE: + case VIDIOCKEY: + case VIDIOCGAUDIO: + case VIDIOCSAUDIO: + case VIDIOCSYNC: + case VIDIOCMCAPTURE: + case VIDIOCGMBUF: + case VIDIOCGUNIT: + case VIDIOCGCAPTURE: + case VIDIOCSCAPTURE: + + /* BTTV specific... */ + case _IOW('v', BASE_VIDIOCPRIVATE+0, char [256]): + case _IOR('v', BASE_VIDIOCPRIVATE+1, char [256]): + case _IOR('v' , BASE_VIDIOCPRIVATE+2, unsigned int): + case _IOW('v' , BASE_VIDIOCPRIVATE+3, char [16]): /* struct bttv_pll_info */ + case _IOR('v' , BASE_VIDIOCPRIVATE+4, int): + case _IOR('v' , BASE_VIDIOCPRIVATE+5, int): + case _IOR('v' , BASE_VIDIOCPRIVATE+6, int): + case _IOR('v' , BASE_VIDIOCPRIVATE+7, int): + /* Little p (/dev/rtc, /dev/envctrl, etc.) */ case RTCGET: case RTCSET: diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index d8707261f..6eccb883a 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -1,4 +1,4 @@ -/* $Id: irq.c,v 1.66 1998/10/21 15:02:25 ecd Exp $ +/* $Id: irq.c,v 1.76 1999/04/02 14:54:30 davem Exp $ * irq.c: UltraSparc IRQ handling/init/registry. * * Copyright (C) 1997 David S. 
Miller (davem@caip.rutgers.edu) @@ -41,20 +41,30 @@ #define SA_DMA_SYNC 0x200 #ifdef __SMP__ -void distribute_irqs(void); -static int irqs_have_been_distributed = 0; +static void distribute_irqs(void); #endif -/* UPA nodes send interrupt packet to UltraSparc with first data reg value - * low 5 bits holding the IRQ identifier being delivered. We must translate - * this into a non-vector IRQ so we can set the softint on this cpu. To - * make things even more swift we store the complete mask here. +/* UPA nodes send interrupt packet to UltraSparc with first data reg + * value low 5 (7 on Starfire) bits holding the IRQ identifier being + * delivered. We must translate this into a non-vector IRQ so we can + * set the softint on this cpu. + * + * To make processing these packets efficient and race free we use + * an array of irq buckets below. The interrupt vector handler in + * entry.S feeds incoming packets into per-cpu pil-indexed lists. + * The IVEC handler does not need to act atomically, the PIL dispatch + * code uses CAS to get an atomic snapshot of the list and clear it + * at the same time. */ -#define NUM_HARD_IVECS 2048 -#define NUM_IVECS (NUM_HARD_IVECS + 64) /* For SMP IRQ distribution alg. */ +struct ino_bucket ivector_table[NUM_IVECS] __attribute__ ((aligned (64))); -unsigned long ivector_to_mask[NUM_IVECS]; +#ifndef __SMP__ +unsigned int __up_workvec[16] __attribute__ ((aligned (64))); +#define irq_work(__cpu, __pil) &(__up_workvec[(__pil)]) +#else +#define irq_work(__cpu, __pil) &(cpu_data[(__cpu)].irq_worklists[(__pil)]) +#endif /* This is based upon code in the 32-bit Sparc kernel written mostly by * David Redman (djhr@tadpole.co.uk). @@ -63,30 +73,21 @@ unsigned long ivector_to_mask[NUM_IVECS]; static struct irqaction static_irqaction[MAX_STATIC_ALLOC]; static int static_irq_count = 0; -/* XXX Must be exported so that fast IRQ handlers can get at it... -DaveM */ +/* This is exported so that fast IRQ handlers can get at it... 
-DaveM */ struct irqaction *irq_action[NR_IRQS+1] = { NULL, NULL, NULL, NULL, NULL, NULL , NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL , NULL, NULL }; -#define IBF_DMA_SYNC 0x01 -#define IBF_PCI 0x02 -#define IBF_ACTIVE 0x04 +/* Only 8-bits are available, be careful. -DaveM */ +#define IBF_DMA_SYNC 0x01 /* DMA synchronization behind PCI bridge needed. */ +#define IBF_PCI 0x02 /* Indicates PSYCHO/SCHIZO PCI interrupt. */ +#define IBF_ACTIVE 0x04 /* This interrupt is active and has a handler. */ +#define IBF_MULTI 0x08 /* On PCI, indicates shared bucket. */ -#define __imap(bucket) ((bucket)->iclr + (bucket)->imap_off) #define __bucket(irq) ((struct ino_bucket *)(unsigned long)(irq)) #define __irq(bucket) ((unsigned int)(unsigned long)(bucket)) -static struct ino_bucket *bucket_base, *buckets, *endbuckets; - -__initfunc(unsigned long irq_init(unsigned long start_mem, unsigned long end_mem)) -{ - start_mem = (start_mem + 15) & ~15; - bucket_base = buckets = (struct ino_bucket *)start_mem; - endbuckets = buckets + 2048; - return (unsigned long)endbuckets; -} - int get_irq_list(char *buf) { int i, len = 0; @@ -104,7 +105,7 @@ int get_irq_list(char *buf) #else for (j = 0; j < smp_num_cpus; j++) len += sprintf(buf + len, "%10u ", - kstat.irqs[cpu_logical_map(j)][i]); + kstat.irqs[cpu_logical_map(j)][i]); #endif len += sprintf(buf + len, "%c %s", (action->flags & SA_INTERRUPT) ? '+' : ' ', @@ -224,8 +225,7 @@ unsigned char psycho_ino_to_pil[] = { 1, /* Power Management */ }; -/* INO number to IMAP register offset for PSYCHO external IRQ's. - */ +/* INO number to IMAP register offset for PSYCHO external IRQ's. */ #define psycho_offset(x) ((unsigned long)(&(((struct psycho_regs *)0)->x))) #define psycho_imap_offset(ino) \ @@ -241,16 +241,27 @@ unsigned char psycho_ino_to_pil[] = { /* Now these are always passed a true fully specified sun4u INO. 
*/ void enable_irq(unsigned int irq) { + extern int this_is_starfire; struct ino_bucket *bucket = __bucket(irq); - unsigned long tid; unsigned int *imap; + unsigned long tid; - imap = __imap(bucket); - if (!imap) return; + imap = bucket->imap; + if (!imap) + return; - /* We send it to our UPA MID, for SMP this will be different. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" : "=r" (tid) : "i" (ASI_UPA_CONFIG)); - tid = ((tid & UPA_CONFIG_MID) << 9); + if(this_is_starfire == 0) { + /* We set it to our UPA MID. */ + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (tid) + : "i" (ASI_UPA_CONFIG)); + tid = ((tid & UPA_CONFIG_MID) << 9); + } else { + extern unsigned int starfire_translate(unsigned int *imap, + unsigned int upaid); + + tid = (starfire_translate(imap, current->processor) << 26); + } /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product * of this SYSIO's preconfigured IGN in the SYSIO Control @@ -269,35 +280,83 @@ void disable_irq(unsigned int irq) struct ino_bucket *bucket = __bucket(irq); unsigned int *imap; - imap = __imap(bucket); - if (!imap) return; - - /* NOTE: We do not want to futz with the IRQ clear registers - * and move the state to IDLE, the SCSI code does call - * disable_irq() to assure atomicity in the queue cmd - * SCSI adapter driver code. Thus we'd lose interrupts. - */ - *imap &= ~(SYSIO_IMAP_VALID); + imap = bucket->imap; + if (imap != NULL) { + /* NOTE: We do not want to futz with the IRQ clear registers + * and move the state to IDLE, the SCSI code does call + * disable_irq() to assure atomicity in the queue cmd + * SCSI adapter driver code. Thus we'd lose interrupts. + */ + *imap &= ~(SYSIO_IMAP_VALID); + } } +/* The timer is the one "weird" interrupt which is generated by + * the CPU %tick register and not by some normal vectored interrupt + * source. To handle this special case, we use this dummy INO bucket. 
+ */ +static struct ino_bucket pil0_dummy_bucket = { + 0, /* irq_chain */ + 0, /* pil */ + 0, /* pending */ + 0, /* flags */ + 0, /* __unused */ + NULL, /* irq_info */ + NULL, /* iclr */ + NULL, /* imap */ +}; + unsigned int build_irq(int pil, int inofixup, unsigned int *iclr, unsigned int *imap) { - if (buckets == endbuckets) - panic("Out of IRQ buckets. Should not happen.\n"); - buckets->pil = pil; - if (pil && (!iclr || !imap)) { - prom_printf("Invalid build_irq %d %d %016lx %016lx\n", pil, inofixup, iclr, imap); + struct ino_bucket *bucket; + int ino; + + if(pil == 0) { + if(iclr != NULL || imap != NULL) { + prom_printf("Invalid dummy bucket for PIL0 (%p:%p)\n", + iclr, imap); + prom_halt(); + } + return __irq(&pil0_dummy_bucket); + } + + /* RULE: Both must be specified in all other cases. */ + if (iclr == NULL || imap == NULL) { + prom_printf("Invalid build_irq %d %d %016lx %016lx\n", + pil, inofixup, iclr, imap); prom_halt(); } - if (imap) - buckets->ino = (*imap & (SYSIO_IMAP_IGN | SYSIO_IMAP_INO)) + inofixup; - else - buckets->ino = 0; - - buckets->iclr = iclr; - buckets->flags = 0; - buckets->imap_off = imap - iclr; - return __irq(buckets++); + + ino = (*imap & (SYSIO_IMAP_IGN | SYSIO_IMAP_INO)) + inofixup; + if(ino > NUM_IVECS) { + prom_printf("Invalid INO %04x (%d:%d:%016lx:%016lx)\n", + ino, pil, inofixup, iclr, imap); + prom_halt(); + } + + /* Ok, looks good, set it up. Don't touch the irq_chain or + * the pending flag. + */ + bucket = &ivector_table[ino]; + if ((bucket->flags & IBF_ACTIVE) || + (bucket->irq_info != NULL)) { + /* This is a gross fatal error if it happens here. 
*/ + prom_printf("IRQ: Trying to reinit INO bucket, fatal error.\n"); + prom_printf("IRQ: Request INO %04x (%d:%d:%016lx:%016lx)\n", + ino, pil, inofixup, iclr, imap); + prom_printf("IRQ: Existing (%d:%016lx:%016lx)\n", + bucket->pil, bucket->iclr, bucket->imap); + prom_printf("IRQ: Cannot continue, halting...\n"); + prom_halt(); + } + bucket->imap = imap; + bucket->iclr = iclr; + bucket->pil = pil; + bucket->flags = 0; + + bucket->irq_info = NULL; + + return __irq(bucket); } unsigned int sbus_build_irq(void *buscookie, unsigned int ino) @@ -382,8 +441,44 @@ unsigned int psycho_build_irq(void *buscookie, int imap_off, int ino, int need_d if(!(ino & 0x20)) inofixup = ino & 0x03; - bucket = __bucket(build_irq(pil, inofixup, iclr, imap)); - + /* First check for sharing. */ + ino = (*imap & (SYSIO_IMAP_IGN | SYSIO_IMAP_INO)) + inofixup; + if (ino > NUM_IVECS) { + prom_printf("PSYCHO: Invalid INO %04x (%d:%d:%016lx:%016lx)\n", + ino, pil, inofixup, iclr, imap); + prom_halt(); + } + bucket = &ivector_table[ino]; + if(bucket->flags & IBF_ACTIVE) { + void *old_handler = bucket->irq_info; + unsigned long flags; + + if(old_handler == NULL) { + prom_printf("PSYCHO: Active bucket, but no handler.\n"); + prom_halt(); + } + save_and_cli(flags); + if((bucket->flags & IBF_MULTI) == 0) { + void **vector; + + vector = kmalloc(sizeof(void *) * 4, + GFP_KERNEL); + + /* We might have slept. 
*/ + if((bucket->flags & IBF_MULTI) != 0) { + kfree(vector); + } else { + vector[0] = old_handler; + vector[1] = vector[2] = vector[3] = NULL; + bucket->irq_info = vector; + bucket->flags |= IBF_MULTI; + } + } + restore_flags(flags); + } else { + /* Just init the bucket */ + bucket = __bucket(build_irq(pil, inofixup, iclr, imap)); + } if (need_dma_sync) bucket->flags |= IBF_DMA_SYNC; @@ -392,6 +487,20 @@ unsigned int psycho_build_irq(void *buscookie, int imap_off, int ino, int need_d } #endif +static void atomic_bucket_insert(struct ino_bucket *bucket) +{ + unsigned long pstate; + unsigned int *ent; + + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + __asm__ __volatile__("wrpr %0, %1, %%pstate" + : : "r" (pstate), "i" (PSTATE_IE)); + ent = irq_work(smp_processor_id(), bucket->pil); + bucket->irq_chain = *ent; + *ent = __irq(bucket); + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); +} + int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char *name, void *dev_id) { @@ -400,11 +509,16 @@ int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *) unsigned long flags; int pending = 0; - if (irq < 0x400000 || (irq & 0x80000000)) { - prom_printf("request_irq with old style irq %08x %016lx\n", irq, handler); - prom_halt(); - } - + if ((bucket != &pil0_dummy_bucket) && + (bucket < &ivector_table[0] || + bucket >= &ivector_table[NUM_IVECS])) { + unsigned int *caller; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + printk(KERN_CRIT "request_irq: Old style IRQ registry attempt " + "from %p, irq %08x.\n", caller, irq); + return -EINVAL; + } if(!handler) return -EINVAL; @@ -429,24 +543,26 @@ int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *) } } + save_and_cli(flags); + action = *(bucket->pil + irq_action); if(action) { if((action->flags & SA_SHIRQ) && (irqflags & SA_SHIRQ)) for (tmp = action; tmp->next; tmp = tmp->next) ; - 
else + else { + restore_flags(flags); return -EBUSY; - + } if((action->flags & SA_INTERRUPT) ^ (irqflags & SA_INTERRUPT)) { printk("Attempt to mix fast and slow interrupts on IRQ%d " "denied\n", bucket->pil); + restore_flags(flags); return -EBUSY; } action = NULL; /* Or else! */ } - save_and_cli(flags); - /* If this is flagged as statically allocated then we use our * private struct which is never freed. */ @@ -466,12 +582,65 @@ int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *) return -ENOMEM; } - if (irqflags & SA_IMAP_MASKED) { - pending = ((ivector_to_mask[bucket->ino] & 0x80000000) != 0); - ivector_to_mask[bucket->ino] = (1 << bucket->pil); - if(pending) - ivector_to_mask[bucket->ino] |= 0x80000000; + if ((irqflags & SA_IMAP_MASKED) == 0) { + bucket->irq_info = action; bucket->flags |= IBF_ACTIVE; + } else { + if((bucket->flags & IBF_ACTIVE) != 0) { + void *orig = bucket->irq_info; + void **vector = NULL; + + if((bucket->flags & IBF_PCI) == 0) { + printk("IRQ: Trying to share non-PCI bucket.\n"); + goto free_and_ebusy; + } + if((bucket->flags & IBF_MULTI) == 0) { + vector = kmalloc(sizeof(void *) * 4, GFP_KERNEL); + if(vector == NULL) + goto free_and_enomem; + + /* We might have slept. 
*/ + if ((bucket->flags & IBF_MULTI) != 0) { + int ent; + + kfree(vector); + vector = (void **)bucket->irq_info; + for(ent = 0; ent < 4; ent++) { + if (vector[ent] == NULL) { + vector[ent] = action; + break; + } + } + if (ent == 4) + goto free_and_ebusy; + } else { + vector[0] = orig; + vector[1] = action; + vector[2] = NULL; + vector[3] = NULL; + bucket->irq_info = vector; + bucket->flags |= IBF_MULTI; + } + } else { + int ent; + + vector = (void **)orig; + for(ent = 0; ent < 4; ent++) { + if(vector[ent] == NULL) { + vector[ent] = action; + break; + } + } + if (ent == 4) + goto free_and_ebusy; + } + } else { + bucket->irq_info = action; + bucket->flags |= IBF_ACTIVE; + } + pending = bucket->pending; + if(pending) + bucket->pending = 0; } action->mask = (unsigned long) bucket; @@ -489,15 +658,26 @@ int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *) enable_irq(irq); /* We ate the IVEC already, this makes sure it does not get lost. */ - if(pending) + if(pending) { + atomic_bucket_insert(bucket); set_softint(1 << bucket->pil); - + } restore_flags(flags); + #ifdef __SMP__ - if(irqs_have_been_distributed) - distribute_irqs(); + distribute_irqs(); #endif return 0; + +free_and_ebusy: + kfree(action); + restore_flags(flags); + return -EBUSY; + +free_and_enomem: + kfree(action); + restore_flags(flags); + return -ENOMEM; } void free_irq(unsigned int irq, void *dev_id) @@ -507,9 +687,15 @@ void free_irq(unsigned int irq, void *dev_id) unsigned long flags; struct ino_bucket *bucket = __bucket(irq), *bp; - if (irq < 0x400000 || (irq & 0x80000000)) { - prom_printf("free_irq with old style irq %08x\n", irq); - prom_halt(); + if ((bucket != &pil0_dummy_bucket) && + (bucket < &ivector_table[0] || + bucket >= &ivector_table[NUM_IVECS])) { + unsigned int *caller; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + printk(KERN_CRIT "free_irq: Old style IRQ removal attempt " + "from %p, irq %08x.\n", caller, irq); + return; } action = *(bucket->pil 
+ irq_action); @@ -545,27 +731,59 @@ void free_irq(unsigned int irq, void *dev_id) *(bucket->pil + irq_action) = action->next; if(action->flags & SA_IMAP_MASKED) { - unsigned int *imap = __imap(bucket); + unsigned int *imap = bucket->imap; + void **vector, *orig; + int ent; + + orig = bucket->irq_info; + vector = (void **)orig; + + if ((bucket->flags & IBF_MULTI) != 0) { + int other = 0; + void *orphan = NULL; + for(ent = 0; ent < 4; ent++) { + if(vector[ent] == action) + vector[ent] = NULL; + else if(vector[ent] != NULL) { + orphan = vector[ent]; + other++; + } + } - /* - * Only free when no other shared irq uses this bucket. - */ - tmp = *(bucket->pil + irq_action); - for( ; tmp; tmp = tmp->next) - if ((struct ino_bucket *)tmp->mask == bucket) + /* Only free when no other shared irq + * uses this bucket. + */ + if(other) { + if (other == 1) { + /* Convert back to non-shared bucket. */ + bucket->irq_info = orphan; + bucket->flags &= ~(IBF_MULTI); + kfree(vector); + } goto out; + } + } else { + bucket->irq_info = NULL; + } - ivector_to_mask[bucket->ino] = 0; - + /* This unique interrupt source is now inactive. */ bucket->flags &= ~IBF_ACTIVE; - for (bp = bucket_base; bp < endbuckets; bp++) - if (__imap(bp) == imap && (bp->flags & IBF_ACTIVE)) + + /* See if any other buckets share this bucket's IMAP + * and are still active. + */ + for(ent = 0; ent < NUM_IVECS; ent++) { + bp = &ivector_table[ent]; + if(bp != bucket && + bp->imap == imap && + (bp->flags & IBF_ACTIVE) != 0) break; - /* - * Only disable when no other sub-irq levels of - * the same imap are active. + } + + /* Only disable when no other sub-irq levels of + * the same IMAP are active. 
*/ - if (bp == endbuckets) + if (ent == NUM_IVECS) disable_irq(irq); } @@ -607,10 +825,10 @@ static void show(char * str) int cpu = smp_processor_id(); printk("\n%s, CPU %d:\n", str, cpu); - printk("irq: %d [%d %d]\n", + printk("irq: %d [%ld %ld]\n", atomic_read(&global_irq_count), cpu_data[0].irq_count, cpu_data[1].irq_count); - printk("bh: %d [%d %d]\n", + printk("bh: %d [%ld %ld]\n", (spin_is_locked(&global_bh_count) ? 1 : 0), cpu_data[0].bh_count, cpu_data[1].bh_count); } @@ -755,57 +973,56 @@ void __global_restore_flags(unsigned long flags) #endif /* __SMP__ */ -void report_spurious_ivec(struct pt_regs *regs) +void catch_disabled_ivec(struct pt_regs *regs) { - extern unsigned long ivec_spurious_cookie; - -#if 0 - printk("IVEC: Spurious interrupt vector (%016lx) received at (%016lx)\n", - ivec_spurious_cookie, regs->tpc); -#endif + int cpu = smp_processor_id(); + struct ino_bucket *bucket = __bucket(*irq_work(cpu, 0)); /* We can actually see this on Ultra/PCI PCI cards, which are bridges * to other devices. Here a single IMAP enabled potentially multiple * unique interrupt sources (which each do have a unique ICLR register. * * So what we do is just register that the IVEC arrived, when registered - * for real the request_irq() code will check the high bit and signal + * for real the request_irq() code will check the bit and signal * a local CPU interrupt for it. 
*/ - ivector_to_mask[ivec_spurious_cookie] |= (0x80000000); +#if 0 + printk("IVEC: Spurious interrupt vector (%x) received at (%016lx)\n", + bucket - &ivector_table[0], regs->tpc); +#endif + *irq_work(cpu, 0) = 0; + bucket->pending = 1; } -void unexpected_irq(int irq, void *dev_cookie, struct pt_regs *regs) -{ - int i; - struct irqaction *action; - unsigned int cpu_irq; - - cpu_irq = irq & NR_IRQS; - action = *(cpu_irq + irq_action); - - prom_printf("Unexpected IRQ[%d]: ", irq); - prom_printf("PC[%016lx] NPC[%016lx] FP[%016lx]\n", - regs->tpc, regs->tnpc, regs->u_regs[14]); - - if(action) { - prom_printf("Expecting: "); - for(i = 0; i < 16; i++) { - if(action->handler) - prom_printf("[%s:%d:0x%016lx] ", action->name, - i, (unsigned long) action->handler); - } - } - prom_printf("AIEEE\n"); - prom_printf("bogus interrupt received\n"); - prom_cmdline (); -} +/* Tune this... */ +#define FORWARD_VOLUME 12 void handler_irq(int irq, struct pt_regs *regs) { - struct ino_bucket *bucket = NULL; - struct irqaction *action, *act; + struct ino_bucket *bp, *nbp; int cpu = smp_processor_id(); +#ifdef __SMP__ + extern int this_is_starfire; + int should_forward = (this_is_starfire == 0 && + irq < 10 && + current->pid != 0); + unsigned int buddy = 0; + + /* 'cpu' is the MID (ie. UPAID), calculate the MID + * of our buddy. + */ + if(should_forward != 0) { + buddy = cpu_number_map[cpu] + 1; + if (buddy >= NR_CPUS || + (buddy = cpu_logical_map(buddy)) == -1) + buddy = cpu_logical_map(0); + + /* Voo-doo programming. 
*/ + if(cpu_data[buddy].idle_volume < FORWARD_VOLUME) + should_forward = 0; + buddy <<= 26; + } +#endif #ifndef __SMP__ /* @@ -817,30 +1034,55 @@ void handler_irq(int irq, struct pt_regs *regs) clear_softint(1 << irq); irq_enter(cpu, irq); - action = *(irq + irq_action); kstat.irqs[cpu][irq]++; - if(!action) { - unexpected_irq(irq, 0, regs); - } else { - act = action; - do { - if(act->flags & SA_IMAP_MASKED) { - bucket = (struct ino_bucket *)act->mask; - if(!(ivector_to_mask[bucket->ino] & 0x80000000)) - continue; + + /* Sliiiick... */ +#ifndef __SMP__ + bp = ((irq != 0) ? + __bucket(xchg32(irq_work(cpu, irq), 0)) : + &pil0_dummy_bucket); +#else + bp = __bucket(xchg32(irq_work(cpu, irq), 0)); +#endif + for( ; bp != NULL; bp = nbp) { + unsigned char flags = bp->flags; + + nbp = __bucket(bp->irq_chain); + if((flags & IBF_ACTIVE) != 0) { + if((flags & IBF_MULTI) == 0) { + struct irqaction *ap = bp->irq_info; + ap->handler(__irq(bp), ap->dev_id, regs); + } else { + void **vector = (void **)bp->irq_info; + int ent; + for(ent = 0; ent < 4; ent++) { + struct irqaction *ap = vector[ent]; + if(ap != NULL) + ap->handler(__irq(bp), ap->dev_id, regs); + } } - act->handler(__irq(bucket), act->dev_id, regs); - } while((act = act->next) != NULL); - act = action; - do { - if(act->flags & SA_IMAP_MASKED) { - bucket = (struct ino_bucket *)act->mask; - if(!(ivector_to_mask[bucket->ino] & 0x80000000)) - continue; - ivector_to_mask[bucket->ino] &= ~(0x80000000); - *(bucket->iclr) = SYSIO_ICLR_IDLE; + /* Only the dummy bucket lacks IMAP/ICLR. */ + if(bp->pil != 0) { +#ifdef __SMP__ + /* Ok, here is what is going on: + * 1) Retargeting IRQs on Starfire is very + * expensive so just forget about it on them. + * 2) Moving around very high priority interrupts + * is a losing game. + * 3) If the current cpu is idle, interrupts are + * useful work, so keep them here. But do not + * pass to our neighbour if he is not very idle. + */ + if (should_forward != 0) { + /* Push it to our buddy. 
*/ + should_forward = 0; + *(bp->imap) = (buddy | SYSIO_IMAP_VALID); + } +#endif + *(bp->iclr) = SYSIO_ICLR_IDLE; } - } while((act = act->next) != NULL); + } else + bp->pending = 1; } irq_exit(cpu, irq); } @@ -856,10 +1098,13 @@ void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) irq_enter(cpu, irq); kstat.irqs[cpu][irq]++; + + *(irq_work(cpu, irq)) = 0; bucket = (struct ino_bucket *)action->mask; + floppy_interrupt(irq, dev_cookie, regs); - ivector_to_mask[bucket->ino] &= ~(0x80000000); *(bucket->iclr) = SYSIO_ICLR_IDLE; + irq_exit(cpu, irq); } #endif @@ -897,11 +1142,21 @@ int request_fast_irq(unsigned int irq, struct ino_bucket *bucket = __bucket(irq); unsigned long flags; - if (irq < 0x400000 || (irq & 0x80000000)) { - prom_printf("request_irq with old style irq %08x %016lx\n", irq, handler); - prom_halt(); - } + /* No pil0 dummy buckets allowed here. */ + if (bucket < &ivector_table[0] || + bucket >= &ivector_table[NUM_IVECS]) { + unsigned int *caller; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt " + "from %p, irq %08x.\n", caller, irq); + return -EINVAL; + } + /* Only IMAP style interrupts can be registered as fast. 
*/ + if(bucket->pil == 0) + return -EINVAL; + if(!handler) return -EINVAL; @@ -919,6 +1174,7 @@ int request_fast_irq(unsigned int irq, printk("request_fast_irq: Trying to register yet already owned.\n"); return -EBUSY; } + save_and_cli(flags); if(irqflags & SA_STATIC_ALLOC) { if(static_irq_count < MAX_STATIC_ALLOC) @@ -936,7 +1192,8 @@ int request_fast_irq(unsigned int irq, } install_fast_irq(bucket->pil, handler); - ivector_to_mask[bucket->ino] = (1 << bucket->pil); + bucket->irq_info = action; + bucket->flags |= IBF_ACTIVE; action->mask = (unsigned long) bucket; action->handler = handler; @@ -949,9 +1206,9 @@ int request_fast_irq(unsigned int irq, enable_irq(irq); restore_flags(flags); + #ifdef __SMP__ - if(irqs_have_been_distributed) - distribute_irqs(); + distribute_irqs(); #endif return 0; } @@ -1025,50 +1282,51 @@ void init_timers(void (*cfunc)(int, void *, struct pt_regs *), } #ifdef __SMP__ -/* Called from smp_commence, when we know how many cpus are in the system - * and can have device IRQ's directed at them. - */ -/* #define SMP_IRQ_VERBOSE */ -void distribute_irqs(void) +static int retarget_one_irq(struct irqaction *p, int goal_cpu) +{ + extern int this_is_starfire; + struct ino_bucket *bucket = __bucket(p->mask); + unsigned int *imap = bucket->imap; + unsigned int tid; + + /* Never change this, it causes problems on Ex000 systems. */ + if (bucket->pil == 12) + return goal_cpu; + + if(this_is_starfire == 0) { + tid = __cpu_logical_map[goal_cpu] << 26; + } else { + extern unsigned int starfire_translate(unsigned int *imap, + unsigned int upaid); + + tid = (starfire_translate(imap, __cpu_logical_map[goal_cpu]) << 26); + } + *imap = SYSIO_IMAP_VALID | (tid & SYSIO_IMAP_TID); + + goal_cpu++; + if(goal_cpu >= NR_CPUS || + __cpu_logical_map[goal_cpu] == -1) + goal_cpu = 0; + return goal_cpu; +} + +/* Called from request_irq. 
*/ +static void distribute_irqs(void) { unsigned long flags; int cpu, level; -#ifdef SMP_IRQ_VERBOSE - printk("SMP: redistributing interrupts...\n"); -#endif save_and_cli(flags); cpu = 0; for(level = 0; level < NR_IRQS; level++) { struct irqaction *p = irq_action[level]; - while(p) { - if(p->flags & SA_IMAP_MASKED) { - struct ino_bucket *bucket = (struct ino_bucket *)p->mask; - unsigned int *imap = __imap(bucket); - unsigned int val; - unsigned long tid = __cpu_logical_map[cpu] << 26; - - val = *imap; - *imap = SYSIO_IMAP_VALID | (tid & SYSIO_IMAP_TID); - -#ifdef SMP_IRQ_VERBOSE - printk("SMP: Redirecting IGN[%x] INO[%x] " - "to cpu %d [%s]\n", - (val & SYSIO_IMAP_IGN) >> 6, - (val & SYSIO_IMAP_INO), cpu, - p->name); -#endif - - cpu++; - if (cpu >= NR_CPUS || __cpu_logical_map[cpu] == -1) - cpu = 0; - } + if(p->flags & SA_IMAP_MASKED) + cpu = retarget_one_irq(p, cpu); p = p->next; } } restore_flags(flags); - irqs_have_been_distributed = 1; } #endif @@ -1146,13 +1404,13 @@ __initfunc(void init_IRQ(void)) static int called = 0; if (called == 0) { - int i; - called = 1; map_prom_timers(); kill_prom_timer(); - for(i = 0; i < NUM_IVECS; i++) - ivector_to_mask[i] = 0; + memset(&ivector_table[0], 0, sizeof(ivector_table)); +#ifndef __SMP__ + memset(&__up_workvec[0], 0, sizeof(__up_workvec)); +#endif } /* We need to clear any IRQ's pending in the soft interrupt diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S index 34a542ac5..eefc1c074 100644 --- a/arch/sparc64/kernel/itlb_base.S +++ b/arch/sparc64/kernel/itlb_base.S @@ -1,4 +1,4 @@ -/* $Id: itlb_base.S,v 1.5 1998/06/15 16:59:32 jj Exp $ +/* $Id: itlb_base.S,v 1.7 1999/03/02 15:42:12 jj Exp $ * itlb_base.S: Front end to ITLB miss replacement strategy. * This is included directly into the trap table. * @@ -15,11 +15,9 @@ * 2) All user instruction misses. * * All real page faults merge their code paths to the - * sparc64_realfault_* labels below. + * sparc64_realfault_common label below. 
*/ - .globl sparc64_vpte_patchme - /* ITLB ** ICACHE line 1: Quick user TLB misses */ ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS srax %g4, VPTE_SHIFT, %g6 ! Create VPTE offset @@ -42,28 +40,25 @@ /* ITLB ** ICACHE line 3: Real faults */ rdpr %tpc, %g5 ! And load faulting VA + clr %g4 ! It was read sparc64_realfault_common: ! Called by TL0 dtlb_miss too sethi %hi(1f), %g7 ! Save state ba,pt %xcc, etrap ! ... 1: or %g7, %lo(1b), %g7 ! ... - clr %o2 ! It was read -sparc64_realfault_continue: ! Called by dtlb_prot handler + mov %l4, %o2 ! Read/Write/No idea srlx %l5, PAGE_SHIFT, %o1 ! Page align faulting VA add %sp, STACK_BIAS + REGWIN_SZ, %o0! Compute pt_regs arg - call do_sparc64_fault ! Call fault handler /* ITLB ** ICACHE line 4: Call fault processing code */ + call do_sparc64_fault ! Call fault handler sllx %o1, PAGE_SHIFT, %o1 ! Finish page alignment ba,a,pt %xcc, rtrap_clr_l6 ! Restore cpu state + nop winfix_trampoline: rdpr %tpc, %g3 ! Prepare winfixup TNPC or %g3, 0x7c, %g3 ! Compute offset to branch wrpr %g3, %tnpc ! Write it into TNPC done ! Do it to it -sparc64_vpte_nucleus: - ba,pt %xcc, sparc64_vpte_continue ! Part of dtlb_backend -sparc64_vpte_patchme: - sethi %hi(0), %g5 ! This has to be patched #undef TAG_CONTEXT_BITS #undef VPTE_SHIFT diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index c5c7061f7..0d4871132 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -1,4 +1,4 @@ -/* $Id: process.c,v 1.82 1998/10/19 21:52:23 davem Exp $ +/* $Id: process.c,v 1.92 1999/05/08 23:04:48 davem Exp $ * arch/sparc64/kernel/process.c * * Copyright (C) 1995, 1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -53,11 +53,19 @@ asmlinkage int sys_idle(void) /* endless idle loop with no priority at all */ current->priority = 0; - current->counter = 0; + current->counter = -100; + init_idle(); + for (;;) { - check_pgt_cache(); - run_task_queue(&tq_scheduler); + /* If current->need_resched is zero we should really + * setup for a system wakup event and execute a shutdown + * instruction. + * + * But this requires writing back the contents of the + * L2 cache etc. so implement this later. -DaveM + */ schedule(); + check_pgt_cache(); } return 0; } @@ -67,20 +75,27 @@ asmlinkage int sys_idle(void) /* * the idle loop on a UltraMultiPenguin... */ +#define idle_me_harder() (cpu_data[current->processor].idle_volume += 1) +#define unidle_me() (cpu_data[current->processor].idle_volume = 0) asmlinkage int cpu_idle(void) { current->priority = 0; - while(1) { - struct task_struct *p; + current->counter = -100; + init_idle(); - check_pgt_cache(); - run_task_queue(&tq_scheduler); - current->counter = 0; - if (current->need_resched != 0 || - ((p = init_task.next_run) != NULL && - (p->processor == smp_processor_id() || - (p->tss.flags & SPARC_FLAG_NEWCHILD) != 0))) + while(1) { + if (current->need_resched != 0) { + unidle_me(); schedule(); + check_pgt_cache(); + } + idle_me_harder(); + + /* The store ordering is so that IRQ handlers on + * other cpus see our increasing idleness for the buddy + * redistribution algorithm. 
-DaveM + */ + membar("#StoreStore | #StoreLoad"); } } @@ -158,12 +173,12 @@ static void show_regwindow32(struct pt_regs *regs) } rw = &r_w; set_fs (old_fs); - printk("l0: %016x l1: %016x l2: %016x l3: %016x\n" - "l4: %016x l5: %016x l6: %016x l7: %016x\n", + printk("l0: %08x l1: %08x l2: %08x l3: %08x " + "l4: %08x l5: %08x l6: %08x l7: %08x\n", rw->locals[0], rw->locals[1], rw->locals[2], rw->locals[3], rw->locals[4], rw->locals[5], rw->locals[6], rw->locals[7]); - printk("i0: %016x i1: %016x i2: %016x i3: %016x\n" - "i4: %016x i5: %016x i6: %016x i7: %016x\n", + printk("i0: %08x i1: %08x i2: %08x i3: %08x " + "i4: %08x i5: %08x i6: %08x i7: %08x\n", rw->ins[0], rw->ins[1], rw->ins[2], rw->ins[3], rw->ins[4], rw->ins[5], rw->ins[6], rw->ins[7]); } @@ -340,13 +355,13 @@ void show_regs32(struct pt_regs32 *regs) { printk("PSR: %08x PC: %08x NPC: %08x Y: %08x\n", regs->psr, regs->pc, regs->npc, regs->y); - printk("g0: %08x g1: %08x g2: %08x g3: %08x\n", + printk("g0: %08x g1: %08x g2: %08x g3: %08x ", regs->u_regs[0], regs->u_regs[1], regs->u_regs[2], regs->u_regs[3]); printk("g4: %08x g5: %08x g6: %08x g7: %08x\n", regs->u_regs[4], regs->u_regs[5], regs->u_regs[6], regs->u_regs[7]); - printk("o0: %08x o1: %08x o2: %08x o3: %08x\n", + printk("o0: %08x o1: %08x o2: %08x o3: %08x ", regs->u_regs[8], regs->u_regs[9], regs->u_regs[10], regs->u_regs[11]); printk("o4: %08x o5: %08x sp: %08x ret_pc: %08x\n", @@ -427,9 +442,7 @@ void flush_thread(void) /* exec_mmap() set context to NO_CONTEXT, here is * where we grab a new one. 
*/ - current->mm->cpu_vm_mask = 0; activate_context(current); - current->mm->cpu_vm_mask = (1UL<<smp_processor_id()); } if (current->tss.flags & SPARC_FLAG_32BIT) __asm__ __volatile__("stxa %%g0, [%0] %1" @@ -447,6 +460,11 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) { unsigned long fp, distance, rval; + /* do_fork() grabs the parent semaphore, we must release it + * temporarily so we can build the child clone stack frame + * without deadlocking. + */ + up(&current->mm->mmap_sem); if(!(current->tss.flags & SPARC_FLAG_32BIT)) { csp += STACK_BIAS; psp += STACK_BIAS; @@ -463,17 +481,20 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) distance = fp - psp; rval = (csp - distance); if(copy_in_user(rval, psp, distance)) - return 0; - if(current->tss.flags & SPARC_FLAG_32BIT) { + rval = 0; + else if(current->tss.flags & SPARC_FLAG_32BIT) { if(put_user(((u32)csp), &(((struct reg_window32 *)rval)->ins[6]))) - return 0; - return rval; + rval = 0; } else { if(put_user(((u64)csp - STACK_BIAS), &(((struct reg_window *)rval)->ins[6]))) - return 0; - return rval - STACK_BIAS; + rval = 0; + else + rval = rval - STACK_BIAS; } + down(&current->mm->mmap_sem); + + return rval; } /* Standard stuff. */ @@ -624,6 +645,37 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, } /* + * This is the mechanism for creating a new kernel thread. + * + * NOTE! Only a kernel-only process(ie the swapper or direct descendants + * who haven't done an "execve()") should use this: it will work within + * a system call from a "real" process, but the process memory space will + * not be free'd until both the parent and the child have exited. + */ +pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval; + + __asm__ __volatile("mov %1, %%g1\n\t" + "mov %2, %%o0\n\t" /* Clone flags. */ + "mov 0, %%o1\n\t" /* usp arg == 0 */ + "t 0x6d\n\t" /* Linux/Sparc clone().
*/ + "brz,a,pn %%o1, 1f\n\t" /* Parent, just return. */ + " mov %%o0, %0\n\t" + "jmpl %4, %%o7\n\t" /* Call the function. */ + " mov %5, %%o0\n\t" /* Set arg in delay. */ + "mov %3, %%g1\n\t" + "t 0x6d\n\t" /* Linux/Sparc exit(). */ + /* Notreached by child. */ + "1:" : + "=r" (retval) : + "i" (__NR_clone), "r" (flags | CLONE_VM), + "i" (__NR_exit), "r" (fn), "r" (arg) : + "g1", "o0", "o1", "memory", "cc"); + return retval; +} + +/* * fill in the user structure for a core dump.. */ void dump_thread(struct pt_regs * regs, struct user * dump) diff --git a/arch/sparc64/kernel/psycho.c b/arch/sparc64/kernel/psycho.c index 96b1ac2e9..7df9a5882 100644 --- a/arch/sparc64/kernel/psycho.c +++ b/arch/sparc64/kernel/psycho.c @@ -1,8 +1,9 @@ -/* $Id: psycho.c,v 1.66 1998/11/02 22:27:45 davem Exp $ +/* $Id: psycho.c,v 1.85 1999/04/02 14:54:28 davem Exp $ * psycho.c: Ultra/AX U2P PCI controller support. * * Copyright (C) 1997 David S. Miller (davem@caipfs.rutgers.edu) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) */ #include <linux/config.h> @@ -29,14 +30,13 @@ #define dprintf printk #endif - unsigned long pci_dvma_offset = 0x00000000UL; unsigned long pci_dvma_mask = 0xffffffffUL; +#define PCI_DVMA_HASH_NONE 0xffffffffffffffffUL unsigned long pci_dvma_v2p_hash[PCI_DVMA_HASHSZ]; unsigned long pci_dvma_p2v_hash[PCI_DVMA_HASHSZ]; - #ifndef CONFIG_PCI int pcibios_present(void) @@ -74,9 +74,12 @@ asmlinkage int sys_pciconfig_write(unsigned long bus, #include <asm/apb.h> #include <asm/uaccess.h> +#define PSYCHO_REORDER_ONBOARDFIRST 1 + struct linux_psycho *psycho_root = NULL; int linux_num_psycho = 0; static struct linux_pbm_info *bus2pbm[256]; +static int psycho_reorder __initdata = 0; static int pbm_read_config_byte(struct linux_pbm_info *pbm, unsigned char bus, unsigned char devfn, @@ -112,8 +115,10 @@ static __inline__ void set_dvma_hash(unsigned long paddr, unsigned long daddr) 
pci_dvma_p2v_hash[pci_dvma_ahashfn(dvma_addr)] = vaddr - dvma_addr; } -__initfunc(static void psycho_iommu_init(struct linux_psycho *psycho, int tsbsize)) +static void __init psycho_iommu_init(struct linux_psycho *psycho, int tsbsize) { + extern int this_is_starfire; + extern void *starfire_hookup(int); struct linux_mlist_p1275 *mlist; unsigned long tsbbase; unsigned long control, i, n; @@ -137,37 +142,77 @@ __initfunc(static void psycho_iommu_init(struct linux_psycho *psycho, int tsbsiz break; } tsbbase = __get_free_pages(GFP_DMA, order); + if (!tsbbase) { + prom_printf("IOMMU: Error, kmalloc(tsb) failed.\n"); + prom_halt(); + } iopte = (unsigned long *)tsbbase; - memset(pci_dvma_v2p_hash, 0, sizeof(pci_dvma_v2p_hash)); - memset(pci_dvma_p2v_hash, 0, sizeof(pci_dvma_p2v_hash)); + /* Initialize to "none" settings. */ + for(i = 0; i < PCI_DVMA_HASHSZ; i++) { + pci_dvma_v2p_hash[i] = PCI_DVMA_HASH_NONE; + pci_dvma_p2v_hash[i] = PCI_DVMA_HASH_NONE; + } n = 0; mlist = *prom_meminfo()->p1275_totphys; while (mlist) { unsigned long paddr = mlist->start_adr; + unsigned long num_bytes = mlist->num_bytes; - for (i = 0; i < (mlist->num_bytes >> 16); i++) { + if(paddr >= (((unsigned long) high_memory) - PAGE_OFFSET)) + goto next; + + if((paddr + num_bytes) >= (((unsigned long) high_memory) - PAGE_OFFSET)) + num_bytes = (((unsigned long) high_memory) - PAGE_OFFSET) - paddr; + + /* Align base and length so we map whole hash table sized chunks + * at a time (and therefore full 64K IOMMU pages). + */ + paddr &= ~((1UL << 24UL) - 1); + num_bytes = (num_bytes + ((1UL << 24UL) - 1)) & ~((1UL << 24) - 1); + + /* Move up the base for mappings already created. */ + while(pci_dvma_v2p_hash[pci_dvma_ahashfn(paddr)] != + PCI_DVMA_HASH_NONE) { + paddr += (1UL << 24UL); + num_bytes -= (1UL << 24UL); + if(num_bytes == 0UL) + goto next; + } + + /* Move down the size for tail mappings already created. 
*/ + while(pci_dvma_v2p_hash[pci_dvma_ahashfn(paddr + num_bytes - (1UL << 24UL))] != + PCI_DVMA_HASH_NONE) { + num_bytes -= (1UL << 24UL); + if(num_bytes == 0UL) + goto next; + } + /* Now map the rest. */ + for (i = 0; i < ((num_bytes + ((1 << 16) - 1)) >> 16); i++) { *iopte = (IOPTE_VALID | IOPTE_64K | IOPTE_CACHE | IOPTE_WRITE); *iopte |= paddr; if (!(n & 0xff)) set_dvma_hash(paddr, (n << 16)); - + if (++n > (tsbsize * 1024)) goto out; paddr += (1 << 16); iopte++; } - + next: mlist = mlist->theres_more; } out: - if (mlist) - printk("WARNING: not all physical memory mapped in IOMMU\n"); + if (mlist) { + prom_printf("WARNING: not all physical memory mapped in IOMMU\n"); + prom_printf("Try booting with mem=xxxM or similar\n"); + prom_halt(); + } psycho->psycho_regs->iommu_tsbbase = __pa(tsbbase); @@ -193,6 +238,12 @@ out: break; } psycho->psycho_regs->iommu_control = control; + + /* If necessary, hook us up for starfire IRQ translations. */ + if(this_is_starfire) + psycho->starfire_cookie = starfire_hookup(psycho->upa_portid); + else + psycho->starfire_cookie = NULL; } extern void prom_pbm_ranges_init(int node, struct linux_pbm_info *pbm); @@ -201,7 +252,7 @@ extern void prom_pbm_intmap_init(int node, struct linux_pbm_info *pbm); /* * Poor man's PCI... 
*/ -__initfunc(void sabre_init(int pnode)) +void __init sabre_init(int pnode) { struct linux_prom64_registers pr_regs[2]; struct linux_psycho *sabre; @@ -213,6 +264,10 @@ __initfunc(void sabre_init(int pnode)) int bus; sabre = kmalloc(sizeof(struct linux_psycho), GFP_ATOMIC); + if (!sabre) { + prom_printf("SABRE: Error, kmalloc(sabre) failed.\n"); + prom_halt(); + } portid = prom_getintdefault(pnode, "upa-portid", 0xff); @@ -248,9 +303,11 @@ __initfunc(void sabre_init(int pnode)) prom_halt(); } - printk("PCI: Found SABRE, main regs at %p\n", sabre->psycho_regs); + printk("PCI: Found SABRE, main regs at %p CTRL[%016lx]\n", + sabre->psycho_regs, sabre->psycho_regs->control); #ifdef PROM_DEBUG - dprintf("PCI: Found SABRE, main regs at %p\n", sabre->psycho_regs); + dprintf("PCI: Found SABRE, main regs at %p CTRL[%016lx]\n", + sabre->psycho_regs, sabre->psycho_regs->control); #endif ctrl = sabre->psycho_regs->pci_a_control; @@ -382,7 +439,7 @@ apb_present(struct linux_psycho *psycho) return psycho->pci_bus ? 1 : 0; } -__initfunc(void pcibios_init(void)) +void __init pcibios_init(void) { struct linux_prom64_registers pr_regs[3]; struct linux_psycho *psycho; @@ -408,8 +465,6 @@ __initfunc(void pcibios_init(void)) goto next_pci; } - psycho = kmalloc(sizeof(struct linux_psycho), GFP_ATOMIC); - portid = prom_getintdefault(node, "upa-portid", 0xff); for(search = psycho_root; search; search = search->next) { if(search->upa_portid == portid) { @@ -424,6 +479,11 @@ __initfunc(void pcibios_init(void)) } } + psycho = kmalloc(sizeof(struct linux_psycho), GFP_ATOMIC); + if (!psycho) { + prom_printf("PSYCHO: Error, kmalloc(psycho) failed.\n"); + prom_halt(); + } memset(psycho, 0, sizeof(*psycho)); psycho->next = psycho_root; @@ -494,8 +554,14 @@ __initfunc(void pcibios_init(void)) is_pbm_a = ((pr_regs[0].phys_addr & 0x6000) == 0x2000); /* Enable arbitration for all PCI slots. 
*/ - psycho->psycho_regs->pci_a_control |= 0x3f; - psycho->psycho_regs->pci_b_control |= 0x3f; + psycho->psycho_regs->pci_a_control |= PSYCHO_PCICTRL_AEN; + psycho->psycho_regs->pci_b_control |= PSYCHO_PCICTRL_AEN; + + /* Disable DMA write / PIO rd synchronization on both + * PCI bus segments. + */ + psycho->psycho_regs->pci_a_diag |= PSYCHO_PCIDIAG_DDWSYNC; + psycho->psycho_regs->pci_b_diag |= PSYCHO_PCIDIAG_DDWSYNC; other_pbm: if(is_pbm_a) @@ -609,25 +675,28 @@ static inline struct pcidev_cookie *pci_devcookie_alloc(void) } -__initfunc(static void -pbm_reconfigure_bridges(struct linux_pbm_info *pbm, unsigned char bus)) +static void __init +pbm_reconfigure_bridges(struct linux_pbm_info *pbm, unsigned char bus) { unsigned int devfn, l, class; unsigned char hdr_type = 0; + int is_multi = 0; for (devfn = 0; devfn < 0xff; ++devfn) { - if (PCI_FUNC(devfn) == 0) { - pbm_read_config_byte(pbm, bus, devfn, - PCI_HEADER_TYPE, &hdr_type); - } else if (!(hdr_type & 0x80)) { + if (PCI_FUNC(devfn) != 0 && is_multi == 0) { /* not a multi-function device */ continue; } + pbm_read_config_byte(pbm, bus, devfn, + PCI_HEADER_TYPE, &hdr_type); + if (PCI_FUNC(devfn) == 0) + is_multi = hdr_type & 0x80; /* Check if there is anything here. 
*/ pbm_read_config_dword(pbm, bus, devfn, PCI_VENDOR_ID, &l); - if (l == 0xffffffff || l == 0x00000000) { - hdr_type = 0; + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) { + is_multi = 0; continue; } @@ -657,7 +726,7 @@ pbm_reconfigure_bridges(struct linux_pbm_info *pbm, unsigned char bus)) } } -__initfunc(static void pbm_fixup_busno(struct linux_pbm_info *pbm, unsigned char bus)) +static void __init pbm_fixup_busno(struct linux_pbm_info *pbm, unsigned char bus) { unsigned int nbus; @@ -682,8 +751,7 @@ __initfunc(static void pbm_fixup_busno(struct linux_pbm_info *pbm, unsigned char } while (nbus--); } - -__initfunc(static void apb_init(struct linux_psycho *sabre)) +static void __init apb_init(struct linux_psycho *sabre) { struct pci_dev *pdev; unsigned short stmp; @@ -692,21 +760,20 @@ __initfunc(static void apb_init(struct linux_psycho *sabre)) for(pdev = pci_devices; pdev; pdev = pdev->next) { if(pdev->vendor == PCI_VENDOR_ID_SUN && pdev->device == PCI_DEVICE_ID_SUN_SABRE) { - /* Increase latency timer on top level bridge. */ - pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xf8); + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 128); break; } } for (pdev = sabre->pci_bus->devices; pdev; pdev = pdev->sibling) { if (pdev->vendor == PCI_VENDOR_ID_SUN && pdev->device == PCI_DEVICE_ID_SUN_SIMBA) { - pci_read_config_word(pdev, PCI_COMMAND, &stmp); stmp |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY | PCI_COMMAND_IO; pci_write_config_word(pdev, PCI_COMMAND, stmp); + /* Status register bits are "write 1 to clear". 
*/ pci_write_config_word(pdev, PCI_STATUS, 0xffff); pci_write_config_word(pdev, PCI_SEC_STATUS, 0xffff); @@ -721,28 +788,25 @@ __initfunc(static void apb_init(struct linux_psycho *sabre)) APB_PCI_CTL_HIGH_ARBITER_EN; pci_write_config_dword(pdev, APB_PCI_CONTROL_HIGH, itmp); + /* Systems with SIMBA are usually workstations, so + * we configure to park to SIMBA not to the previous + * bus owner. + */ pci_read_config_dword(pdev, APB_PCI_CONTROL_LOW, &itmp); - itmp = APB_PCI_CTL_LOW_ARB_PARK | - APB_PCI_CTL_LOW_ERRINT_EN | 0x0f; + itmp = APB_PCI_CTL_LOW_ERRINT_EN | 0x0f; pci_write_config_dword(pdev, APB_PCI_CONTROL_LOW, itmp); - /* - * Setup Registers for Guaranteed Completion. + /* Don't mess with the retry limit and PIO/DMA latency + * timer settings. But do set primary and secondary + * latency timers. */ - pci_write_config_byte(pdev, APB_PRIMARY_MASTER_RETRY_LIMIT, 0); - pci_write_config_byte(pdev, APB_SECONDARY_MASTER_RETRY_LIMIT, 0); - pci_write_config_byte(pdev, APB_PIO_TARGET_RETRY_LIMIT, 0x80); - pci_write_config_byte(pdev, APB_PIO_TARGET_LATENCY_TIMER, 0); - pci_write_config_byte(pdev, APB_DMA_TARGET_RETRY_LIMIT, 0x80); - pci_write_config_byte(pdev, APB_DMA_TARGET_LATENCY_TIMER, 0); - - /* Increase primary latency timer. 
*/ - pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0xf8); + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 128); + pci_write_config_byte(pdev, PCI_SEC_LATENCY_TIMER, 128); } } } -__initfunc(static void sabre_probe(struct linux_psycho *sabre)) +static void __init sabre_probe(struct linux_psycho *sabre) { struct pci_bus *pbus = sabre->pci_bus; static unsigned char busno = 0; @@ -764,7 +828,7 @@ __initfunc(static void sabre_probe(struct linux_psycho *sabre)) } -__initfunc(static void pbm_probe(struct linux_pbm_info *pbm)) +static void __init pbm_probe(struct linux_pbm_info *pbm) { static struct pci_bus *pchain = NULL; struct pci_bus *pbus = &pbm->pci_bus; @@ -803,9 +867,9 @@ __initfunc(static void pbm_probe(struct linux_pbm_info *pbm)) } } -__initfunc(static int pdev_to_pnode_sibtraverse(struct linux_pbm_info *pbm, - struct pci_dev *pdev, - int pnode)) +static int __init pdev_to_pnode_sibtraverse(struct linux_pbm_info *pbm, + struct pci_dev *pdev, + int pnode) { struct linux_prom_pci_registers pregs[PROMREG_MAX]; int node; @@ -827,8 +891,8 @@ __initfunc(static int pdev_to_pnode_sibtraverse(struct linux_pbm_info *pbm, return 0; } -__initfunc(static void pdev_cookie_fillin(struct linux_pbm_info *pbm, - struct pci_dev *pdev, int pnode)) +static void __init pdev_cookie_fillin(struct linux_pbm_info *pbm, + struct pci_dev *pdev, int pnode) { struct pcidev_cookie *pcp; int node; @@ -846,9 +910,9 @@ __initfunc(static void pdev_cookie_fillin(struct linux_pbm_info *pbm, #endif } -__initfunc(static void fill_in_pbm_cookies(struct pci_bus *pbus, - struct linux_pbm_info *pbm, - int node)) +static void __init fill_in_pbm_cookies(struct pci_bus *pbus, + struct linux_pbm_info *pbm, + int node) { struct pci_dev *pdev; @@ -868,7 +932,7 @@ __initfunc(static void fill_in_pbm_cookies(struct pci_bus *pbus, } } -__initfunc(static void sabre_cookie_fillin(struct linux_psycho *sabre)) +static void __init sabre_cookie_fillin(struct linux_psycho *sabre) { struct pci_bus *pbus = sabre->pci_bus; 
@@ -886,9 +950,9 @@ __initfunc(static void sabre_cookie_fillin(struct linux_psycho *sabre)) * properties, and recording them in pci_vma's linked in via * PBM->assignments. */ -__initfunc(static int gimme_ebus_assignments(int node, struct linux_prom_pci_registers *aregs)) +static int __init gimme_ebus_assignments(int node, struct linux_prom_pci_registers *aregs) { - struct linux_prom_ebus_ranges erng[PROMREG_MAX]; + struct linux_prom_ebus_ranges erng[PROM_PCIRNG_MAX]; int err, iter; err = prom_getproperty(node, "ranges", (char *)&erng[0], sizeof(erng)); @@ -911,7 +975,7 @@ __initfunc(static int gimme_ebus_assignments(int node, struct linux_prom_pci_reg return err; } -__initfunc(static void assignment_process(struct linux_pbm_info *pbm, int node)) +static void __init assignment_process(struct linux_pbm_info *pbm, int node) { struct linux_prom_pci_registers aregs[PROMREG_MAX]; char pname[256]; @@ -968,7 +1032,7 @@ __initfunc(static void assignment_process(struct linux_pbm_info *pbm, int node)) } } -__initfunc(static void assignment_walk_siblings(struct linux_pbm_info *pbm, int node)) +static void __init assignment_walk_siblings(struct linux_pbm_info *pbm, int node) { while(node) { int child = prom_getchild(node); @@ -1077,12 +1141,12 @@ static inline void record_assignments(struct linux_pbm_info *pbm) #endif } -__initfunc(static void fixup_regs(struct pci_dev *pdev, - struct linux_pbm_info *pbm, - struct linux_prom_pci_registers *pregs, - int nregs, - struct linux_prom_pci_registers *assigned, - int numaa)) +static void __init fixup_regs(struct pci_dev *pdev, + struct linux_pbm_info *pbm, + struct linux_prom_pci_registers *pregs, + int nregs, + struct linux_prom_pci_registers *assigned, + int numaa) { int preg, rng; int IO_seen = 0; @@ -1173,12 +1237,13 @@ __initfunc(static void fixup_regs(struct pci_dev *pdev, } if (bsreg == PCI_ROM_ADDRESS) { pdev->rom_address = (unsigned long)__va(pci_addr); - pdev->rom_address |= 1; + pdev->rom_address &= ~1UL; + /* - * Enable 
access to the ROM. + * Disable access to the ROM. */ pci_read_config_dword(pdev, PCI_ROM_ADDRESS, &rtmp); - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, rtmp | 1); + pci_write_config_dword(pdev, PCI_ROM_ADDRESS, rtmp & ~1); } else pdev->base_address[brindex] = (unsigned long)__va(pci_addr); @@ -1347,7 +1412,7 @@ __initfunc(static void fixup_regs(struct pci_dev *pdev, rtmp = new_base; pci_read_config_dword(pdev, breg, &base); - rtmp |= (base & ~PCI_ROM_ADDRESS_MASK); + rtmp &= ~(base & ~PCI_ROM_ADDRESS_MASK); pci_write_config_dword(pdev, breg, rtmp); /* Apply PBM ranges and update pci_dev. */ @@ -1370,8 +1435,7 @@ __initfunc(static void fixup_regs(struct pci_dev *pdev, "PBM ranges\n"); } pdev->rom_address = (unsigned long)__va(pci_addr); - - pdev->rom_address |= (base & ~PCI_ROM_ADDRESS_MASK); + pdev->rom_address &= ~(base & ~PCI_ROM_ADDRESS_MASK); MEM_seen = 1; } rom_address_done: @@ -1415,7 +1479,7 @@ __initfunc(static void fixup_regs(struct pci_dev *pdev, #define imap_offset(__member) \ ((unsigned long)(&(((struct psycho_regs *)0)->__member))) -__initfunc(static unsigned long psycho_pcislot_imap_offset(unsigned long ino)) +static unsigned long __init psycho_pcislot_imap_offset(unsigned long ino) { unsigned int bus, slot; @@ -1431,11 +1495,8 @@ __initfunc(static unsigned long psycho_pcislot_imap_offset(unsigned long ino)) case 2: return imap_offset(imap_a_slot2); case 3: - return imap_offset(imap_a_slot3); default: - prom_printf("pcislot_imap: IMPOSSIBLE [%d:%d]\n", - bus, slot); - prom_halt(); + return imap_offset(imap_a_slot3); } } else { switch(slot) { @@ -1446,19 +1507,16 @@ __initfunc(static unsigned long psycho_pcislot_imap_offset(unsigned long ino)) case 2: return imap_offset(imap_b_slot2); case 3: - return imap_offset(imap_b_slot3); default: - prom_printf("pcislot_imap: IMPOSSIBLE [%d:%d]\n", - bus, slot); - prom_halt(); + return imap_offset(imap_b_slot3); } } } /* Exported for EBUS probing layer. 
*/ -__initfunc(unsigned int psycho_irq_build(struct linux_pbm_info *pbm, - struct pci_dev *pdev, - unsigned int ino)) +unsigned int __init psycho_irq_build(struct linux_pbm_info *pbm, + struct pci_dev *pdev, + unsigned int ino) { unsigned long imap_off; int need_dma_sync = 0; @@ -1533,6 +1591,36 @@ __initfunc(unsigned int psycho_irq_build(struct linux_pbm_info *pbm, imap_off = imap_offset(imap_ser); break; + case 0x2c: + /* Onboard Timer 0 */ + imap_off = imap_offset(imap_tim0); + break; + + case 0x2d: + /* Onboard Timer 1 */ + imap_off = imap_offset(imap_tim1); + break; + + case 0x2e: + /* Psycho UE Interrupt */ + imap_off = imap_offset(imap_ue); + break; + + case 0x2f: + /* Psycho CE Interrupt */ + imap_off = imap_offset(imap_ce); + break; + + case 0x30: + /* Psycho PCI A Error Interrupt */ + imap_off = imap_offset(imap_a_err); + break; + + case 0x31: + /* Psycho PCI B Error Interrupt */ + imap_off = imap_offset(imap_b_err); + break; + case 0x32: /* Power Management */ imap_off = imap_offset(imap_pmgmt); @@ -1554,37 +1642,78 @@ __initfunc(unsigned int psycho_irq_build(struct linux_pbm_info *pbm, return psycho_build_irq(pbm->parent, imap_off, ino, need_dma_sync); } -__initfunc(static int pbm_intmap_match(struct linux_pbm_info *pbm, - struct pci_dev *pdev, - struct linux_prom_pci_registers *preg, - unsigned int *interrupt)) +static int __init pbm_intmap_match(struct linux_pbm_info *pbm, + struct pci_dev *pdev, + struct linux_prom_pci_registers *preg, + unsigned int *interrupt) { struct linux_prom_pci_registers ppreg; unsigned int hi, mid, lo, irq; int i; - if (!pbm->num_pbm_intmap) +#ifdef FIXUP_IRQ_DEBUG + dprintf("pbm_intmap_match: "); +#endif + if (!pbm->num_pbm_intmap) { +#ifdef FIXUP_IRQ_DEBUG + dprintf("No intmap UPA[%x:%c]\n", + pbm->parent->upa_portid, + (pbm == &pbm->parent->pbm_A) ? 'A' : 'B'); +#endif return 0; - + } /* * Underneath a bridge, use register of parent bridge. 
*/ if (pdev->bus->number != pbm->pci_first_busno) { - struct pcidev_cookie *pcp = pdev->bus->self->sysdata; - int node; + struct pcidev_cookie *pcp; + int node, offset; + char prom_name[64]; - if (!pcp) +#ifdef FIXUP_IRQ_DEBUG + dprintf("UnderBridge, "); +#endif + pcp = pdev->bus->self->sysdata; + if (!pcp) { +#ifdef FIXUP_IRQ_DEBUG + dprintf("No bus PCP\n"); +#endif goto out; - + } node = pcp->prom_node; i = prom_getproperty(node, "reg", (char*)&ppreg, sizeof(ppreg)); - if(i == 0 || i == -1) + if(i == 0 || i == -1) { +#ifdef FIXUP_IRQ_DEBUG + dprintf("No reg property.\n"); +#endif goto out; + } + /* + * Did PROM know better and assign an interrupt different + * to #INTA to the device? - We test here for presence of + * FCODE on the card, in this case we assume PROM has set + * correct 'interrupts' property, unless it is quadhme. + */ + pcp = pdev->sysdata; + if (!pcp) { +#ifdef FIXUP_IRQ_DEBUG + dprintf("No dev PCP\n"); +#endif + goto out; + } + node = pcp->prom_node; - /* Use low slot number bits of child as IRQ line. */ - *interrupt = ((pdev->devfn >> 3) & 3) + 1; - + offset = prom_getint(node, "fcode-rom-offset"); + prom_getstring(node, "name", prom_name, sizeof(prom_name)); + if (offset == -1 || + !strcmp(prom_name, "SUNW,qfe") || + !strcmp(prom_name, "qfe")) { + /* + * No, use low slot number bits of child as IRQ line. 
+ */ + *interrupt = ((*interrupt - 1 + PCI_SLOT(pdev->devfn)) & 3) + 1; + } preg = &ppreg; } @@ -1618,13 +1747,12 @@ out: prom_halt(); } -__initfunc(static void fixup_irq(struct pci_dev *pdev, - struct linux_pbm_info *pbm, - struct linux_prom_pci_registers *preg, - int node)) +static void __init fixup_irq(struct pci_dev *pdev, + struct linux_pbm_info *pbm, + struct linux_prom_pci_registers *preg, + int node) { unsigned int prom_irq, portid = pbm->parent->upa_portid; - unsigned char pci_irq_line = pdev->irq; int err; #ifdef FIXUP_IRQ_DEBUG @@ -1668,7 +1796,25 @@ __initfunc(static void fixup_irq(struct pci_dev *pdev, unsigned int bus, slot, line; bus = (pbm == &pbm->parent->pbm_B) ? (1 << 4) : 0; - line = (pci_irq_line) & 3; + + /* Use the given interrupt property value as the line if it + * is non-zero and legal. Legal encodings are INTA=1, INTB=2, + * INTC=3, INTD=4 as per PCI OBP binding spec version 2.1 -DaveM + */ + if(prom_irq > 0 && prom_irq < 5) { + line = ((prom_irq - 1) & 3); + } else { + unsigned char pci_irq_line; + + /* The generic PCI probing layer will read the + * interrupt line into pdev->irq if the interrupt + * pin is non-zero, so we have to explicitly fetch + * the pin here to be certain (the interrupt line is + * typically left at zero by OBP). + */ + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pci_irq_line); + line = ((pci_irq_line - 1) & 3); + } /* Slot determination is only slightly complex. Handle * the easy case first. 
@@ -1721,11 +1867,11 @@ __initfunc(static void fixup_irq(struct pci_dev *pdev, #endif } -__initfunc(static void fixup_doit(struct pci_dev *pdev, - struct linux_pbm_info *pbm, - struct linux_prom_pci_registers *pregs, - int nregs, - int node)) +static void __init fixup_doit(struct pci_dev *pdev, + struct linux_pbm_info *pbm, + struct linux_prom_pci_registers *pregs, + int nregs, + int node) { struct linux_prom_pci_registers assigned[PROMREG_MAX]; int numaa, err; @@ -1745,9 +1891,9 @@ __initfunc(static void fixup_doit(struct pci_dev *pdev, fixup_irq(pdev, pbm, &pregs[0], node); } -__initfunc(static void fixup_pci_dev(struct pci_dev *pdev, - struct pci_bus *pbus, - struct linux_pbm_info *pbm)) +static void __init fixup_pci_dev(struct pci_dev *pdev, + struct pci_bus *pbus, + struct linux_pbm_info *pbm) { struct linux_prom_pci_registers pregs[PROMREG_MAX]; struct pcidev_cookie *pcp = pdev->sysdata; @@ -1762,8 +1908,12 @@ __initfunc(static void fixup_pci_dev(struct pci_dev *pdev, cmd |= PCI_COMMAND_MASTER; pci_write_config_word(pdev, PCI_COMMAND, cmd); - /* Now, set cache line size to 64-bytes. */ - pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 64); + /* Now, set cache line size to 64-bytes. + * NOTE: Cache line size is in 32-bit word units. 
+ */ + pci_write_config_byte(pdev, + PCI_CACHE_LINE_SIZE, + (64 / sizeof(u32))); } /* Ignore if this is one of the PBM's, EBUS, or a @@ -1808,7 +1958,7 @@ __initfunc(static void fixup_pci_dev(struct pci_dev *pdev, } } -__initfunc(static void fixup_pci_bus(struct pci_bus *pbus, struct linux_pbm_info *pbm)) +static void __init fixup_pci_bus(struct pci_bus *pbus, struct linux_pbm_info *pbm) { struct pci_dev *pdev; @@ -1819,7 +1969,7 @@ __initfunc(static void fixup_pci_bus(struct pci_bus *pbus, struct linux_pbm_info fixup_pci_bus(pbus, pbm); } -__initfunc(static void fixup_addr_irq(struct linux_pbm_info *pbm)) +static void __init fixup_addr_irq(struct linux_pbm_info *pbm) { struct pci_bus *pbus = &pbm->pci_bus; @@ -1832,7 +1982,7 @@ __initfunc(static void fixup_addr_irq(struct linux_pbm_info *pbm)) /* Walk all PCI devices probes, fixing up base registers and IRQ registers. * We use OBP for most of this work. */ -__initfunc(static void psycho_final_fixup(struct linux_psycho *psycho)) +static void __init psycho_final_fixup(struct linux_psycho *psycho) { /* Second, fixup base address registers and IRQ lines... */ if (psycho->pbm_A.parent) @@ -1841,7 +1991,33 @@ __initfunc(static void psycho_final_fixup(struct linux_psycho *psycho)) fixup_addr_irq(&psycho->pbm_B); } -__initfunc(void pcibios_fixup(void)) +/* Reorder the pci_dev chain, so that onboard devices come first + and then come the pluggable cards. 
*/ +void __init psycho_reorder_devs(void) +{ + struct pci_dev **pci_onboard = &pci_devices; + struct pci_dev **pci_tail = &pci_devices; + struct pci_dev *pdev = pci_devices, *pci_other = NULL; + + while (pdev) { + if (pdev->irq && (__irq_ino(pdev->irq) & 0x20)) { + if (pci_other) { + *pci_onboard = pdev; + pci_onboard = &pdev->next; + pdev = pdev->next; + *pci_onboard = pci_other; + *pci_tail = pdev; + continue; + } else + pci_onboard = &pdev->next; + } else if (!pci_other) + pci_other = pdev; + pci_tail = &pdev->next; + pdev = pdev->next; + } +} + +void __init pcibios_fixup(void) { struct linux_psycho *psycho; @@ -1861,9 +2037,9 @@ __initfunc(void pcibios_fixup(void)) for (psycho = psycho_root; psycho; psycho = psycho->next) { /* Probe bus on builtin PCI. */ - if (apb_present(psycho)) + if (apb_present(psycho)) { sabre_probe(psycho); - else { + } else { /* Probe busses under PBM B. */ pbm_probe(&psycho->pbm_B); @@ -1896,6 +2072,9 @@ __initfunc(void pcibios_fixup(void)) psycho_final_fixup(psycho); } + if (psycho_reorder & PSYCHO_REORDER_ONBOARDFIRST) + psycho_reorder_devs(); + return ebus_init(); } @@ -2418,12 +2597,20 @@ asmlinkage int sys_pciconfig_write(unsigned long bus, return err; } -__initfunc(void pcibios_fixup_bus(struct pci_bus *bus)) +void __init pcibios_fixup_bus(struct pci_bus *bus) { } -__initfunc(char *pcibios_setup(char *str)) +char * __init pcibios_setup(char *str) { + if (!strcmp(str, "onboardfirst")) { + psycho_reorder |= PSYCHO_REORDER_ONBOARDFIRST; + return NULL; + } + if (!strcmp(str, "noreorder")) { + psycho_reorder = 0; + return NULL; + } return str; } diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 4063a1e86..4dd9651b3 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -591,6 +591,8 @@ asmlinkage void do_ptrace(struct pt_regs *regs) if (((current->personality & PER_BSD) && (request == PTRACE_SUNATTACH)) || (!(current->personality & PER_BSD) && (request == PTRACE_ATTACH))) { + 
unsigned long flags; + if(child == current) { /* Try this under SunOS/Solaris, bwa haha * You'll never be able to kill the process. ;-) @@ -602,8 +604,9 @@ asmlinkage void do_ptrace(struct pt_regs *regs) (current->uid != child->euid) || (current->uid != child->uid) || (current->gid != child->egid) || - (current->gid != child->gid)) && - !capable(CAP_SYS_PTRACE)) { + (current->gid != child->sgid) || + (!cap_issubset(child->cap_permitted, current->cap_permitted)) || + (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE)) { pt_error_return(regs, EPERM); goto out; } @@ -613,15 +616,13 @@ asmlinkage void do_ptrace(struct pt_regs *regs) goto out; } child->flags |= PF_PTRACED; + write_lock_irqsave(&tasklist_lock, flags); if(child->p_pptr != current) { - unsigned long flags; - - write_lock_irqsave(&tasklist_lock, flags); REMOVE_LINKS(child); child->p_pptr = current; SET_LINKS(child); - write_unlock_irqrestore(&tasklist_lock, flags); } + write_unlock_irqrestore(&tasklist_lock, flags); send_sig(SIGSTOP, child, 1); pt_succ_return(regs, 0); goto out; @@ -670,14 +671,18 @@ asmlinkage void do_ptrace(struct pt_regs *regs) pt_error_return(regs, EINVAL); goto out; } + down(&child->mm->mmap_sem); res = read_int(child, addr, &x); + up(&child->mm->mmap_sem); tmp = x; } else { if(addr & (sizeof(unsigned long) - 1)) { pt_error_return(regs, EINVAL); goto out; } + down(&child->mm->mmap_sem); res = read_long(child, addr, &tmp); + up(&child->mm->mmap_sem); } if (res < 0) { pt_error_return(regs, -res); @@ -709,13 +714,17 @@ asmlinkage void do_ptrace(struct pt_regs *regs) pt_error_return(regs, EINVAL); goto out; } + down(&child->mm->mmap_sem); res = write_int(child, addr, data); + up(&child->mm->mmap_sem); } else { if(addr & (sizeof(unsigned long) - 1)) { pt_error_return(regs, EINVAL); goto out; } + down(&child->mm->mmap_sem); res = write_long(child, addr, data); + up(&child->mm->mmap_sem); } if(res < 0) pt_error_return(regs, -res); @@ -944,12 +953,15 @@ asmlinkage void do_ptrace(struct 
pt_regs *regs) unsigned long page; while(len) { + down(&child->mm->mmap_sem); vma = find_extend_vma(child, src); if (!vma) { + up(&child->mm->mmap_sem); pt_error_return(regs, EIO); goto flush_and_out; } pgtable = get_page (child, vma, src, 0); + up(&child->mm->mmap_sem); if (src & ~PAGE_MASK) { curlen = PAGE_SIZE - (src & ~PAGE_MASK); if (curlen > len) curlen = len; @@ -988,12 +1000,15 @@ asmlinkage void do_ptrace(struct pt_regs *regs) unsigned long page; while(len) { + down(&child->mm->mmap_sem); vma = find_extend_vma(child, dest); if (!vma) { + up(&child->mm->mmap_sem); pt_error_return(regs, EIO); goto flush_and_out; } pgtable = get_page (child, vma, dest, 1); + up(&child->mm->mmap_sem); if (dest & ~PAGE_MASK) { curlen = PAGE_SIZE - (dest & ~PAGE_MASK); if (curlen > len) curlen = len; diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 570322eec..caa1d99ef 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.37 1998/10/14 15:49:09 ecd Exp $ +/* $Id: setup.c,v 1.43 1999/04/12 08:08:24 davem Exp $ * linux/arch/sparc64/kernel/setup.c * * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu) @@ -277,6 +277,22 @@ static int console_fb __initdata = 0; #endif static unsigned long memory_size = 0; +#ifdef PROM_DEBUG_CONSOLE +static struct console prom_debug_console = { + "debug", + prom_console_write, + NULL, + NULL, + NULL, + NULL, + NULL, + CON_PRINTBUFFER, + -1, + 0, + NULL +}; +#endif + /* XXX Implement this at some point... 
*/ void kernel_enter_debugger(void) { @@ -397,13 +413,12 @@ __initfunc(static void boot_flags_init(char *commands)) extern int prom_probe_memory(void); extern unsigned long start, end; extern void panic_setup(char *, int *); -extern unsigned long sun_serial_setup(unsigned long); extern unsigned short root_flags; extern unsigned short root_dev; extern unsigned short ram_flags; -extern unsigned int ramdisk_image; -extern unsigned int ramdisk_size; +extern unsigned int sparc_ramdisk_image; +extern unsigned int sparc_ramdisk_size; #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 @@ -430,6 +445,10 @@ __initfunc(void setup_arch(char **cmdline_p, *cmdline_p = prom_getbootargs(); strcpy(saved_command_line, *cmdline_p); +#ifdef PROM_DEBUG_CONSOLE + register_console(&prom_debug_console); +#endif + printk("ARCH: SUN4U\n"); #ifdef CONFIG_DUMMY_CONSOLE @@ -489,13 +508,13 @@ __initfunc(void setup_arch(char **cmdline_p, rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0); #endif #ifdef CONFIG_BLK_DEV_INITRD - if (ramdisk_image) { + if (sparc_ramdisk_image) { unsigned long start = 0; - if (ramdisk_image >= (unsigned long)&end - 2 * PAGE_SIZE) - ramdisk_image -= KERNBASE; - initrd_start = ramdisk_image + phys_base + PAGE_OFFSET; - initrd_end = initrd_start + ramdisk_size; + if (sparc_ramdisk_image >= (unsigned long)&end - 2 * PAGE_SIZE) + sparc_ramdisk_image -= KERNBASE; + initrd_start = sparc_ramdisk_image + phys_base + PAGE_OFFSET; + initrd_end = initrd_start + sparc_ramdisk_size; if (initrd_end > *memory_end_p) { printk(KERN_CRIT "initrd extends beyond end of memory " "(0x%016lx > 0x%016lx)\ndisabling initrd\n", @@ -503,10 +522,10 @@ __initfunc(void setup_arch(char **cmdline_p, initrd_start = 0; } if (initrd_start) - start = ramdisk_image + KERNBASE; + start = sparc_ramdisk_image + KERNBASE; if (start >= *memory_start_p && start < *memory_start_p + 2 * PAGE_SIZE) { initrd_below_start_ok = 1; - *memory_start_p = PAGE_ALIGN 
(start + ramdisk_size); + *memory_start_p = PAGE_ALIGN (start + sparc_ramdisk_size); } } #endif @@ -531,7 +550,7 @@ __initfunc(void setup_arch(char **cmdline_p, ic_servaddr = sv; if (gw) ic_gateway = gw; - ic_bootp_flag = ic_rarp_flag = 0; + ic_proto_enabled = 0; } } #endif @@ -566,7 +585,6 @@ __initfunc(void setup_arch(char **cmdline_p, serial_console = 2; break; } - *memory_start_p = sun_serial_setup(*memory_start_p); /* set this up ASAP */ #else serial_console = 0; #endif diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 4bdfca1b7..27344f4b6 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -5,6 +5,8 @@ #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/mm.h> +#include <linux/pagemap.h> #include <linux/tasks.h> #include <linux/smp.h> #include <linux/smp_lock.h> @@ -34,24 +36,23 @@ extern int linux_num_cpus; extern void calibrate_delay(void); extern unsigned prom_cpu_nodes[]; -volatile int smp_processors_ready = 0; -unsigned long cpu_present_map = 0; -int smp_num_cpus = 1; -int smp_threads_ready = 0; +struct cpuinfo_sparc cpu_data[NR_CPUS] __attribute__ ((aligned (64))); -struct cpuinfo_sparc cpu_data[NR_CPUS] __attribute__ ((aligned (64))); +volatile int cpu_number_map[NR_CPUS] __attribute__ ((aligned (64))); +volatile int __cpu_logical_map[NR_CPUS] __attribute__ ((aligned (64))); -/* Please don't make this initdata!!! --DaveM */ +/* Please don't make this stuff initdata!!! 
--DaveM */ static unsigned char boot_cpu_id = 0; - static int smp_activated = 0; -volatile int cpu_number_map[NR_CPUS]; -volatile int __cpu_logical_map[NR_CPUS]; - /* Kernel spinlock */ spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; +volatile int smp_processors_ready = 0; +unsigned long cpu_present_map = 0; +int smp_num_cpus = 1; +int smp_threads_ready = 0; + __initfunc(void smp_setup(char *str, int *ints)) { /* XXX implement me XXX */ @@ -84,6 +85,8 @@ int smp_bogo(char *buf) __initfunc(void smp_store_cpu_info(int id)) { + int i; + cpu_data[id].irq_count = 0; cpu_data[id].bh_count = 0; /* multiplier and counter set by @@ -94,16 +97,18 @@ __initfunc(void smp_store_cpu_info(int id)) cpu_data[id].pte_cache = NULL; cpu_data[id].pgdcache_size = 0; cpu_data[id].pgd_cache = NULL; -} + cpu_data[id].idle_volume = 1; -extern void distribute_irqs(void); + for(i = 0; i < 16; i++) + cpu_data[id].irq_worklists[i] = 0; +} __initfunc(void smp_commence(void)) { - distribute_irqs(); } static void smp_setup_percpu_timer(void); +static void smp_tune_scheduling(void); static volatile unsigned long callin_flag = 0; @@ -173,10 +178,16 @@ void cpu_panic(void) panic("SMP bolixed\n"); } -extern struct prom_cpuinfo linux_cpus[NR_CPUS]; +extern struct prom_cpuinfo linux_cpus[64]; extern unsigned long smp_trampoline; +/* The OBP cpu startup callback truncates the 3rd arg cookie to + * 32-bits (I think) so to be safe we have it read the pointer + * contained here so we work on >4GB machines. 
-DaveM + */ +static struct task_struct *cpu_new_task = NULL; + __initfunc(void smp_boot_cpus(void)) { int cpucount = 0, i; @@ -184,6 +195,8 @@ __initfunc(void smp_boot_cpus(void)) printk("Entering UltraSMPenguin Mode...\n"); __sti(); smp_store_cpu_info(boot_cpu_id); + smp_tune_scheduling(); + init_idle(); if(linux_num_cpus == 1) return; @@ -194,21 +207,25 @@ __initfunc(void smp_boot_cpus(void)) if(cpu_present_map & (1UL << i)) { unsigned long entry = (unsigned long)(&smp_trampoline); + unsigned long cookie = (unsigned long)(&cpu_new_task); struct task_struct *p; int timeout; int no; extern unsigned long phys_base; entry += phys_base - KERNBASE; + cookie += phys_base - KERNBASE; kernel_thread(start_secondary, NULL, CLONE_PID); p = task[++cpucount]; p->processor = i; + p->has_cpu = 1; /* we schedule the first task manually */ callin_flag = 0; for (no = 0; no < linux_num_cpus; no++) if (linux_cpus[no].mid == i) break; + cpu_new_task = p; prom_startcpu(linux_cpus[no].prom_node, - entry, ((unsigned long)p)); + entry, cookie); for(timeout = 0; timeout < 5000000; timeout++) { if(callin_flag) break; @@ -216,8 +233,8 @@ __initfunc(void smp_boot_cpus(void)) } if(callin_flag) { cpu_number_map[i] = cpucount; - prom_cpu_nodes[i] = linux_cpus[no].prom_node; __cpu_logical_map[cpucount] = i; + prom_cpu_nodes[i] = linux_cpus[no].prom_node; } else { cpucount--; printk("Processor %d is stuck.\n", i); @@ -228,6 +245,7 @@ __initfunc(void smp_boot_cpus(void)) cpu_number_map[i] = -1; } } + cpu_new_task = NULL; if(cpucount == 0) { printk("Error: only one processor found.\n"); cpu_present_map = (1UL << smp_processor_id()); @@ -249,17 +267,6 @@ __initfunc(void smp_boot_cpus(void)) membar("#StoreStore | #StoreLoad"); } -/* We don't even need to do anything, the only generic message pass done - * anymore is to stop all cpus during a panic(). 
When the user drops to - * the PROM prompt, the firmware will send the other cpu's it's MONDO - * vector anyways, so doing anything special here is pointless. - * - * This whole thing should go away anyways... - */ -void smp_message_pass(int target, int msg, unsigned long data, int wait) -{ -} - /* #define XCALL_DEBUG */ static inline void xcall_deliver(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu) @@ -342,6 +349,17 @@ extern unsigned long xcall_flush_tlb_all; extern unsigned long xcall_tlbcachesync; extern unsigned long xcall_flush_cache_all; extern unsigned long xcall_report_regs; +extern unsigned long xcall_receive_signal; + +void smp_receive_signal(int cpu) +{ + if(smp_processors_ready && + (cpu_present_map & (1UL<<cpu)) != 0) { + u64 pstate, data0 = (((u64)&xcall_receive_signal) & 0xffffffff); + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + xcall_deliver(data0, 0, 0, pstate, cpu); + } +} void smp_report_regs(void) { @@ -364,37 +382,51 @@ void smp_flush_tlb_all(void) * to the stack before we get here because all callers of us * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. + * + * The SMP TLB coherency scheme we use works as follows: + * + * 1) mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to avoid doing cross calls. + * + * 2) TLB context numbers are shared globally across all processors + * in the system, this allows us to play several games to avoid + * cross calls. + * + * One invariant is that when a cpu switches to a process, and + * that processes tsk->mm->cpu_vm_mask does not have the current + * cpu's bit set, that tlb context is flushed locally. + * + * If the address space is non-shared (ie. mm->count == 1) we avoid + * cross calls when we want to flush the currently running process's + * tlb state. 
This is done by clearing all cpu bits except the current + * processor's in current->mm->cpu_vm_mask and performing the flush + * locally only. This will force any subsequent cpus which run this + * task to flush the context from the local tlb if the process migrates + * to another cpu (again). + * + * 3) For shared address spaces (threads) and swapping we bite the + * bullet for most cases and perform the cross call. + * + * The performance gain from "optimizing" away the cross call for threads is + * questionable (in theory the big win for threads is the massive sharing of + * address space state across processors). + * + * For the swapping case the locking is difficult to get right, we'd have to + * enforce strict ordered access to mm->cpu_vm_mask via a spinlock for example. + * Then again one could argue that when you are swapping, the cost of a cross + * call won't even show up on the performance radar. But in any case we do get + * rid of the cross-call when the task has a dead context or the task has only + * ever run on the local cpu. */ -static void smp_cross_call_avoidance(struct mm_struct *mm) -{ - u32 ctx; - - spin_lock(&scheduler_lock); - get_new_mmu_context(mm); - mm->cpu_vm_mask = (1UL << smp_processor_id()); - current->tss.ctx = ctx = mm->context & 0x3ff; - spitfire_set_secondary_context(ctx); - __asm__ __volatile__("flush %g6"); - spitfire_flush_dtlb_secondary_context(); - spitfire_flush_itlb_secondary_context(); - __asm__ __volatile__("flush %g6"); - if(!segment_eq(current->tss.current_ds,USER_DS)) { - /* Rarely happens. 
*/ - current->tss.ctx = 0; - spitfire_set_secondary_context(0); - __asm__ __volatile__("flush %g6"); - } - spin_unlock(&scheduler_lock); -} - void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = mm->context & 0x3ff; if(mm == current->mm && atomic_read(&mm->count) == 1) { - if(mm->cpu_vm_mask == (1UL << smp_processor_id())) - goto local_flush_and_out; - return smp_cross_call_avoidance(mm); + if(mm->cpu_vm_mask != (1UL << smp_processor_id())) + mm->cpu_vm_mask = (1UL << smp_processor_id()); + goto local_flush_and_out; } smp_cross_call(&xcall_flush_tlb_mm, ctx, 0, 0); @@ -410,9 +442,9 @@ void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start, start &= PAGE_MASK; end &= PAGE_MASK; if(mm == current->mm && atomic_read(&mm->count) == 1) { - if(mm->cpu_vm_mask == (1UL << smp_processor_id())) - goto local_flush_and_out; - return smp_cross_call_avoidance(mm); + if(mm->cpu_vm_mask != (1UL << smp_processor_id())) + mm->cpu_vm_mask = (1UL << smp_processor_id()); + goto local_flush_and_out; } smp_cross_call(&xcall_flush_tlb_range, ctx, start, end); @@ -426,22 +458,26 @@ void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page) page &= PAGE_MASK; if(mm == current->mm && atomic_read(&mm->count) == 1) { - if(mm->cpu_vm_mask == (1UL << smp_processor_id())) - goto local_flush_and_out; - return smp_cross_call_avoidance(mm); - } -#if 0 /* XXX Disabled until further notice... */ - else if(atomic_read(&mm->count) == 1) { + if(mm->cpu_vm_mask != (1UL << smp_processor_id())) + mm->cpu_vm_mask = (1UL << smp_processor_id()); + goto local_flush_and_out; + } else { /* Try to handle two special cases to avoid cross calls * in common scenerios where we are swapping process * pages out. */ - if((mm->context ^ tlb_context_cache) & CTX_VERSION_MASK) + if(((mm->context ^ tlb_context_cache) & CTX_VERSION_MASK) || + (mm->cpu_vm_mask == 0)) { + /* A dead context cannot ever become "alive" until + * a task switch is done to it. + */ return; /* It's dead, nothing to do. 
*/ - if(mm->cpu_vm_mask == (1UL << smp_processor_id())) - goto local_flush_and_out; + } + if(mm->cpu_vm_mask == (1UL << smp_processor_id())) { + __flush_tlb_page(ctx, page, SECONDARY_CONTEXT); + return; /* Only local flush is necessary. */ + } } -#endif smp_cross_call(&xcall_flush_tlb_page, ctx, page, 0); local_flush_and_out: @@ -644,6 +680,100 @@ __initfunc(void smp_tick_init(void)) prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1; } +static inline unsigned long find_flush_base(unsigned long size) +{ + struct page *p = mem_map; + unsigned long found, base; + + size = PAGE_ALIGN(size); + found = size; + base = page_address(p); + while(found != 0) { + /* Failure. */ + if(p >= (mem_map + max_mapnr)) + return 0UL; + if(PageSkip(p)) { + p = p->next_hash; + base = page_address(p); + found = size; + } else { + found -= PAGE_SIZE; + p++; + } + } + return base; +} + +cycles_t cacheflush_time; + +__initfunc(static void smp_tune_scheduling (void)) +{ + unsigned long flush_base, flags, *p; + unsigned int ecache_size; + cycles_t tick1, tick2, raw; + + /* Approximate heuristic for SMP scheduling. It is an + * estimation of the time it takes to flush the L2 cache + * on the local processor. + * + * The ia32 chooses to use the L1 cache flush time instead, + * and I consider this complete nonsense. The Ultra can service + * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and + * L2 misses are what create extra bus traffic (ie. the "cost" + * of moving a process from one cpu to another). + */ + printk("SMP: Calibrating ecache flush... "); + ecache_size = prom_getintdefault(linux_cpus[0].prom_node, + "ecache-size", (512 *1024)); + flush_base = find_flush_base(ecache_size << 1); + + if(flush_base != 0UL) { + __save_and_cli(flags); + + /* Scan twice the size once just to get the TLB entries + * loaded and make sure the second scan measures pure misses. 
+ */ + for(p = (unsigned long *)flush_base; + ((unsigned long)p) < (flush_base + (ecache_size<<1)); + p += (64 / sizeof(unsigned long))) + *((volatile unsigned long *)p); + + /* Now the real measurement. */ + __asm__ __volatile__(" + b,pt %%xcc, 1f + rd %%tick, %0 + + .align 64 +1: ldx [%2 + 0x000], %%g1 + ldx [%2 + 0x040], %%g2 + ldx [%2 + 0x080], %%g3 + ldx [%2 + 0x0c0], %%g5 + add %2, 0x100, %2 + cmp %2, %4 + bne,pt %%xcc, 1b + nop + + rd %%tick, %1" + : "=&r" (tick1), "=&r" (tick2), "=&r" (flush_base) + : "2" (flush_base), "r" (flush_base + ecache_size) + : "g1", "g2", "g3", "g5"); + + __restore_flags(flags); + + raw = (tick2 - tick1); + + /* Dampen it a little, considering two processes + * sharing the cache and fitting. + */ + cacheflush_time = (raw - (raw >> 2)); + } else + cacheflush_time = ((ecache_size << 2) + + (ecache_size << 1)); + + printk("Using heuristic of %d cycles.\n", + (int) cacheflush_time); +} + int __init setup_profiling_timer(unsigned int multiplier) { unsigned long flags; diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index a42505edc..305f37ad8 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -1,8 +1,9 @@ -/* $Id: sparc64_ksyms.c,v 1.49 1998/10/28 08:11:28 jj Exp $ +/* $Id: sparc64_ksyms.c,v 1.58 1999/05/08 03:00:31 davem Exp $ * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) */ /* Tell string.h we don't want memcpy etc. 
as cpp defines */ @@ -52,8 +53,9 @@ struct poll { short revents; }; -extern unsigned prom_cpu_nodes[NR_CPUS]; +extern unsigned prom_cpu_nodes[64]; extern void die_if_kernel(char *str, struct pt_regs *regs); +extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); extern unsigned long sunos_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); void _sigpause_common (unsigned int set, struct pt_regs *); @@ -88,7 +90,6 @@ extern int __ashrdi3(int, int); extern void dump_thread(struct pt_regs *, struct user *); #ifdef __SMP__ -extern spinlock_t scheduler_lock; extern spinlock_t kernel_flag; extern int smp_num_cpus; #ifdef SPIN_LOCK_DEBUG @@ -102,6 +103,8 @@ extern void _do_write_unlock(rwlock_t *rw); #endif #endif +extern unsigned long phys_base; + /* One thing to note is that the way the symbols of the mul/div * support routines are named is a mess, they all start with * a '.' which makes it a bitch to export, here is the trick: @@ -116,7 +119,6 @@ __attribute__((section("__ksymtab"))) = \ /* used by various drivers */ #ifdef __SMP__ /* Kernel wide locking */ -EXPORT_SYMBOL(scheduler_lock); EXPORT_SYMBOL(kernel_flag); /* Software-IRQ BH locking */ @@ -155,6 +157,8 @@ EXPORT_SYMBOL(_do_write_unlock); EXPORT_SYMBOL(local_irq_count); EXPORT_SYMBOL(local_bh_count); #endif + +EXPORT_SYMBOL(ivector_table); EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); @@ -171,6 +175,7 @@ EXPORT_SYMBOL(sparc_dvma_malloc); EXPORT_SYMBOL(mmu_release_scsi_one); EXPORT_SYMBOL(mmu_release_scsi_sgl); #if CONFIG_SBUS +EXPORT_SYMBOL(mmu_set_sbus64); EXPORT_SYMBOL(SBus_chain); EXPORT_SYMBOL(dma_chain); #endif @@ -199,6 +204,9 @@ EXPORT_SYMBOL(dump_thread); /* math-emu wants this */ EXPORT_SYMBOL(die_if_kernel); +/* Kernel thread creation. 
*/ +EXPORT_SYMBOL(kernel_thread); + /* prom symbols */ EXPORT_SYMBOL(idprom); EXPORT_SYMBOL(prom_root_node); @@ -214,6 +222,7 @@ EXPORT_SYMBOL(prom_setprop); EXPORT_SYMBOL(saved_command_line); EXPORT_SYMBOL(prom_getname); EXPORT_SYMBOL(prom_feval); +EXPORT_SYMBOL(prom_getbool); EXPORT_SYMBOL(prom_getstring); EXPORT_SYMBOL(prom_apply_sbus_ranges); EXPORT_SYMBOL(prom_getint); @@ -257,7 +266,6 @@ EXPORT_SYMBOL(svr4_setcontext); EXPORT_SYMBOL(prom_cpu_nodes); EXPORT_SYMBOL(sys_ioctl); EXPORT_SYMBOL(sys32_ioctl); -EXPORT_SYMBOL(get_unmapped_area); EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); #endif @@ -281,6 +289,10 @@ EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__bzero_noasi); +/* Various address conversion macros use this. */ +EXPORT_SYMBOL(phys_base); +EXPORT_SYMBOL(sparc64_valid_addr_bitmap); + /* No version information on this, heavily used in inline asm, * and will always be 'void __ret_efault(void)'. */ diff --git a/arch/sparc64/kernel/starfire.c b/arch/sparc64/kernel/starfire.c new file mode 100644 index 000000000..38f33ecd6 --- /dev/null +++ b/arch/sparc64/kernel/starfire.c @@ -0,0 +1,121 @@ +/* $Id: starfire.c,v 1.2 1998/12/09 18:53:11 davem Exp $ + * starfire.c: Starfire/E10000 support. + * + * Copyright (C) 1998 David S. Miller (davem@dm.cobaltmicro.com) + */ + +#include <linux/kernel.h> +#include <linux/malloc.h> + +#include <asm/page.h> +#include <asm/oplib.h> +#include <asm/smp.h> + +/* A few places around the kernel check this to see if + * they need to call us to do things in a Starfire specific + * way. + */ +int this_is_starfire = 0; + +void starfire_check(void) +{ + int ssnode = prom_finddevice("/ssp-serial"); + + if(ssnode != 0 && ssnode != -1) { + int i; + + this_is_starfire = 1; + + /* Now must fixup cpu MIDs. OBP gave us a logical + * linear cpuid number, not the real upaid. 
+ */ + for(i = 0; i < linux_num_cpus; i++) { + unsigned int mid = linux_cpus[i].mid; + + mid = (((mid & 0x3c) << 1) | + ((mid & 0x40) >> 4) | + (mid & 0x3)); + + linux_cpus[i].mid = mid; + } + } +} + +int starfire_hard_smp_processor_id(void) +{ + return *((unsigned int *) __va(0x1fff40000d0)); +} + +/* Each Starfire board has 32 registers which perform translation + * and delivery of traditional interrupt packets into the extended + * Starfire hardware format. Essentially UPAID's now have 2 more + * bits than in all previous Sun5 systems. + */ +struct starfire_irqinfo { + unsigned int *imap_slots[32]; + unsigned int *tregs[32]; + struct starfire_irqinfo *next; + int upaid, hwmid; +}; + +static struct starfire_irqinfo *sflist = NULL; + +/* Beam me up Scott(McNeil)y... */ +void *starfire_hookup(int upaid) +{ + struct starfire_irqinfo *p; + unsigned long treg_base, hwmid, i; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if(!p) { + prom_printf("starfire_hookup: No memory, this is insane.\n"); + prom_halt(); + } + treg_base = 0x100fc000000UL; + hwmid = ((upaid & 0x3c) << 1) | + ((upaid & 0x40) >> 4) | + (upaid & 0x3); + p->hwmid = hwmid; + treg_base += (hwmid << 33UL); + treg_base += 0x200UL; + for(i = 0; i < 32; i++) { + p->imap_slots[i] = NULL; + p->tregs[i] = __va(treg_base + (i * 0x10)); + } + p->upaid = upaid; + p->next = sflist; + sflist = p; + + return (void *) p; +} + +unsigned int starfire_translate(unsigned int *imap, + unsigned int upaid) +{ + struct starfire_irqinfo *p; + unsigned int bus_hwmid; + unsigned int i; + + bus_hwmid = (((unsigned long)imap) >> 33) & 0x7f; + for(p = sflist; p != NULL; p = p->next) + if(p->hwmid == bus_hwmid) + break; + if(p == NULL) { + prom_printf("XFIRE: Cannot find irqinfo for imap %016lx\n", + ((unsigned long)imap)); + prom_halt(); + } + for(i = 0; i < 32; i++) { + if(p->imap_slots[i] == imap || + p->imap_slots[i] == NULL) + break; + } + if(i == 32) { + printk("starfire_translate: Are you kidding me?\n"); + panic("Lucy in the 
sky...."); + } + p->imap_slots[i] = imap; + *(p->tregs[i]) = upaid; + + return i; +} diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 08ce07244..8d11f10b8 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -1,4 +1,4 @@ -/* $Id: sys_sparc.c,v 1.25 1998/10/21 03:21:15 davem Exp $ +/* $Id: sys_sparc.c,v 1.26 1999/01/07 19:07:01 jj Exp $ * linux/arch/sparc64/kernel/sys_sparc.c * * This file contains various random system calls that @@ -27,6 +27,8 @@ #include <asm/utrap.h> #include <asm/perfctr.h> +/* #define DEBUG_UNIMP_SYSCALL */ + /* XXX Make this per-binary type, this way we can detect the type of * XXX a binary. Every Sparc executable calls this very early on. */ @@ -200,11 +202,14 @@ asmlinkage unsigned long c_sys_nis_syscall (struct pt_regs *regs) { static int count=0; + + /* Don't make the system unusable, if someone goes stuck */ + if (count++ > 5) return -ENOSYS; lock_kernel(); - if (++count <= 20) { /* Don't make the system unusable, if someone goes stuck */ - printk ("Unimplemented SPARC system call %ld\n",regs->u_regs[1]); - show_regs (regs); - } + printk ("Unimplemented SPARC system call %ld\n",regs->u_regs[1]); +#ifdef DEBUG_UNIMP_SYSCALL + show_regs (regs); +#endif unlock_kernel(); return -ENOSYS; } diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 1a49380f1..a7f85ca58 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1,4 +1,4 @@ -/* $Id: sys_sparc32.c,v 1.100 1998/11/08 11:14:00 davem Exp $ +/* $Id: sys_sparc32.c,v 1.107 1999/03/05 13:21:02 davem Exp $ * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. 
* * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -43,6 +43,7 @@ #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> +#include <linux/timex.h> #include <asm/types.h> #include <asm/ipc.h> @@ -50,6 +51,8 @@ #include <asm/fpumacro.h> #include <asm/semaphore.h> +#include <net/scm.h> + /* Use this to get at 32-bit user passed pointers. */ /* Things to consider: the low-level assembly stub does srl x, 0, x for first four arguments, so if you have @@ -74,15 +77,6 @@ __ret; \ }) -static inline char * get_page(void) -{ - char * res; - res = (char *)__get_free_page(GFP_KERNEL); - return res; -} - -#define putname32 putname - /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. @@ -109,13 +103,13 @@ char * getname32(const char *filename) char *tmp, *result; result = ERR_PTR(-ENOMEM); - tmp = get_page(); + tmp = (char *)__get_free_page(GFP_KERNEL); if (tmp) { int retval = do_getname32(filename, tmp); result = tmp; if (retval < 0) { - putname32(tmp); + putname(tmp); result = ERR_PTR(retval); } } @@ -243,7 +237,10 @@ static int do_sys32_semctl(int first, int second, int third, void *uptr) err = -EFAULT; if (get_user (pad, (u32 *)uptr)) goto out; - fourth.__pad = (void *)A(pad); + if(third == SETVAL) + fourth.val = (int)pad; + else + fourth.__pad = (void *)A(pad); if (IPCOP_MASK (third) & (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (SEM_INFO) | IPCOP_MASK (GETVAL) | IPCOP_MASK (GETPID) | IPCOP_MASK (GETNCNT) | IPCOP_MASK (GETZCNT) | @@ -652,7 +649,7 @@ asmlinkage int sys32_quotactl(int cmd, const char *special, int id, unsigned lon set_fs (KERNEL_DS); err = sys_quotactl(cmd, (const char *)spec, id, (caddr_t)&d); set_fs (old_fs); - putname32 (spec); + putname (spec); if (cmds == Q_GETQUOTA) { __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; ((struct dqblk32 *)&d)->dqb_itime = i; @@ -696,7 +693,7 @@ asmlinkage int 
sys32_statfs(const char * path, struct statfs32 *buf) set_fs (KERNEL_DS); ret = sys_statfs((const char *)pth, &s); set_fs (old_fs); - putname32 (pth); + putname (pth); if (put_statfs(buf, &s)) return -EFAULT; } @@ -744,7 +741,7 @@ asmlinkage int sys32_utime(char * filename, struct utimbuf32 *times) set_fs (KERNEL_DS); ret = sys_utime(filenam, &t); set_fs (old_fs); - putname32 (filenam); + putname (filenam); } return ret; } @@ -796,8 +793,9 @@ static long do_readv_writev32(int type, struct file *file, } inode = file->f_dentry->d_inode; - retval = locks_verify_area((type == VERIFY_READ) ? - FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, + /* VERIFY_WRITE actually means a read, as we write to user space */ + retval = locks_verify_area((type == VERIFY_WRITE + ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), inode, file, file->f_pos, tot_len); if (retval) { if (iov != iovstack) @@ -1106,13 +1104,17 @@ set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset) __put_user(*fdset, ufdset); } +#define MAX_SELECT_SECONDS \ + ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) + asmlinkage int sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, u32 tvp_x) { - fd_set_buffer *fds; + fd_set_bits fds; struct timeval32 *tvp = (struct timeval32 *)AA(tvp_x); + char *bits; unsigned long nn; long timeout; - int ret; + int ret, size; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -1123,30 +1125,47 @@ asmlinkage int sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, u32 tvp_x) || (ret = __get_user(usec, &tvp->tv_usec))) goto out_nofds; - timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); - timeout += sec * HZ; + ret = -EINVAL; + if(sec < 0 || usec < 0) + goto out_nofds; + + if ((unsigned long) sec < MAX_SELECT_SECONDS) { + timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); + timeout += sec * (unsigned long) HZ; + } } + ret = -EINVAL; + if (n < 0 || n > KFDS_NR) + goto out_nofds; + + /* + * We need 6 bitmaps (in/out/ex for both incoming and outgoing), + * since we used fdset we need to allocate memory 
in units of + * long-words. + */ ret = -ENOMEM; - fds = (fd_set_buffer *) __get_free_page(GFP_KERNEL); - if (!fds) + size = FDS_BYTES(n); + bits = kmalloc(6 * size, GFP_KERNEL); + if (!bits) goto out_nofds; - ret = -EINVAL; - if (n < 0) - goto out; - if (n > KFDS_NR) - n = KFDS_NR; + fds.in = (unsigned long *) bits; + fds.out = (unsigned long *) (bits + size); + fds.ex = (unsigned long *) (bits + 2*size); + fds.res_in = (unsigned long *) (bits + 3*size); + fds.res_out = (unsigned long *) (bits + 4*size); + fds.res_ex = (unsigned long *) (bits + 5*size); nn = (n + 8*sizeof(u32) - 1) / (8*sizeof(u32)); - if ((ret = get_fd_set32(nn, fds->in, inp)) || - (ret = get_fd_set32(nn, fds->out, outp)) || - (ret = get_fd_set32(nn, fds->ex, exp))) + if ((ret = get_fd_set32(nn, fds.in, inp)) || + (ret = get_fd_set32(nn, fds.out, outp)) || + (ret = get_fd_set32(nn, fds.ex, exp))) goto out; - zero_fd_set(n, fds->res_in); - zero_fd_set(n, fds->res_out); - zero_fd_set(n, fds->res_ex); + zero_fd_set(n, fds.res_in); + zero_fd_set(n, fds.res_out); + zero_fd_set(n, fds.res_ex); - ret = do_select(n, fds, &timeout); + ret = do_select(n, &fds, &timeout); if (tvp && !(current->personality & STICKY_TIMEOUTS)) { time_t sec = 0, usec = 0; @@ -1168,12 +1187,12 @@ asmlinkage int sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, u32 tvp_x) ret = 0; } - set_fd_set32(nn, inp, fds->res_in); - set_fd_set32(nn, outp, fds->res_out); - set_fd_set32(nn, exp, fds->res_ex); + set_fd_set32(nn, inp, fds.res_in); + set_fd_set32(nn, outp, fds.res_out); + set_fd_set32(nn, exp, fds.res_ex); out: - free_page ((unsigned long)fds); + kfree(bits); out_nofds: return ret; } @@ -1213,7 +1232,7 @@ asmlinkage int sys32_newstat(char * filename, struct stat32 *statbuf) set_fs (KERNEL_DS); ret = sys_newstat(filenam, &s); set_fs (old_fs); - putname32 (filenam); + putname (filenam); if (putstat (statbuf, &s)) return -EFAULT; } @@ -1235,7 +1254,7 @@ asmlinkage int sys32_newlstat(char * filename, struct stat32 *statbuf) set_fs 
(KERNEL_DS); ret = sys_newlstat(filenam, &s); set_fs (old_fs); - putname32 (filenam); + putname (filenam); if (putstat (statbuf, &s)) return -EFAULT; } @@ -2010,74 +2029,6 @@ asmlinkage int sys32_getrusage(int who, struct rusage32 *ru) return ret; } -struct timex32 { - unsigned int modes; - s32 offset; - s32 freq; - s32 maxerror; - s32 esterror; - int status; - s32 constant; - s32 precision; - s32 tolerance; - struct timeval32 time; - s32 tick; - s32 ppsfreq; - s32 jitter; - int shift; - s32 stabil; - s32 jitcnt; - s32 calcnt; - s32 errcnt; - s32 stbcnt; - int :32; int :32; int :32; int :32; - int :32; int :32; int :32; int :32; - int :32; int :32; int :32; int :32; -}; - -extern int do_adjtimex(struct timex *); - -asmlinkage int sys32_adjtimex(struct timex32 *txc_p) -{ - struct timex t; - int ret; - - ret = get_user (t.modes, &txc_p->modes); - ret |= __get_user (t.offset, &txc_p->offset); - ret |= __get_user (t.freq, &txc_p->freq); - ret |= __get_user (t.maxerror, &txc_p->maxerror); - ret |= __get_user (t.esterror, &txc_p->esterror); - ret |= __get_user (t.status, &txc_p->status); - ret |= __get_user (t.constant, &txc_p->constant); - ret |= __get_user (t.tick, &txc_p->tick); - ret |= __get_user (t.shift, &txc_p->shift); - if (ret || (ret = do_adjtimex(&t))) - return ret; - ret = __put_user (t.modes, &txc_p->modes); - ret |= __put_user (t.offset, &txc_p->offset); - ret |= __put_user (t.freq, &txc_p->freq); - ret |= __put_user (t.maxerror, &txc_p->maxerror); - ret |= __put_user (t.esterror, &txc_p->esterror); - ret |= __put_user (t.status, &txc_p->status); - ret |= __put_user (t.constant, &txc_p->constant); - ret |= __put_user (t.precision, &txc_p->precision); - ret |= __put_user (t.tolerance, &txc_p->tolerance); - ret |= __put_user (t.time.tv_sec, &txc_p->time.tv_sec); - ret |= __put_user (t.time.tv_usec, &txc_p->time.tv_usec); - ret |= __put_user (t.tick, &txc_p->tick); - ret |= __put_user (t.ppsfreq, &txc_p->ppsfreq); - ret |= __put_user (t.jitter, 
&txc_p->jitter); - ret |= __put_user (t.shift, &txc_p->shift); - ret |= __put_user (t.stabil, &txc_p->stabil); - ret |= __put_user (t.jitcnt, &txc_p->jitcnt); - ret |= __put_user (t.calcnt, &txc_p->calcnt); - ret |= __put_user (t.errcnt, &txc_p->errcnt); - ret |= __put_user (t.stbcnt, &txc_p->stbcnt); - if (!ret) - ret = time_state; - return ret; -} - /* XXX This really belongs in some header file... -DaveM */ #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, @@ -2131,9 +2082,44 @@ struct cmsghdr32 { __kernel_size_t32 cmsg_len; int cmsg_level; int cmsg_type; - unsigned char cmsg_data[0]; }; +/* Bleech... */ +#define __CMSG32_NXTHDR(ctl, len, cmsg, cmsglen) __cmsg32_nxthdr((ctl),(len),(cmsg),(cmsglen)) +#define CMSG32_NXTHDR(mhdr, cmsg, cmsglen) cmsg32_nxthdr((mhdr), (cmsg), (cmsglen)) + +#define CMSG32_ALIGN(len) ( ((len)+sizeof(int)-1) & ~(sizeof(int)-1) ) + +#define CMSG32_DATA(cmsg) ((void *)((char *)(cmsg) + CMSG32_ALIGN(sizeof(struct cmsghdr32)))) +#define CMSG32_SPACE(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + CMSG32_ALIGN(len)) +#define CMSG32_LEN(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + (len)) + +#define __CMSG32_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr32) ? 
\ + (struct cmsghdr32 *)(ctl) : \ + (struct cmsghdr32 *)NULL) +#define CMSG32_FIRSTHDR(msg) __CMSG32_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) + +__inline__ struct cmsghdr32 *__cmsg32_nxthdr(void *__ctl, __kernel_size_t __size, + struct cmsghdr32 *__cmsg, int __cmsg_len) +{ + struct cmsghdr32 * __ptr; + + __ptr = (struct cmsghdr32 *)(((unsigned char *) __cmsg) + + CMSG32_ALIGN(__cmsg_len)); + if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size) + return NULL; + + return __ptr; +} + +__inline__ struct cmsghdr32 *cmsg32_nxthdr (struct msghdr *__msg, + struct cmsghdr32 *__cmsg, + int __cmsg_len) +{ + return __cmsg32_nxthdr(__msg->msg_control, __msg->msg_controllen, + __cmsg, __cmsg_len); +} + static inline int iov_from_user32_to_kern(struct iovec *kiov, struct iovec32 *uiov32, int niov) @@ -2175,6 +2161,7 @@ static inline int msghdr_from_user32_to_kern(struct msghdr *kmsg, kmsg->msg_control = (void *)A(tmp3); err = get_user(kmsg->msg_namelen, &umsg->msg_namelen); + err |= get_user(kmsg->msg_iovlen, &umsg->msg_iovlen); err |= get_user(kmsg->msg_controllen, &umsg->msg_controllen); err |= get_user(kmsg->msg_flags, &umsg->msg_flags); @@ -2217,6 +2204,165 @@ static int verify_iovec32(struct msghdr *kern_msg, struct iovec *kern_iov, return tot_len; } +/* There is a lot of hair here because the alignment rules (and + * thus placement) of cmsg headers and length are different for + * 32-bit apps. -DaveM + */ +static int cmsghdr_from_user32_to_kern(struct msghdr *kmsg, + unsigned char *stackbuf, int stackbuf_size) +{ + struct cmsghdr32 *ucmsg; + struct cmsghdr *kcmsg, *kcmsg_base; + __kernel_size_t32 ucmlen; + __kernel_size_t kcmlen, tmp; + + kcmlen = 0; + kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; + ucmsg = CMSG32_FIRSTHDR(kmsg); + while(ucmsg != NULL) { + if(get_user(ucmlen, &ucmsg->cmsg_len)) + return -EFAULT; + + /* Catch bogons. 
*/ + if(CMSG32_ALIGN(ucmlen) < + CMSG32_ALIGN(sizeof(struct cmsghdr32))) + return -EINVAL; + if((unsigned long)(((char *)ucmsg - (char *)kmsg->msg_control) + + ucmlen) > kmsg->msg_controllen) + return -EINVAL; + + tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); + kcmlen += tmp; + ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen); + } + if(kcmlen == 0) + return -EINVAL; + + /* The kcmlen holds the 64-bit version of the control length. + * It may not be modified as we do not stick it into the kmsg + * until we have successfully copied over all of the data + * from the user. + */ + if(kcmlen > stackbuf_size) + kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL); + if(kcmsg == NULL) + return -ENOBUFS; + + /* Now copy them over neatly. */ + memset(kcmsg, 0, kcmlen); + ucmsg = CMSG32_FIRSTHDR(kmsg); + while(ucmsg != NULL) { + __get_user(ucmlen, &ucmsg->cmsg_len); + tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); + kcmsg->cmsg_len = tmp; + __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level); + __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type); + + /* Copy over the data. */ + if(copy_from_user(CMSG_DATA(kcmsg), + CMSG32_DATA(ucmsg), + (ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))))) + goto out_free_efault; + + /* Advance. */ + kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp)); + ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen); + } + + /* Ok, looks like we made it. Hook it up and return success. 
*/ + kmsg->msg_control = kcmsg_base; + kmsg->msg_controllen = kcmlen; + return 0; + +out_free_efault: + if(kcmsg_base != (struct cmsghdr *)stackbuf) + kfree(kcmsg_base); + return -EFAULT; +} + +static void put_cmsg32(struct msghdr *kmsg, int level, int type, + int len, void *data) +{ + struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control; + struct cmsghdr32 cmhdr; + int cmlen = CMSG32_LEN(len); + + if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { + kmsg->msg_flags |= MSG_CTRUNC; + return; + } + + if(kmsg->msg_controllen < cmlen) { + kmsg->msg_flags |= MSG_CTRUNC; + cmlen = kmsg->msg_controllen; + } + cmhdr.cmsg_level = level; + cmhdr.cmsg_type = type; + cmhdr.cmsg_len = cmlen; + + if(copy_to_user(cm, &cmhdr, sizeof cmhdr)) + return; + if(copy_to_user(CMSG32_DATA(cm), data, cmlen - sizeof(struct cmsghdr32))) + return; + cmlen = CMSG32_SPACE(len); + kmsg->msg_control += cmlen; + kmsg->msg_controllen -= cmlen; +} + +static void scm_detach_fds32(struct msghdr *kmsg, struct scm_cookie *scm) +{ + struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control; + int fdmax = (kmsg->msg_controllen - sizeof(struct cmsghdr32)) / sizeof(int); + int fdnum = scm->fp->count; + struct file **fp = scm->fp->fp; + int *cmfptr; + int err = 0, i; + + if (fdnum < fdmax) + fdmax = fdnum; + + for (i = 0, cmfptr = (int *) CMSG32_DATA(cm); i < fdmax; i++, cmfptr++) { + int new_fd; + err = get_unused_fd(); + if (err < 0) + break; + new_fd = err; + err = put_user(new_fd, cmfptr); + if (err) { + put_unused_fd(new_fd); + break; + } + /* Bump the usage count and install the file. 
*/ + fp[i]->f_count++; + current->files->fd[new_fd] = fp[i]; + } + + if (i > 0) { + int cmlen = CMSG32_LEN(i * sizeof(int)); + if (!err) + err = put_user(SOL_SOCKET, &cm->cmsg_level); + if (!err) + err = put_user(SCM_RIGHTS, &cm->cmsg_type); + if (!err) + err = put_user(cmlen, &cm->cmsg_len); + if (!err) { + cmlen = CMSG32_SPACE(i * sizeof(int)); + kmsg->msg_control += cmlen; + kmsg->msg_controllen -= cmlen; + } + } + if (i < fdnum) + kmsg->msg_flags |= MSG_CTRUNC; + + /* + * All of the files that fit in the message have had their + * usage counts incremented, so we just free the list. + */ + __scm_destroy(scm); +} + asmlinkage int sys32_sendmsg(int fd, struct msghdr32 *user_msg, unsigned user_flags) { struct socket *sock; @@ -2237,25 +2383,10 @@ asmlinkage int sys32_sendmsg(int fd, struct msghdr32 *user_msg, unsigned user_fl total_len = err; if(kern_msg.msg_controllen) { - struct cmsghdr32 *ucmsg = (struct cmsghdr32 *)kern_msg.msg_control; - unsigned long *kcmsg; - __kernel_size_t32 cmlen; - - if(kern_msg.msg_controllen > sizeof(ctl) && - kern_msg.msg_controllen <= 256) { - err = -ENOBUFS; - ctl_buf = kmalloc(kern_msg.msg_controllen, GFP_KERNEL); - if(!ctl_buf) - goto out_freeiov; - } - __get_user(cmlen, &ucmsg->cmsg_len); - kcmsg = (unsigned long *) ctl_buf; - *kcmsg++ = (unsigned long)cmlen; - err = -EFAULT; - if(copy_from_user(kcmsg, &ucmsg->cmsg_level, - kern_msg.msg_controllen - sizeof(__kernel_size_t32))) - goto out_freectl; - kern_msg.msg_control = ctl_buf; + err = cmsghdr_from_user32_to_kern(&kern_msg, ctl, sizeof(ctl)); + if(err) + goto out_freeiov; + ctl_buf = kern_msg.msg_control; } kern_msg.msg_flags = user_flags; @@ -2269,7 +2400,6 @@ asmlinkage int sys32_sendmsg(int fd, struct msghdr32 *user_msg, unsigned user_fl } unlock_kernel(); -out_freectl: /* N.B. Use kfree here, as kern_msg.msg_controllen might change? 
*/ if(ctl_buf != ctl) kfree(ctl_buf); @@ -2310,26 +2440,43 @@ asmlinkage int sys32_recvmsg(int fd, struct msghdr32 *user_msg, unsigned int use lock_kernel(); sock = sockfd_lookup(fd, &err); if (sock != NULL) { + struct scm_cookie scm; + if (sock->file->f_flags & O_NONBLOCK) user_flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &kern_msg, total_len, user_flags); - if(err >= 0) + memset(&scm, 0, sizeof(scm)); + err = sock->ops->recvmsg(sock, &kern_msg, total_len, + user_flags, &scm); + if(err >= 0) { len = err; + if(!kern_msg.msg_control) { + if(sock->passcred || scm.fp) + kern_msg.msg_flags |= MSG_CTRUNC; + if(scm.fp) + __scm_destroy(&scm); + } else { + /* Wheee... */ + if(sock->passcred) + put_cmsg32(&kern_msg, + SOL_SOCKET, SCM_CREDENTIALS, + sizeof(scm.creds), &scm.creds); + if(scm.fp != NULL) + scm_detach_fds32(&kern_msg, &scm); + } + } sockfd_put(sock); } unlock_kernel(); if(uaddr != NULL && err >= 0) err = move_addr_to_user(addr, kern_msg.msg_namelen, uaddr, uaddr_len); - if(err >= 0) { - err = __put_user(kern_msg.msg_flags, &user_msg->msg_flags); - if(!err) { - /* XXX Convert cmsg back into userspace 32-bit format... 
*/ - err = __put_user((unsigned long)kern_msg.msg_control - cmsg_ptr, - &user_msg->msg_controllen); - } + if(cmsg_ptr != 0 && err >= 0) { + u32 ucmsg_ptr = ((u32)(unsigned long)kern_msg.msg_control); + err = __put_user(ucmsg_ptr, &user_msg->msg_control); + err |= __put_user(kern_msg.msg_controllen, &user_msg->msg_controllen); } - + if(err >= 0) + err = __put_user(kern_msg.msg_flags, &user_msg->msg_flags); if(kern_msg.msg_iov != iov) kfree(kern_msg.msg_iov); out: @@ -2653,7 +2800,7 @@ asmlinkage int sparc32_execve(struct pt_regs *regs) error = do_execve32(filename, (u32 *)AA((u32)regs->u_regs[base + UREG_I1]), (u32 *)AA((u32)regs->u_regs[base + UREG_I2]), regs); - putname32(filename); + putname(filename); if(!error) { fprs_write(0); @@ -2943,8 +3090,10 @@ qm_info(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) info.addr = (unsigned long)mod; info.size = mod->size; info.flags = mod->flags; - info.usecount = (mod_member_present(mod, can_unload) - && mod->can_unload ? -1 : mod->usecount); + info.usecount = + ((mod_member_present(mod, can_unload) + && mod->can_unload) + ? -1 : atomic_read(&mod->uc.usecount)); if (copy_to_user(buf, &info, sizeof(struct module_info32))) return -EFAULT; @@ -3452,7 +3601,7 @@ asmlinkage int sys32_utimes(char *filename, struct timeval32 *tvs) ret = sys_utimes(kfilename, &ktvs[0]); set_fs(old_fs); - putname32(kfilename); + putname(kfilename); } return ret; } @@ -3577,3 +3726,76 @@ asmlinkage int sys32_sendfile(int out_fd, int in_fd, __kernel_off_t32 *offset, s return ret; } + +/* Handle adjtimex compatability. 
*/ + +struct timex32 { + u32 modes; + s32 offset, freq, maxerror, esterror; + s32 status, constant, precision, tolerance; + struct timeval32 time; + s32 tick; + s32 ppsfreq, jitter, shift, stabil; + s32 jitcnt, calcnt, errcnt, stbcnt; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; +}; + +extern int do_adjtimex(struct timex *); + +asmlinkage int sys32_adjtimex(struct timex32 *utp) +{ + struct timex txc; + int ret; + + memset(&txc, 0, sizeof(struct timex)); + + if(get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + if(put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + __put_user(txc.time.tv_usec, &utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + 
__put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 11c86ef5f..d2a75033a 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -1,4 +1,4 @@ -/* $Id: systbls.S,v 1.50 1998/10/07 01:27:27 davem Exp $ +/* $Id: systbls.S,v 1.53 1999/04/07 17:14:11 davem Exp $ * systbls.S: System call entry point tables for OS compatibility. * The native Linux system call table lives here also. * @@ -26,7 +26,7 @@ sys_call_table32: /*30*/ .word sys32_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice .word sys_nis_syscall, sys_sync, sys_kill, sys32_newstat, sys32_sendfile /*40*/ .word sys32_newlstat, sys_dup, sys_pipe, sys32_times, sys_nis_syscall - .word sys_nis_syscall, sys_setgid, sys_getgid, sys_signal, sys_geteuid + .word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid /*50*/ .word sys_getegid, sys_acct, sys_nis_syscall, sys_nis_syscall, sys32_ioctl .word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys32_execve /*60*/ .word sys_umask, sys_chroot, sys32_newfstat, sys_nis_syscall, sys_getpagesize @@ -48,7 +48,7 @@ sys_call_table32: /*140*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_getrlimit .word sys32_setrlimit, sys_nis_syscall, sys32_prctl, sys32_pciconfig_read, sys32_pciconfig_write /*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_umount + .word sys_nis_syscall, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount /*160*/ .word sys_nis_syscall, sys_nis_syscall, 
sys_getdomainname, sys_setdomainname, sys_nis_syscall .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_nis_syscall /*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_getdents @@ -85,7 +85,7 @@ sys_call_table: /*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice .word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile /*40*/ .word sys_newlstat, sys_dup, sys_pipe, sys_times, sys_nis_syscall - .word sys_nis_syscall, sys_setgid, sys_getgid, sys_signal, sys_geteuid + .word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid /*50*/ .word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl .word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys_execve /*60*/ .word sys_umask, sys_chroot, sys_newfstat, sys_nis_syscall, sys_getpagesize @@ -107,7 +107,7 @@ sys_call_table: /*140*/ .word sys_nis_syscall, sys_getpeername, sys_nis_syscall, sys_nis_syscall, sys_getrlimit .word sys_setrlimit, sys_nis_syscall, sys_prctl, sys_pciconfig_read, sys_pciconfig_write /*150*/ .word sys_getsockname, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_umount + .word sys_nis_syscall, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_utrap_install .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall /*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents @@ -188,7 +188,7 @@ sunos_sys_table: /*150*/ .word sys_getsockname, sunos_nosys, sunos_nosys .word sys_poll, sunos_nosys, sunos_nosys .word sunos_getdirentries, sys32_statfs, sys32_fstatfs - .word sys_umount, sunos_nosys, sunos_nosys + .word sys_oldumount, sunos_nosys, sunos_nosys .word sys_getdomainname, sys_setdomainname .word sunos_nosys, sys32_quotactl, sunos_nosys .word 
sunos_mount, sys_ustat, sunos_semsys diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index c826ce56d..0b72e6e0b 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -1,4 +1,4 @@ -/* $Id: time.c,v 1.16 1998/09/05 17:25:28 jj Exp $ +/* $Id: time.c,v 1.20 1999/03/15 22:13:40 davem Exp $ * time.c: UltraSparc timer and TOD clock support. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -31,6 +31,8 @@ #include <asm/pbm.h> #include <asm/ebus.h> +extern rwlock_t xtime_lock; + struct mostek48t02 *mstk48t02_regs = 0; static struct mostek48t08 *mstk48t08_regs = 0; static struct mostek48t59 *mstk48t59_regs = 0; @@ -69,6 +71,8 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) { unsigned long ticks; + write_lock(&xtime_lock); + do { do_timer(regs); @@ -82,11 +86,15 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) } while (ticks >= timer_tick_compare); timer_check_rtc(); + + write_unlock(&xtime_lock); } #ifdef __SMP__ void timer_tick_interrupt(struct pt_regs *regs) { + write_lock(&xtime_lock); + do_timer(regs); /* @@ -99,6 +107,8 @@ void timer_tick_interrupt(struct pt_regs *regs) : "r" (timer_tick_offset)); timer_check_rtc(); + + write_unlock(&xtime_lock); } #endif @@ -256,13 +266,17 @@ void __init clock_probe(void) node = prom_getchild(busnd); while(1) { - prom_getstring(node, "model", model, sizeof(model)); + if (!node) + model[0] = 0; + else + prom_getstring(node, "model", model, sizeof(model)); if(strcmp(model, "mk48t02") && strcmp(model, "mk48t08") && strcmp(model, "mk48t59")) { - node = prom_getsibling(node); + if (node) + node = prom_getsibling(node); #ifdef CONFIG_PCI - if ((node == 0) && ebus) { + while ((node == 0) && ebus) { ebus = ebus->next; if (ebus) { busnd = ebus->prom_node; @@ -397,6 +411,9 @@ static __inline__ unsigned long do_gettimeoffset(void) return ticks / timer_ticks_per_usec; } +/* This need not obtain the xtime_lock as it is coded in + * 
an implicitly SMP safe way already. + */ void do_gettimeofday(struct timeval *tv) { /* Load doubles must be used on xtime so that what we get @@ -450,7 +467,7 @@ void do_gettimeofday(struct timeval *tv) void do_settimeofday(struct timeval *tv) { - cli(); + write_lock_irq(&xtime_lock); tv->tv_usec -= do_gettimeoffset(); if(tv->tv_usec < 0) { @@ -461,10 +478,10 @@ void do_settimeofday(struct timeval *tv) xtime = *tv; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; - time_state = TIME_ERROR; /* p. 24, (a) */ time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; - sti(); + + write_unlock_irq(&xtime_lock); } static int set_rtc_mmss(unsigned long nowtime) diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 9c0498348..2c19cc39d 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -1,4 +1,4 @@ -/* $Id: trampoline.S,v 1.6 1998/10/11 06:58:23 davem Exp $ +/* $Id: trampoline.S,v 1.8 1998/12/09 21:01:15 davem Exp $ * trampoline.S: Jump start slave processors on sparc64. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -34,8 +34,8 @@ sparc64_cpu_startup: sllx %g4, 32, %g4 /* XXX Buggy PROM... */ - srl %o0, 0, %g6 - add %g6, %g4, %g6 + srl %o0, 0, %o0 + ldx [%o0], %g6 sethi %uhi(_PAGE_VALID | _PAGE_SZ4MB), %g5 sllx %g5, 32, %g5 @@ -197,10 +197,18 @@ bounce: #undef KERN_LOWBITS #undef VPTE_BASE + /* Setup interrupt globals, we are always SMP. */ wrpr %o1, (PSTATE_IG | PSTATE_IE), %pstate - sethi %hi(ivector_to_mask), %g5 - or %g5, %lo(ivector_to_mask), %g1 - mov 0x40, %g2 + + /* Get our UPA MID. 
*/ + lduw [%o2 + AOFF_task_processor], %g1 + sethi %hi(cpu_data), %g5 + or %g5, %lo(cpu_data), %g5 + + /* In theory this is: &(cpu_data[this_upamid].irq_worklists[0]) */ + sllx %g1, 7, %g1 + add %g5, %g1, %g1 + add %g1, 64, %g1 wrpr %g0, 0, %wstate or %o1, PSTATE_IE, %o1 diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index cb5180fff..5f7049822 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -1,8 +1,8 @@ -/* $Id: traps.c,v 1.55 1998/10/11 06:58:22 davem Exp $ +/* $Id: traps.c,v 1.58 1999/03/29 12:38:10 jj Exp $ * arch/sparc64/kernel/traps.c * * Copyright (C) 1995,1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997,1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ /* @@ -406,8 +406,6 @@ void do_fpe_common(struct pt_regs *regs) void do_fpieee(struct pt_regs *regs) { #ifdef DEBUG_FPU - save_and_clear_fpu(); - printk("fpieee %016lx\n", current->tss.xfsr[0]); #endif do_fpe_common(regs); @@ -420,7 +418,6 @@ void do_fpother(struct pt_regs *regs) struct fpustate *f = FPUSTATE; int ret = 0; - save_and_clear_fpu(); switch ((current->tss.xfsr[0] & 0x1c000)) { case (2 << 14): /* unfinished_FPop */ case (3 << 14): /* unimplemented_FPop */ @@ -428,7 +425,7 @@ void do_fpother(struct pt_regs *regs) break; } if (ret) return; -#ifdef DEBUG_FPU +#ifdef DEBUG_FPU printk("fpother %016lx\n", current->tss.xfsr[0]); #endif do_fpe_common(regs); @@ -462,6 +459,9 @@ void instruction_dump (unsigned int *pc) void die_if_kernel(char *str, struct pt_regs *regs) { + extern void __show_regs(struct pt_regs * regs); + extern void smp_report_regs(void); + /* Amuse the user. 
*/ printk( " \\|/ ____ \\|/\n" @@ -471,7 +471,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) printk("%s(%d): %s\n", current->comm, current->pid, str); __asm__ __volatile__("flushw"); - show_regs(regs); + __show_regs(regs); { struct reg_window *rw = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS); @@ -491,6 +491,10 @@ void die_if_kernel(char *str, struct pt_regs *regs) printk("Instruction DUMP:"); instruction_dump ((unsigned int *) regs->tpc); } +#ifdef __SMP__ + smp_report_regs(); +#endif + lock_kernel(); /* Or else! */ if(regs->tstate & TSTATE_PRIV) do_exit(SIGKILL); @@ -498,7 +502,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) } extern int handle_popc(u32 insn, struct pt_regs *regs); -extern int handle_ldq_stq(u32 insn, struct pt_regs *regs); +extern int handle_ldf_stq(u32 insn, struct pt_regs *regs); void do_illegal_instruction(struct pt_regs *regs) { @@ -515,7 +519,7 @@ void do_illegal_instruction(struct pt_regs *regs) if (handle_popc(insn, regs)) return; } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { - if (handle_ldq_stq(insn, regs)) + if (handle_ldf_stq(insn, regs)) return; } } diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 656d29454..3a9fdf4d2 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -1,4 +1,4 @@ -/* $Id: ttable.S,v 1.27 1998/09/25 01:09:10 davem Exp $ +/* $Id: ttable.S,v 1.28 1999/03/29 12:38:10 jj Exp $ * ttable.S: Sparc V9 Trap Table(s) with SpitFire extensions. * * Copyright (C) 1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -21,8 +21,8 @@ tl0_resv012: BTRAP(0x12) BTRAP(0x13) BTRAP(0x14) BTRAP(0x15) BTRAP(0x16) BTRAP(0 tl0_resv018: BTRAP(0x18) BTRAP(0x19) BTRAP(0x1a) BTRAP(0x1b) BTRAP(0x1c) BTRAP(0x1d) tl0_resv01e: BTRAP(0x1e) BTRAP(0x1f) tl0_fpdis: TRAP_NOSAVE(do_fpdis) -tl0_fpieee: TRAP(do_fpieee) -tl0_fpother: TRAP(do_fpother) +tl0_fpieee: TRAP_SAVEFPU(do_fpieee) +tl0_fpother: TRAP_SAVEFPU(do_fpother) tl0_tof: TRAP(do_tof) tl0_cwin: CLEAN_WINDOW tl0_div0: TRAP(do_div0) diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index d32309b9d..f4599bbdb 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -1,4 +1,4 @@ -/* $Id: unaligned.c,v 1.13 1998/10/07 22:43:13 davem Exp $ +/* $Id: unaligned.c,v 1.15 1999/04/03 11:36:21 anton Exp $ * unaligned.c: Unaligned load/store trap handling with special * cases for the kernel to do them more quickly. * @@ -374,7 +374,6 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u enum direction dir = decode_direction(insn); int size = decode_access_size(insn); - lock_kernel(); if(!ok_for_kernel(insn) || dir == both) { printk("Unsupported unaligned load/store trap for kernel at <%016lx>.\n", regs->tpc); @@ -423,7 +422,6 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u } advance(regs); } - unlock_kernel(); } static char popc_helper[] = { @@ -470,7 +468,7 @@ extern void do_fpother(struct pt_regs *regs); extern void do_privact(struct pt_regs *regs); extern void data_access_exception(struct pt_regs *regs); -int handle_ldq_stq(u32 insn, struct pt_regs *regs) +int handle_ldf_stq(u32 insn, struct pt_regs *regs) { unsigned long addr = compute_effective_address(regs, insn, 0); int freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); @@ -522,8 +520,10 @@ int handle_ldq_stq(u32 insn, struct pt_regs *regs) return 1; } } else { - /* LDQ */ - u32 first, second, third, fourth; + /* LDF, LDDF, LDQF */ + u32 
data[4] __attribute__ ((aligned(8))); + int size, i; + int err; if (asi < 0x80) { do_privact(regs); @@ -532,25 +532,35 @@ int handle_ldq_stq(u32 insn, struct pt_regs *regs) data_access_exception(regs); return 1; } - if (get_user (first, (u32 *)addr) || - __get_user (second, (u32 *)(addr + 4)) || - __get_user (third, (u32 *)(addr + 8)) || - __get_user (fourth, (u32 *)(addr + 12))) { - if (asi & 0x2) /* NF */ { - first = 0; second = 0; third = 0; fourth = 0; - } else { - data_access_exception(regs); - return 1; - } + switch (insn & 0x180000) { + case 0x000000: size = 1; break; + case 0x100000: size = 4; break; + default: size = 2; break; + } + for (i = 0; i < size; i++) + data[i] = 0; + + err = get_user (data[0], (u32 *)addr); + if (!err) { + for (i = 1; i < size; i++) + err |= __get_user (data[i], (u32 *)(addr + 4*i)); + } + if (err && !(asi & 0x2 /* NF */)) { + data_access_exception(regs); + return 1; } if (asi & 0x8) /* Little */ { - u32 tmp = le32_to_cpup(&first); - - first = le32_to_cpup(&fourth); - fourth = tmp; - tmp = le32_to_cpup(&second); - second = le32_to_cpup(&third); - third = tmp; + u64 tmp; + + switch (size) { + case 1: data[0] = le32_to_cpup(data + 0); break; + default:*(u64 *)(data + 0) = le64_to_cpup((u64 *)(data + 0)); + break; + case 4: tmp = le64_to_cpup((u64 *)(data + 0)); + *(u64 *)(data + 0) = le64_to_cpup((u64 *)(data + 2)); + *(u64 *)(data + 2) = tmp; + break; + } } if (!(current->tss.fpsaved[0] & FPRS_FEF)) { current->tss.fpsaved[0] = FPRS_FEF; @@ -562,16 +572,27 @@ int handle_ldq_stq(u32 insn, struct pt_regs *regs) else memset(f->regs+32, 0, 32*sizeof(u32)); } - f->regs[freg] = first; - f->regs[freg+1] = second; - f->regs[freg+2] = third; - f->regs[freg+3] = fourth; + memcpy(f->regs + freg, data, size * 4); current->tss.fpsaved[0] |= flag; } advance(regs); return 1; } +void handle_ld_nf(u32 insn, struct pt_regs *regs) +{ + int rd = ((insn >> 25) & 0x1f); + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + unsigned long *reg; + + 
maybe_flush_windows(0, 0, rd, from_kernel); + reg = fetch_reg_addr(rd, regs); + if ((insn & 0x780000) == 0x180000) + reg[1] = 0; + reg[0] = 0; + advance(regs); +} + void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { unsigned long pc = regs->tpc; |