diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1999-06-22 23:05:57 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1999-06-22 23:05:57 +0000 |
commit | 51d3b7814cdccef9188240fe0cbd8d97ff2c7470 (patch) | |
tree | 5cbb01d0323d4f63ade66bdf48ba4a91aaa6df16 /arch | |
parent | 52273a23c9a84336b93a35e4847fc88fac7eb0e4 (diff) |
Merge with Linux 2.3.7.
WARNING: 2.3.7 is known to eat filesystems for breakfast and little
children for lunch, so if you try this on your machine make backups
first ...
Diffstat (limited to 'arch')
98 files changed, 15174 insertions, 2550 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 967ee6766..1c198989d 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,21 +10,31 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1995, 1996 by Russell King +# Copyright (C) 1995-1999 by Russell King CFLAGS_PROC := ASFLAGS_PROC := -# All processors get `-mshort-load-bytes' for now, to work around alignment -# problems. This is more of a hack that just happens to work than a real fix -# but it will do for now. +# GCC 2.7 uses different options to later compilers; sort out which we have +CONFIG_GCC_NEW := $(shell if $(CC) --version 2>&1 | grep '^2\.7' > /dev/null; then echo n; else echo y; fi) + +# Hack to get around RiscPC with StrongARM optimistaion +# problem - force ARM710 optimisation for now. +ifeq ($(CONFIG_GCC_NEW),y) + ifeq ($(CONFIG_ARCH_RPC),y) + ifeq ($(CONFIG_CPU_SA110),y) + CONFIG_CPU_SA110 := n + CONFIG_CPU_ARM7 := y + endif + endif +endif ifeq ($(CONFIG_CPU_26),y) PROCESSOR = armo TEXTADDR = 0x02080000 ZTEXTADDR = 0x01800000 ZRELADDR = 0x02080000 - ifeq ($(CONFIG_BINUTILS_NEW),y) + ifeq ($(CONFIG_GCC_NEW),y) CFLAGS_PROC += -mapcs-26 -mshort-load-bytes ifeq ($(CONFIG_CPU_ARM2),y) CFLAGS_PROC += -mcpu=arm2 @@ -49,7 +59,7 @@ endif ifeq ($(CONFIG_CPU_32),y) PROCESSOR = armv TEXTADDR = 0xC0008000 - ifeq ($(CONFIG_BINUTILS_NEW),y) + ifeq ($(CONFIG_GCC_NEW),y) CFLAGS_PROC += -mapcs-32 -mshort-load-bytes ifeq ($(CONFIG_CPU_ARM6),y) CFLAGS_PROC += -mcpu=arm6 @@ -68,10 +78,11 @@ endif # Processor Architecture # CFLAGS_PROC - processor dependent CFLAGS -# PROCESSOR - processor type -# TEXTADDR - Uncompressed kernel link text address -# ZTEXTADDR - Compressed kernel link text address -# ZRELADDR - Compressed kernel relocating address (point at which uncompressed kernel is loaded). +# PROCESSOR - processor type +# TEXTADDR - Uncompressed kernel link text address +# ZTEXTADDR - Compressed kernel link text address +# ZRELADDR - Compressed kernel relocating address +# (point at which uncompressed kernel is loaded). # COMPRESSED_HEAD = head.o @@ -79,19 +90,16 @@ COMPRESSED_HEAD = head.o ifeq ($(CONFIG_ARCH_A5K),y) MACHINE = a5k ARCHDIR = arc -COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o endif ifeq ($(CONFIG_ARCH_ARC),y) MACHINE = arc ARCHDIR = arc -COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o endif ifeq ($(CONFIG_ARCH_RPC),y) MACHINE = rpc ARCHDIR = rpc -COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o ZTEXTADDR = 0x10008000 ZRELADDR = 0x10008000 endif @@ -103,13 +111,17 @@ ZTEXTADDR = 0x00008000 ZRELADDR = 0x00008000 endif -ifeq ($(CONFIG_ARCH_EBSA285),y) -MACHINE = ebsa285 +ifeq ($(CONFIG_FOOTBRIDGE),y) +MACHINE = footbridge ARCHDIR = ebsa285 ZTEXTADDR = 0x00008000 ZRELADDR = 0x00008000 endif +ifeq ($(CONFIG_ARCH_CO285),y) +TEXTADDR = 0x60008000 +endif + ifeq ($(CONFIG_ARCH_NEXUSPCI),y) MACHINE = nexuspci ARCHDIR = nexuspci @@ -119,31 +131,13 @@ COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr_scc.o COMPRESSED_HEAD = head-nexuspci.o endif -ifeq ($(CONFIG_ARCH_VNC),y) -TEXTADDR = 0xC000C000 -MACHINE = vnc -ARCHDIR = vnc -endif - -ifeq ($(CONFIG_ARCH_TBOX),y) -MACHINE = tbox -ARCHDIR = tbox -ZTEXTADDR = 0x80008000 -ZRELDIR = 0x80008000 -endif - PERL = perl -ifeq ($(CONFIG_BINUTILS_NEW),y) -LD = $(CROSS_COMPILE)ld -m elf32arm -else -LD = $(CROSS_COMPILE)ld -m elf_arm -endif +LD = $(CROSS_COMPILE)ld OBJCOPY = $(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S OBJDUMP = $(CROSS_COMPILE)objdump CPP = $(CC) -E ARCHCC := $(word 1,$(CC)) GCCLIB := `$(CC) $(CFLAGS_PROC) --print-libgcc-file-name` -#GCCARCH := -B/usr/bin/arm-linuxelf- HOSTCFLAGS := $(CFLAGS:-fomit-frame-pointer=) ifeq ($(CONFIG_FRAME_POINTER),y) CFLAGS := $(CFLAGS:-fomit-frame-pointer=) @@ -153,75 +147,40 @@ ASFLAGS := $(ASFLAGS_PROC) $(ASFLAGS) LINKFLAGS = -T $(TOPDIR)/arch/arm/vmlinux-$(PROCESSOR).lds -e stext -Ttext $(TEXTADDR) ZLINKFLAGS = -Ttext $(ZTEXTADDR) -SUBDIRS := $(SUBDIRS:drivers=arch/arm/drivers) arch/arm/lib arch/arm/kernel arch/arm/mm -HEAD := arch/arm/kernel/head-$(PROCESSOR).o arch/arm/kernel/init_task.o +# If we're intending to debug the kernel, make sure it has line number +# information. This gets stripped out when building (z)Image so it doesn't +# add anything to the footprint of the running kernel. +ifeq ($(CONFIG_DEBUG_INFO),y) +CFLAGS += -g +endif + +HEAD := arch/arm/kernel/head-$(PROCESSOR).o \ + arch/arm/kernel/init_task.o +SUBDIRS := arch/arm/special $(SUBDIRS) arch/arm/lib arch/arm/kernel \ + arch/arm/mm arch/arm/nwfpe CORE_FILES := arch/arm/kernel/kernel.o arch/arm/mm/mm.o $(CORE_FILES) LIBS := arch/arm/lib/lib.a $(LIBS) $(GCCLIB) - -BLOCK_DRIVERS := drivers/block/block.a -CDROM_DRIVERS := drivers/cdrom/cdrom.a -ifeq ($(CONFIG_FB),y) -CHAR_DRIVERS := arch/arm/drivers/char1/char1.a drivers/char/char.a arch/arm/drivers/char1/char1.a -else -ifeq ($(CONFIG_VGA_CONSOLE),y) -CHAR_DRIVERS := arch/arm/drivers/char1/char1.a drivers/char/char.a arch/arm/drivers/char1/char1.a -else -CHAR_DRIVERS := arch/arm/drivers/char/char.a -endif -endif -MISC_DRIVERS := drivers/misc/misc.a -NET_DRIVERS := drivers/net/net.a -PARIDE_DRIVERS := drivers/block/paride/paride.a -PCI_DRIVERS := drivers/pci/pci.a -SCSI_DRIVERS := drivers/scsi/scsi.a -SOUND_DRIVERS := drivers/sound/sound.a -VIDEO_DRIVERS := drivers/video/video.a -PNP_DRIVERS := drivers/pnp/pnp.a +DRIVERS += arch/arm/special/special.a ifeq ($(CONFIG_ARCH_ACORN),y) -BLOCK_DRIVERS += drivers/acorn/block/acorn-block.a -CHAR_DRIVERS += drivers/acorn/char/acorn-char.a -NET_DRIVERS += drivers/acorn/net/acorn-net.a drivers/net/net.a -SCSI_DRIVERS += drivers/acorn/scsi/acorn-scsi.a +SUBDIRS += drivers/acorn/block drivers/acorn/char drivers/acorn/net \ + drivers/acorn/scsi +DRIVERS += drivers/acorn/block/acorn-block.a \ + drivers/acorn/char/acorn-char.a \ + drivers/acorn/net/acorn-net.a \ + drivers/acorn/scsi/acorn-scsi.a endif -DRIVERS := $(BLOCK_DRIVERS) $(CHAR_DRIVERS) $(MISC_DRIVERS) $(NET_DRIVERS) - -ifeq ($(CONFIG_FB),y) -DRIVERS := $(DRIVERS) $(VIDEO_DRIVERS) -else -ifeq ($(CONFIG_VGA_CONSOLE),y) -DRIVERS := $(DRIVERS) $(VIDEO_DRIVERS) -endif -endif -ifeq ($(CONFIG_SCSI),y) -DRIVERS := $(DRIVERS) $(SCSI_DRIVERS) -endif -ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR),) -DRIVERS := $(DRIVERS) $(CDROM_DRIVERS) -endif -ifdef CONFIG_PCI -DRIVERS := $(DRIVERS) $(PCI_DRIVERS) -endif -ifeq ($(CONFIG_SOUND),y) -DRIVERS := $(DRIVERS) $(SOUND_DRIVERS) -endif -ifeq ($(CONFIG_PARIDE),y) -DRIVERS := $(DRIVERS) $(PARIDE_DRIVERS) -endif -ifdef CONFIG_PNP -DRIVERS := $(DRIVERS) $(PNP_DRIVERS) +ifeq ($(CONFIG_NWFPE),y) +DRIVERS += arch/arm/nwfpe/math-emu.a endif +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + symlinks:: $(RM) include/asm-arm/arch include/asm-arm/proc (cd include/asm-arm; ln -sf arch-$(ARCHDIR) arch; ln -sf proc-$(PROCESSOR) proc) -# Once we've finished integrating the sources, the @$(MAKE) will disappear -archmrproper: - rm -f include/asm-arm/arch include/asm-arm/proc - @$(MAKE) -C arch/$(ARCH)/drivers mrproper - arch/arm/kernel: dummy $(MAKE) linuxsubdirs SUBDIRS=arch/arm/kernel @@ -231,19 +190,20 @@ arch/arm/mm: dummy arch/arm/lib: dummy $(MAKE) linuxsubdirs SUBDIRS=arch/arm/lib -MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot - -zImage: vmlinux - @$(MAKEBOOT) zImage +zImage zinstall Image install: vmlinux + @$(MAKEBOOT) $@ -zinstall: vmlinux - @$(MAKEBOOT) zinstall +# Once we've finished integrating the sources, the @$(MAKE) will disappear +archmrproper: + rm -f include/asm-arm/arch include/asm-arm/proc + @$(MAKE) -C arch/$(ARCH)/special mrproper -Image: vmlinux - @$(MAKEBOOT) Image +archclean: + @$(MAKEBOOT) clean + $(RM) arch/arm/lib/constants.h -install: vmlinux - @$(MAKEBOOT) install +archdep: + @$(MAKEBOOT) dep # My testing targets (that short circuit a few dependencies) zImg:; @$(MAKEBOOT) zImage @@ -251,10 +211,19 @@ Img:; @$(MAKEBOOT) Image i:; @$(MAKEBOOT) install zi:; @$(MAKEBOOT) zinstall -archclean: - @$(MAKEBOOT) clean - $(RM) arch/arm/lib/constants.h +a5k_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/a5k arch/arm/defconfig + +ebsa110_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/ebsa110 arch/arm/defconfig + +footbridge_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/footbridge arch/arm/defconfig + +rpc_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/rpc arch/arm/defconfig -archdep: - @$(MAKEBOOT) dep -sed -e /^MACHINE..*=/s,= .*,= rpc,;/^PROCESSOR..*=/s,= .*,= armv, linux/arch/arm/Makefile.normal diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 0c6a04c5b..cf1481ab0 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -11,10 +11,15 @@ HEAD =$(COMPRESSED_HEAD) OBJS =$(HEAD) misc.o $(COMPRESSED_EXTRA) CFLAGS =-O2 -DSTDC_HEADERS $(CFLAGS_PROC) ARFLAGS =rc +FONTC =$(TOPDIR)/drivers/video/font_acorn_8x8.c + +ifeq ($(CONFIG_ARCH_ACORN),y) +OBJS += ll_char_wr.o font.o +endif all: vmlinux -vmlinux: piggy.o $(OBJS) +vmlinux: $(OBJS) piggy.o $(LD) $(ZLINKFLAGS) -o vmlinux $(OBJS) piggy.o $(HEAD): $(HEAD:.o=.S) @@ -29,5 +34,8 @@ piggy.o: $(SYSTEM) $(LD) -r -o piggy.o -b binary $$tmppiggy.gz -b elf32-arm -T $$tmppiggy.lnk; \ rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk; +font.o: $(FONTC) + $(CC) -Dstatic= -c -o $@ $(FONTC) + clean:; rm -f vmlinux core diff --git a/arch/arm/lib/ll_char_wr.S b/arch/arm/boot/compressed/ll_char_wr.S index 966d2846e..57865f2fd 100644 --- a/arch/arm/lib/ll_char_wr.S +++ b/arch/arm/boot/compressed/ll_char_wr.S @@ -12,7 +12,7 @@ @ Regs: [] = corruptible @ {} = used @ () = do not use - +#define __ASSEMBLY__ #include <linux/linkage.h> #include <asm/assembler.h> .text @@ -25,7 +25,7 @@ LC0: .word SYMBOL_NAME(bytes_per_char_h) .word SYMBOL_NAME(video_size_row) - .word SYMBOL_NAME(cmap_80) + .word SYMBOL_NAME(acorndata_8x8) .word SYMBOL_NAME(con_charconvtable) ENTRY(ll_write_char) diff --git a/arch/arm/config.in b/arch/arm/config.in index 467218db7..2fea6a661 100644 --- a/arch/arm/config.in +++ b/arch/arm/config.in @@ -14,18 +14,31 @@ choice 'ARM system type' \ A5000 CONFIG_ARCH_A5K \ RiscPC CONFIG_ARCH_RPC \ EBSA-110 CONFIG_ARCH_EBSA110 \ - EBSA-285 CONFIG_ARCH_EBSA285 \ - NexusPCI CONFIG_ARCH_NEXUSPCI \ - Corel-VNC CONFIG_ARCH_VNC \ - Tbox CONFIG_ARCH_TBOX" RiscPC + FootBridge-based CONFIG_FOOTBRIDGE" RiscPC + +if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then + bool 'FootBridge in HOST mode' CONFIG_HOST_FOOTBRIDGE + if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then + define_bool CONFIG_ADDIN_FOOTBRIDGE n + else + define_bool CONFIG_ADDIN_FOOTBRIDGE y + fi +fi + +if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then + bool ' Include support for Intel EBSA285' CONFIG_ARCH_EBSA285 + bool ' Include support for Chalice CATS boards' CONFIG_CATS + bool ' Include support for Corel NetWinder' CONFIG_ARCH_NETWINDER +fi -if [ "$CONFIG_ARCH_EBSA285" = "y" ]; then - bool ' Include support for CATS boards' CONFIG_CATS +if [ "$CONFIG_ADDIN_FOOTBRIDGE" = "y" ]; then + # If we get any other footbridge-based plug-in boards, then + # add your architecture options here + define_bool CONFIG_ARCH_CO285 y fi # Select various configuration options depending on the machine type # Easy check for Acorn-style architectures - if [ "$CONFIG_ARCH_ARC" = "y" -o \ "$CONFIG_ARCH_A5K" = "y" -o \ "$CONFIG_ARCH_RPC" = "y" ]; then @@ -34,23 +47,19 @@ else define_bool CONFIG_ARCH_ACORN n fi -if [ "$CONFIG_ARCH_TBOX" = "y" ]; then - define_bool CONFIG_BUS_I2C y -fi +#if [ "$CONFIG_ARCH_TBOX" = "y" ]; then +# define_bool CONFIG_BUS_I2C y +#fi # These machines always have PCI - if [ "$CONFIG_ARCH_NEXUSPCI" = "y" -o \ - "$CONFIG_ARCH_VNC" = "y" ]; then + "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then define_bool CONFIG_PCI y fi -if [ "$CONFIG_ARCH_EBSA285" = "y" ]; then - bool "PCI support" CONFIG_PCI -fi # These machines have ISA-DMA if [ "$CONFIG_CATS" = "y" -o \ - "$CONFIG_ARCH_VNC" = "y" ]; then + "$CONFIG_ARCH_NETWINDER" = "y" ]; then define_bool CONFIG_ISA_DMA y else define_bool CONFIG_ISA_DMA n @@ -59,7 +68,6 @@ fi # Figure out whether this system uses 26-bit or 32-bit CPUs. Nobody has # ever built a machine that can take both, and now that ARM3 is obsolete # nobody is likely to either. - if [ "$CONFIG_ARCH_ARC" = "y" -o \ "$CONFIG_ARCH_A5K" = "y" ]; then define_bool CONFIG_CPU_32 n @@ -71,7 +79,6 @@ fi # Now allow the user to choose a more precise CPU. This is only used to set # the flags we pass to GCC, not in any code. - choice 'Optimise for CPU' \ "ARM2 CONFIG_CPU_ARM2 \ ARM3 CONFIG_CPU_ARM3 \ @@ -80,22 +87,21 @@ choice 'Optimise for CPU' \ SA110 CONFIG_CPU_SA110" ARM6 if [ "$CONFIG_CPU_26" = "y" ]; then - # For 26-bit CPUs, the page size changes with the amount of physical RAM! # The default is 4MB but if the user has less they have to own up to it here. - choice 'Physical memory size' \ "4MB+ CONFIG_PAGESIZE_32 \ - 2MB CONFIG_PAGESIZE_16 \ - 1MB/512K CONFIG_PAGESIZE_8" 4MB+ + 2MB CONFIG_PAGESIZE_16" 4MB+ fi endmenu mainmenu_option next_comment comment 'Code maturity level options' bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -bool 'Use new compilation options (for GCC 2.8)' CONFIG_BINUTILS_NEW -bool 'Compile kernel with frame pointer (for useful debugging)' CONFIG_FRAME_POINTER +if [ "$CONFIG_CPU_32" = "y" -a "$CONFIG_ARCH_EBSA110" != "y" ]; then + bool 'Enable kernel-mode alignment trap handler (EXPERIMENTAL)' CONFIG_ALIGNMENT_TRAP +fi +bool 'Split text into discardable sections' CONFIG_TEXT_SECTIONS endmenu mainmenu_option next_comment @@ -113,13 +119,19 @@ bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL +tristate 'Math emulation' CONFIG_NWFPE tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC +if [ "$CONFIG_CPU_32" = "y" ]; then + tristate 'RISC OS personality' CONFIG_ARTHUR +fi tristate 'Parallel port support' CONFIG_PARPORT if [ "$CONFIG_PARPORT" != "n" ]; then - dep_tristate ' Archimedes hardware' CONFIG_PARPORT_ARC $CONFIG_PARPORT + if [ "$CONFIG_ARCH_ARC" = "y" ]; then + dep_tristate ' Archimedes hardware' CONFIG_PARPORT_ARC $CONFIG_PARPORT + fi dep_tristate ' PC-style hardware' CONFIG_PARPORT_PC $CONFIG_PARPORT # If exactly one hardware type is selected then parport will optimise away # support for loading any others. Defeat this if the user is keen. @@ -129,13 +141,29 @@ if [ "$CONFIG_PARPORT" != "n" ]; then fi fi fi -if [ "$CONFIG_ARCH_EBSA285" = "y" -o \ - "$CONFIG_ARCH_EBSA110" = "y" -o \ - "$CONFIG_ARCH_VNC" = "y" ]; then +if [ "$CONFIG_ARCH_EBSA110" = "y" -o \ + "$CONFIG_ARCH_NETWINDER" = "y" -o \ + "$CONFIG_CATS" = "y" ]; then string 'Initial kernel command string' CONFIG_CMDLINE fi +if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \ + "$CONFIG_ARCH_EBSA110" = "y" -o \ + "$CONFIG_ARCH_EBSA285" = "y" -o \ + "$CONFIG_ARCH_CO285" = "y" ]; then + bool 'Timer and CPU usage LEDs' CONFIG_LEDS + if [ "$CONFIG_LEDS" = "y" ]; then + if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \ + "$CONFIG_ARCH_EBSA285" = "y" -o \ + "$CONFIG_ARCH_CO285" = "y" ]; then + bool ' Timer LED' CONFIG_LEDS_TIMER + bool ' CPU usage LED' CONFIG_LEDS_CPU + fi + fi +fi endmenu +source drivers/i2o/Config.in + source drivers/pnp/Config.in source drivers/block/Config.in @@ -144,15 +172,19 @@ if [ "$CONFIG_ARCH_ACORN" = "y" ]; then source drivers/acorn/block/Config.in fi -if [ "$CONFIG_VGA_CONSOLE" = "n" -a "$CONFIG_FB" = "n" ]; then - source arch/arm/drivers/char/Config.in -else - source drivers/char/Config.in -fi +source drivers/char/Config.in if [ "$CONFIG_ARCH_ACORN" = "y" ]; then - source drivers/acorn/char/Config.in + if [ "$CONFIG_MOUSE" = "y" ]; then + if [ "$CONFIG_ARCH_RPC" != "y" ]; then + define_bool CONFIG_KBDMOUSE y + else + define_bool CONFIG_RPCMOUSE y + fi + fi fi +source drivers/usb/Config.in + if [ "$CONFIG_VT" = "y" ]; then mainmenu_option next_comment comment 'Console drivers' @@ -166,9 +198,11 @@ fi if [ "$CONFIG_NET" = "y" ]; then source net/Config.in -fi -if [ "$CONFIG_NET" = "y" ]; then + source net/ax25/Config.in + + source net/irda/Config.in + mainmenu_option next_comment comment 'Network device support' @@ -179,6 +213,15 @@ if [ "$CONFIG_NET" = "y" ]; then endmenu fi +# mainmenu_option next_comment +# comment 'ISDN subsystem' +# +# tristate 'ISDN support' CONFIG_ISDN +# if [ "$CONFIG_ISDN" != "n" ]; then +# source drivers/isdn/Config.in +# fi +# endmenu + mainmenu_option next_comment comment 'SCSI support' @@ -200,21 +243,29 @@ if [ "$CONFIG_ARCH_ACORN" = "y" -o "$CONFIG_PCI" = "y" ]; then endmenu fi -# mainmenu_option next_comment -# comment 'ISDN subsystem' -# -# tristate 'ISDN support' CONFIG_ISDN -# if [ "$CONFIG_ISDN" != "n" ]; then -# source drivers/isdn/Config.in -# fi -# endmenu - source fs/Config.in mainmenu_option next_comment comment 'Kernel hacking' -bool 'Debug kernel errors' CONFIG_DEBUG_ERRORS +bool 'Compile kernel with frame pointer (for useful debugging)' CONFIG_FRAME_POINTER +bool 'Verbose kernel error messages' CONFIG_DEBUG_ERRORS +bool 'Verbose user fault messages' CONFIG_DEBUG_USER +bool 'Include debugging information in kernel binary' CONFIG_DEBUG_INFO #bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + if [ "$CONFIG_CPU_26" = "y" ]; then + bool 'Disable pgtable cache (EXPERIMENTAL)' CONFIG_NO_PGT_CACHE + fi + + # These options are only for real kernel hackers + # who want to get their hands dirty. + bool 'Kernel low-level debugging functions' CONFIG_DEBUG_LL + if [ "$CONFIG_DEBUG_LL" = "y" ]; then + if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then + bool 'Kernel low-level debugging messages via DC21285 port' CONFIG_DEBUG_DC21285_PORT + fi + fi +fi endmenu diff --git a/arch/arm/defconfig b/arch/arm/defconfig index db89599be..ce85d6ffc 100644 --- a/arch/arm/defconfig +++ b/arch/arm/defconfig @@ -4,47 +4,71 @@ CONFIG_ARM=y # +# System and processor type +# +# CONFIG_ARCH_ARC is not set +# CONFIG_ARCH_A5K is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_EBSA110 is not set +CONFIG_FOOTBRIDGE=y +CONFIG_HOST_FOOTBRIDGE=y +# CONFIG_ADDIN_FOOTBRIDGE is not set +CONFIG_ARCH_EBSA285=y +# CONFIG_CATS is not set +CONFIG_ARCH_NETWINDER=y +# CONFIG_ARCH_ACORN is not set +CONFIG_PCI=y +CONFIG_ISA_DMA=y +CONFIG_CPU_32=y +# CONFIG_CPU_26 is not set +# CONFIG_CPU_ARM2 is not set +# CONFIG_CPU_ARM3 is not set +# CONFIG_CPU_ARM6 is not set +# CONFIG_CPU_ARM7 is not set +CONFIG_CPU_SA110=y + +# # Code maturity level options # CONFIG_EXPERIMENTAL=y +# CONFIG_ALIGNMENT_TRAP is not set +# CONFIG_TEXT_SECTIONS is not set # # Loadable module support # CONFIG_MODULES=y -CONFIG_MODVERSIONS=y +# CONFIG_MODVERSIONS is not set CONFIG_KMOD=y # # General setup # -# CONFIG_ARCH_ARC is not set -# CONFIG_ARCH_A5K is not set -CONFIG_ARCH_RPC=y -# CONFIG_ARCH_EBSA110 is not set -# CONFIG_ARCH_NEXUSPCI is not set -CONFIG_ARCH_ACORN=y -# CONFIG_PCI is not set -# CONFIG_CPU_ARM2 is not set -# CONFIG_CPU_ARM3 is not set -# CONFIG_CPU_ARM6 is not set -CONFIG_CPU_SA110=y -CONFIG_FRAME_POINTER=y -# CONFIG_BINUTILS_NEW is not set -CONFIG_DEBUG_ERRORS=y CONFIG_NET=y CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y +CONFIG_NWFPE=y CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=m -# CONFIG_BINFMT_JAVA is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +# CONFIG_ARTHUR is not set CONFIG_PARPORT=y CONFIG_PARPORT_PC=y +CONFIG_CMDLINE="root=/dev/hda2 ro mem=32M parport=0x378,7 ide0=autotune" +CONFIG_LEDS=y +CONFIG_LEDS_TIMER=y +# CONFIG_LEDS_CPU is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set # -# Floppy, IDE, and other block devices +# Block devices # -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set CONFIG_BLK_DEV_IDE=y # @@ -52,32 +76,165 @@ CONFIG_BLK_DEV_IDE=y # # CONFIG_BLK_DEV_HD_IDE is not set CONFIG_BLK_DEV_IDEDISK=y -CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDECD is not set # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_BLK_DEV_IDESCSI is not set -# CONFIG_BLK_DEV_IDE_PCMCIA is not set -CONFIG_BLK_DEV_IDE_CARDS=y -CONFIG_BLK_DEV_IDE_ICSIDE=y -# CONFIG_BLK_DEV_IDE_RAPIDE is not set -# CONFIG_BLK_DEV_XD is not set +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_BLK_DEV_IDEDMA=y +CONFIG_BLK_DEV_OFFBOARD=y +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_VIA82C586 is not set +# CONFIG_BLK_DEV_CMD646 is not set +CONFIG_BLK_DEV_SL82C105=y +# CONFIG_IDE_CHIPSETS is not set # # Additional Block Devices # CONFIG_BLK_DEV_LOOP=m -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_STRIPED=m +CONFIG_MD_MIRRORING=m +CONFIG_MD_RAID5=m CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_INITRD=y +# CONFIG_BLK_DEV_INITRD is not set +# CONFIG_BLK_DEV_XD is not set CONFIG_PARIDE_PARPORT=y -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PART=y +CONFIG_PARIDE=m + +# +# Parallel IDE high-level drivers +# +CONFIG_PARIDE_PD=m +CONFIG_PARIDE_PCD=m +CONFIG_PARIDE_PF=m +CONFIG_PARIDE_PT=m +CONFIG_PARIDE_PG=m + +# +# Parallel IDE protocol modules +# +CONFIG_PARIDE_ATEN=m +CONFIG_PARIDE_BPCK=m +CONFIG_PARIDE_COMM=m +CONFIG_PARIDE_DSTR=m +CONFIG_PARIDE_FIT2=m +CONFIG_PARIDE_FIT3=m +CONFIG_PARIDE_EPAT=m +CONFIG_PARIDE_EPIA=m +CONFIG_PARIDE_FRIQ=m +CONFIG_PARIDE_FRPW=m +CONFIG_PARIDE_KBIC=m +CONFIG_PARIDE_KTTI=m +CONFIG_PARIDE_ON20=m +CONFIG_PARIDE_ON26=m # CONFIG_BLK_DEV_HD is not set # +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +# CONFIG_SERIAL_EXTENDED is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_UNIX98_PTYS is not set +CONFIG_PRINTER=m +CONFIG_PRINTER_READBACK=y +CONFIG_MOUSE=y + +# +# Mice +# +# CONFIG_ATIXL_BUSMOUSE is not set +# CONFIG_BUSMOUSE is not set +# CONFIG_MS_BUSMOUSE is not set +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_QIC02_TAPE is not set +CONFIG_WATCHDOG=y + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG_NOWAYOUT is not set +# CONFIG_WDT is not set +CONFIG_SOFT_WATCHDOG=y +# CONFIG_PCWATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +CONFIG_DS1620=y +CONFIG_NWBUTTON=y +CONFIG_NWBUTTON_REBOOT=y +CONFIG_NWFLASH=m +# CONFIG_NVRAM is not set +CONFIG_RTC=y + +# +# Video For Linux +# +# CONFIG_VIDEO_DEV is not set + +# +# Joystick support +# +# CONFIG_JOYSTICK is not set +# CONFIG_DTLK is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +CONFIG_FB=y +CONFIG_DUMMY_CONSOLE=y +# CONFIG_FB_PM2 is not set +CONFIG_FB_CYBER2000=y +# CONFIG_FB_MATROX is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_FBCON_ADVANCED=y +# CONFIG_FBCON_MFB is not set +# CONFIG_FBCON_CFB2 is not set +# CONFIG_FBCON_CFB4 is not set +CONFIG_FBCON_CFB8=y +CONFIG_FBCON_CFB16=y +CONFIG_FBCON_CFB24=y +# CONFIG_FBCON_CFB32 is not set +# CONFIG_FBCON_AFB is not set +# CONFIG_FBCON_ILBM is not set +# CONFIG_FBCON_IPLAN2P2 is not set +# CONFIG_FBCON_IPLAN2P4 is not set +# CONFIG_FBCON_IPLAN2P8 is not set +# CONFIG_FBCON_MAC is not set +CONFIG_FBCON_VGA=y +# CONFIG_FBCON_FONTWIDTH8_ONLY is not set +CONFIG_FBCON_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +# CONFIG_FONT_SUN8x16 is not set +# CONFIG_FONT_SUN12x22 is not set +# CONFIG_FONT_6x11 is not set +# CONFIG_FONT_PEARL_8x8 is not set +CONFIG_FONT_ACORN_8x8=y + +# # Networking options # -# CONFIG_PACKET is not set +CONFIG_PACKET=y # CONFIG_NETLINK is not set # CONFIG_FIREWALL is not set # CONFIG_FILTER is not set @@ -85,21 +242,20 @@ CONFIG_UNIX=y CONFIG_INET=y # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_IP_ACCT is not set -# CONFIG_IP_MASQUERADE is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set # CONFIG_IP_ROUTER is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_ALIAS is not set +CONFIG_IP_ALIAS=y # CONFIG_SYN_COOKIES is not set # # (it is safe to leave these untouched) # # CONFIG_INET_RARP is not set -CONFIG_IP_NOSR=y -# CONFIG_SKB_LARGE is not set +CONFIG_SKB_LARGE=y # CONFIG_IPV6 is not set # @@ -111,107 +267,198 @@ CONFIG_IP_NOSR=y # CONFIG_LAPB is not set # CONFIG_BRIDGE is not set # CONFIG_LLC is not set +# CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set # CONFIG_NET_FASTROUTE is not set # CONFIG_NET_HW_FLOWCONTROL is not set # CONFIG_CPU_IS_SLOW is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set -# CONFIG_NET_PROFILE is not set # -# SCSI support +# Amateur Radio support # -CONFIG_SCSI=y +# CONFIG_HAMRADIO is not set # -# SCSI support type (disk, tape, CD-ROM) +# IrDA subsystem support # -CONFIG_BLK_DEV_SD=y -# CONFIG_CHR_DEV_ST is not set -CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set -# CONFIG_CHR_DEV_SG is not set +# CONFIG_IRDA is not set # -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# Network device support # -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y +CONFIG_NETDEVICES=y +# CONFIG_ARCNET is not set +# CONFIG_DUMMY is not set +# CONFIG_EQUALIZER is not set +CONFIG_NET_ETHERNET=y +# CONFIG_ARM_AM79C961A is not set +CONFIG_NET_VENDOR_3COM=y +# CONFIG_EL1 is not set +# CONFIG_EL2 is not set +# CONFIG_ELPLUS is not set +# CONFIG_EL16 is not set +# CONFIG_EL3 is not set +# CONFIG_3C515 is not set +CONFIG_VORTEX=y +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_RTL8139 is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_ACENIC is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_EISA=y +# CONFIG_PCNET32 is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 is not set +# CONFIG_DE4X5 is not set +CONFIG_DEC_ELCP=m +# CONFIG_DGRS is not set +# CONFIG_EEXPRESS_PRO100 is not set +# CONFIG_LNE390 is not set +# CONFIG_NE3210 is not set +CONFIG_NE2K_PCI=y +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_ES3210 is not set +# CONFIG_EPIC100 is not set +# CONFIG_ZNET is not set +# CONFIG_NET_POCKET is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_DLCI is not set +# CONFIG_PLIP is not set +CONFIG_PPP=m # -# SCSI low-level drivers +# CCP compressors for PPP are only built as modules. # -CONFIG_SCSI_ACORNSCSI_3=m -CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE=y -CONFIG_SCSI_ACORNSCSI_SYNC=y -CONFIG_SCSI_CUMANA_2=m -CONFIG_SCSI_POWERTECSCSI=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +CONFIG_SLIP_MODE_SLIP6=y +# CONFIG_NET_RADIO is not set +# CONFIG_TR is not set +# CONFIG_SHAPER is not set +# CONFIG_HOSTESS_SV11 is not set +# CONFIG_COSA is not set +# CONFIG_RCPCI is not set # -# The following drives are not fully supported +# SCSI support # -CONFIG_SCSI_CUMANA_1=m -CONFIG_SCSI_OAK1=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_PPA_HAVE_PEDANTIC=2 +# CONFIG_SCSI is not set # -# Network device support +# Sound # -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_EQUALIZER is not set -CONFIG_PPP=m +CONFIG_SOUND=m +# CONFIG_SOUND_ES1370 is not set +# CONFIG_SOUND_ES1371 is not set +# CONFIG_SOUND_SONICVIBES is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_OSS=m +# CONFIG_SOUND_PAS is not set +CONFIG_SOUND_SB=m +CONFIG_SOUND_ADLIB=m +# CONFIG_SOUND_GUS is not set +# CONFIG_SOUND_MPU401 is not set +# CONFIG_SOUND_PSS is not set +# CONFIG_SOUND_MSS is not set +# CONFIG_SOUND_SSCAPE is not set +# CONFIG_SOUND_TRIX is not set +# CONFIG_SOUND_MAD16 is not set +# CONFIG_SOUND_WAVEFRONT is not set +# CONFIG_SOUND_CS4232 is not set +# CONFIG_SOUND_OPL3SA2 is not set +# CONFIG_SOUND_MAUI is not set +# CONFIG_SOUND_SGALAXY is not set +# CONFIG_SOUND_AD1816 is not set +# CONFIG_SOUND_OPL3SA1 is not set +# CONFIG_SOUND_SOFTOSS is not set +# CONFIG_SOUND_YM3812 is not set +# CONFIG_SOUND_VMIDI is not set +# CONFIG_SOUND_UART6850 is not set +# CONFIG_SOUND_VIDC is not set +CONFIG_SOUND_WAVEARTIST=m +CONFIG_WAVEARTIST_BASE=250 +CONFIG_WAVEARTIST_IRQ=12 +CONFIG_WAVEARTIST_DMA=3 +CONFIG_WAVEARTIST_DMA2=7 # -# CCP compressors for PPP are only built as modules. +# Additional low level sound drivers # -# CONFIG_SLIP is not set -CONFIG_ETHER1=m -CONFIG_ETHER3=m -CONFIG_ETHERH=m +# CONFIG_LOWLEVEL_SOUND is not set # # Filesystems # # CONFIG_QUOTA is not set -# CONFIG_MINIX_FS is not set -CONFIG_EXT2_FS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y +# CONFIG_AUTOFS_FS is not set +CONFIG_ADFS_FS=y +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m # CONFIG_UMSDOS_FS is not set -CONFIG_VFAT_FS=y +CONFIG_VFAT_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_MINIX_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set CONFIG_PROC_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set CONFIG_NFS_FS=y -CONFIG_NFSD=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=m +# CONFIG_NFSD_SUN is not set CONFIG_SUNRPC=y CONFIG_LOCKD=y -# CONFIG_CODA_FS is not set # CONFIG_SMB_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_UFS_FS is not set -CONFIG_ADFS_FS=y -CONFIG_ADFS_FS=y +# CONFIG_NCP_FS is not set + +# +# Partition Types +# +# CONFIG_OSF_PARTITION is not set # CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SGI_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +CONFIG_ACORN_PARTITION=y +CONFIG_ACORN_PARTITION_ADFS=y +# CONFIG_ACORN_PARTITION_ICS is not set +# CONFIG_ACORN_PARTITION_POWERTEC is not set +# CONFIG_ACORN_PARTITION_RISCIX is not set CONFIG_NLS=y # # Native Language Support # -# CONFIG_NLS_CODEPAGE_437 is not set +CONFIG_NLS_CODEPAGE_437=m # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set @@ -223,8 +470,8 @@ CONFIG_NLS=y # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_1 is not set -# CONFIG_NLS_ISO8859_2 is not set +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m # CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set @@ -232,34 +479,15 @@ CONFIG_NLS=y # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_ISO8859_9 is not set +CONFIG_NLS_ISO8859_15=m # CONFIG_NLS_KOI8_R is not set # -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -# CONFIG_SERIAL_CONSOLE is not set -# CONFIG_SERIAL_EXTENDED is not set -CONFIG_ATOMWIDE_SERIAL=y -CONFIG_DUALSP_SERIAL=y -CONFIG_MOUSE=y -CONFIG_PRINTER=m -CONFIG_PRINTER_READBACK=y -# CONFIG_UMISC is not set -# CONFIG_WATCHDOG is not set -CONFIG_RPCMOUSE=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_VIDC=y -CONFIG_AUDIO=y -DSP_BUFFSIZE=65536 - -# # Kernel hacking # +CONFIG_FRAME_POINTER=y +CONFIG_DEBUG_ERRORS=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_INFO is not set CONFIG_MAGIC_SYSRQ=y +# CONFIG_DEBUG_LL is not set diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 23b2c1267..5bcc22af1 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -9,31 +9,37 @@ HEAD_OBJ = head-$(PROCESSOR).o ENTRY_OBJ = entry-$(PROCESSOR).o O_TARGET := kernel.o -O_OBJS := $(ENTRY_OBJ) ioport.o irq.o process.o ptrace.o setup.o \ +O_OBJS := $(ENTRY_OBJ) irq.o process.o ptrace.o setup.o \ signal.o sys_arm.o time.o traps.o -DMA_OBJS_arc = dma-arc.o -DMA_OBJS_a5k = dma-a5k.o -DMA_OBJS_rpc = dma-rpc.o -DMA_OBJS_ebsa110 = dma-dummy.o -DMA_OBJS_ebsa285 = dma-ebsa285.o -DMA_OBJS_nexuspci = -DMA_OBJS_vnc = dma-vnc.o - -O_OBJS_arc = ecard.o iic.o fiq.o oldlatches.o -O_OBJS_a5k = ecard.o iic.o fiq.o -O_OBJS_rpc = ecard.o iic.o fiq.o -O_OBJS_ebsa110 = leds-ebsa110.o -O_OBJS_ebsa285 = leds-ebsa285.o hw-ebsa285.o -O_OBJS_nexuspci = -O_OBJS_vnc = leds-ebsa285.o hw-vnc.o +ifeq ($(CONFIG_ISA_DMA),y) + ISA_DMA_OBJS += dma-isa.o +endif + +O_OBJS_arc = dma-arc.o iic.o fiq.o oldlatches.o +O_OBJS_a5k = dma-a5k.o iic.o fiq.o +O_OBJS_rpc = dma-rpc.o iic.o fiq.o +O_OBJS_ebsa110 = dma-dummy.o +O_OBJS_footbridge = dma-footbridge.o $(ISA_DMA_OBJS) +O_OBJS_nexuspci = dma-dummy.o + +OX_OBJS_arc = dma.o +OX_OBJS_a5k = dma.o +OX_OBJS_rpc = dma.o +OX_OBJS_ebsa110 = +OX_OBJS_footbridge= dma.o hw-footbridge.o +OX_OBJS_nexuspci = all: lib kernel.o $(HEAD_OBJ) init_task.o +O_OBJS += $(O_OBJS_$(MACHINE)) + ifeq ($(CONFIG_MODULES),y) OX_OBJS = armksyms.o -else - O_OBJS += armksyms.o +endif + +ifeq ($(CONFIG_ARCH_ACORN),y) + OX_OBJS += ecard.o endif ifeq ($(MACHINE),nexuspci) @@ -46,17 +52,23 @@ else endif endif -ifneq ($(DMA_OBJS_$(MACHINE)),) - OX_OBJS += dma.o - O_OBJS += $(DMA_OBJS_$(MACHINE)) - ifeq ($(CONFIG_ISA_DMA),y) - O_OBJS += dma-isa.o - endif +ifdef CONFIG_LEDS + OX_OBJS += leds-$(MACHINE).o +endif + +ifeq ($(CONFIG_MODULES),y) + OX_OBJS += $(OX_OBJS_$(MACHINE)) else - O_OBJS += dma-dummy.o + O_OBJS += $(OX_OBJS_$(MACHINE)) endif -O_OBJS += $(O_OBJS_$(MACHINE)) +ifeq ($(CONFIG_ARTHUR),y) + O_OBJS += arthur.o +else + ifeq ($(CONFIG_ARTHUR),m) + M_OBJS += arthur.o + endif +endif $(HEAD_OBJ): $(HEAD_OBJ:.o=.S) $(CC) -D__ASSEMBLY__ -DTEXTADDR=$(TEXTADDR) -traditional -c $(HEAD_OBJ:.o=.S) -o $@ @@ -72,3 +84,7 @@ $(ENTRY_OBJ): ../lib/constants.h lib: $(MAKE) -C ../lib constants.h + +# Spell out some dependencies that `make dep' doesn't spot +entry-armv.o: calls.S +entry-armo.o: calls.S diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 149349b4a..c93421ba7 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -6,20 +6,40 @@ #include <linux/mman.h> #include <linux/pci.h> #include <linux/delay.h> +#include <linux/in6.h> -#include <asm/ecard.h> #include <asm/elf.h> #include <asm/io.h> #include <asm/dma.h> #include <asm/pgtable.h> +#include <asm/semaphore.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/checksum.h> extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, struct user_fp_struct *); extern void inswb(unsigned int port, void *to, int len); extern void outswb(unsigned int port, const void *to, int len); +extern unsigned int local_bh_count[NR_CPUS]; +extern unsigned int local_irq_count[NR_CPUS]; + +extern void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags); +extern void iounmap(void *addr); + +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* + * syscalls + */ +extern int sys_write(int, const char *, int); +extern int sys_read(int, char *, int); +extern int sys_lseek(int, off_t, int); +extern int sys_open(const char *, int, int); +extern int sys_exit(int); +extern int sys_wait4(int, int *, int, struct rusage *); + /* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that @@ -43,6 +63,8 @@ extern void __udivsi3(void); extern void __umoddi3(void); extern void __umodsi3(void); +extern void ret_from_exception(void); +extern void fpundefinstr(void); extern void fp_enter(void); #define EXPORT_SYMBOL_ALIAS(sym,orig) \ const char __kstrtab_##sym##[] __attribute__((section(".kstrtab"))) = \ @@ -57,32 +79,46 @@ EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter); EXPORT_SYMBOL_ALIAS(fp_printk,printk); EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig); +#ifdef CONFIG_CPU_26 +EXPORT_SYMBOL(fpundefinstr); +EXPORT_SYMBOL(ret_from_exception); +#endif + /* platform dependent support */ EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(udelay); EXPORT_SYMBOL(xchg_str); - - /* expansion card support */ -#ifdef CONFIG_ARCH_ACORN -EXPORT_SYMBOL(ecard_startfind); -EXPORT_SYMBOL(ecard_find); -EXPORT_SYMBOL(ecard_readchunk); -EXPORT_SYMBOL(ecard_address); +EXPORT_SYMBOL(local_bh_count); +EXPORT_SYMBOL(local_irq_count); +#ifdef CONFIG_CPU_32 +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); #endif +EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); /* processor dependencies */ EXPORT_SYMBOL(processor); -EXPORT_SYMBOL(machine_type); +EXPORT_SYMBOL(__machine_arch_type); + + /* networking */ +EXPORT_SYMBOL(csum_partial_copy); +EXPORT_SYMBOL(__csum_ipv6_magic); /* io */ -EXPORT_SYMBOL(outswb); +EXPORT_SYMBOL(outsb); EXPORT_SYMBOL(outsw); -EXPORT_SYMBOL(inswb); +EXPORT_SYMBOL(outsl); +EXPORT_SYMBOL(insb); EXPORT_SYMBOL(insw); +EXPORT_SYMBOL(insl); + +EXPORT_SYMBOL(_memcpy_fromio); +EXPORT_SYMBOL(_memcpy_toio); +EXPORT_SYMBOL(_memset_io); /* address translation */ #ifndef __virt_to_phys__is_a_macro @@ -98,7 +134,9 @@ EXPORT_SYMBOL(__virt_to_bus); EXPORT_SYMBOL(__bus_to_virt); #endif +#ifndef CONFIG_NO_PGT_CACHE EXPORT_SYMBOL(quicklists); +#endif EXPORT_SYMBOL(__bad_pmd); EXPORT_SYMBOL(__bad_pmd_kernel); @@ -167,3 +205,17 @@ EXPORT_SYMBOL(find_next_zero_bit); EXPORT_SYMBOL(armidlist); EXPORT_SYMBOL(armidindex); EXPORT_SYMBOL(elf_platform); + + /* syscalls */ +EXPORT_SYMBOL(sys_write); +EXPORT_SYMBOL(sys_read); +EXPORT_SYMBOL(sys_lseek); +EXPORT_SYMBOL(sys_open); +EXPORT_SYMBOL(sys_exit); +EXPORT_SYMBOL(sys_wait4); + + /* semaphores */ +EXPORT_SYMBOL_NOVERS(__down_failed); +EXPORT_SYMBOL_NOVERS(__down_interruptible_failed); +EXPORT_SYMBOL_NOVERS(__up_wakeup); + diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c new file mode 100644 index 000000000..9994fdd4a --- /dev/null +++ b/arch/arm/kernel/arthur.c @@ -0,0 +1,88 @@ +/* + * Arthur personality + * Copyright (C) 1998 Philip Blundell + */ + +#include <linux/personality.h> +#include <linux/module.h> +#include <linux/stddef.h> +#include <linux/signal.h> +#include <linux/sched.h> + +#include <asm/ptrace.h> + +/* RISC OS doesn't have many signals, and a lot of those that it does + have don't map easily to any Linux equivalent. Never mind. */ + +#define RISCOS_SIGABRT 1 +#define RISCOS_SIGFPE 2 +#define RISCOS_SIGILL 3 +#define RISCOS_SIGINT 4 +#define RISCOS_SIGSEGV 5 +#define RISCOS_SIGTERM 6 +#define RISCOS_SIGSTAK 7 +#define RISCOS_SIGUSR1 8 +#define RISCOS_SIGUSR2 9 +#define RISCOS_SIGOSERROR 10 + +static unsigned long riscos_to_linux_signals[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +static unsigned long linux_to_riscos_signals[32] = { + 0, -1, RISCOS_SIGINT, -1, + RISCOS_SIGILL, 5, RISCOS_SIGABRT, 7, + RISCOS_SIGFPE, 9, RISCOS_SIGUSR1, RISCOS_SIGSEGV, + RISCOS_SIGUSR2, 13, 14, RISCOS_SIGTERM, + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31 +}; + +static void arthur_lcall7(int nr, struct pt_regs *regs) +{ + struct siginfo info; + info.si_signo = SIGSWI; + info.si_code = nr; + /* Bounce it to the emulator */ + send_sig_info(SIGSWI, &info, current); +} + +static struct exec_domain riscos_exec_domain = { + "Arthur", /* name */ + (lcall7_func)arthur_lcall7, + PER_RISCOS, PER_RISCOS, + riscos_to_linux_signals, + linux_to_riscos_signals, +#ifdef MODULE + &__this_module, /* No usage counter. */ +#else + NULL, +#endif + NULL /* Nothing after this in the list. */ +}; + +/* + * We could do with some locking to stop Arthur being removed while + * processes are using it. + */ + +#ifdef MODULE +int init_module(void) +#else +int initialise_arthur(void) +#endif +{ + return register_exec_domain(&riscos_exec_domain); +} + +#ifdef MODULE +void cleanup_module(void) +{ + unregister_exec_domain(&riscos_exec_domain); +} +#endif diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 46f71fa92..154e3aeab 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -31,7 +31,7 @@ .long SYMBOL_NAME(sys_lseek) /* 20 */ .long SYMBOL_NAME(sys_getpid) .long SYMBOL_NAME(sys_mount_wrapper) - .long SYMBOL_NAME(sys_umount) + .long SYMBOL_NAME(sys_oldumount) .long SYMBOL_NAME(sys_setuid) .long SYMBOL_NAME(sys_getuid) /* 25 */ .long SYMBOL_NAME(sys_stime) @@ -61,7 +61,7 @@ .long SYMBOL_NAME(sys_geteuid) /* 50 */ .long SYMBOL_NAME(sys_getegid) .long SYMBOL_NAME(sys_acct) - .long SYMBOL_NAME(sys_ni_syscall) /* was sys_phys */ + .long SYMBOL_NAME(sys_umount) .long SYMBOL_NAME(sys_ni_syscall) /* was sys_lock */ .long SYMBOL_NAME(sys_ioctl) /* 55 */ .long SYMBOL_NAME(sys_fcntl) @@ -110,7 +110,7 @@ .long SYMBOL_NAME(sys_ni_syscall) /* was sys_profil */ .long SYMBOL_NAME(sys_statfs) /* 100 */ .long SYMBOL_NAME(sys_fstatfs) - .long SYMBOL_NAME(sys_ni_syscall) /* .long _sys_ioperm */ + .long SYMBOL_NAME(sys_ni_syscall) .long SYMBOL_NAME(sys_socketcall) .long SYMBOL_NAME(sys_syslog) .long SYMBOL_NAME(sys_setitimer) @@ -119,7 +119,7 @@ .long SYMBOL_NAME(sys_newlstat) .long SYMBOL_NAME(sys_newfstat) .long SYMBOL_NAME(sys_uname) -/* 110 */ .long SYMBOL_NAME(sys_iopl) +/* 110 */ .long SYMBOL_NAME(sys_ni_syscall) .long SYMBOL_NAME(sys_vhangup) .long SYMBOL_NAME(sys_idle) .long SYMBOL_NAME(sys_syscall) /* call a syscall */ @@ -196,6 +196,10 @@ .long SYMBOL_NAME(sys_capget) /* 185 */ .long SYMBOL_NAME(sys_capset) .long SYMBOL_NAME(sys_sigaltstack_wrapper) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) +/* 190 */ .long SYMBOL_NAME(sys_vfork_wrapper) .rept NR_syscalls-186 .long SYMBOL_NAME(sys_ni_syscall) diff --git a/arch/arm/kernel/dec21285.c b/arch/arm/kernel/dec21285.c index aa66ee04a..c4103abee 100644 --- a/arch/arm/kernel/dec21285.c +++ b/arch/arm/kernel/dec21285.c @@ -8,17 +8,17 @@ #include <linux/pci.h> #include <linux/ptrace.h> #include <linux/interrupt.h> +#include <linux/mm.h> #include <linux/init.h> #include <asm/irq.h> #include <asm/system.h> +#include <asm/hardware.h> -#define MAX_SLOTS 20 +#define MAX_SLOTS 21 extern void pcibios_fixup_ebsa285(struct pci_dev *dev); extern void pcibios_init_ebsa285(void); -extern void pcibios_fixup_vnc(struct pci_dev *dev); -extern void pcibios_init_vnc(void); int pcibios_present(void) @@ -33,11 +33,12 @@ pcibios_base_address(unsigned char bus, unsigned char dev_fn) int slot = PCI_SLOT(dev_fn); if (slot < MAX_SLOTS) - return 0xf8c00000 + (slot << 11) + (PCI_FUNC(dev_fn) << 8); + return PCICFG0_BASE + 0xc00000 + + (slot << 11) + (PCI_FUNC(dev_fn) << 8); else return 0; } else - return 0xf9000000 | (bus << 16) | (dev_fn << 8); + return PCICFG1_BASE | (bus << 16) | (dev_fn << 8); } int @@ -151,10 +152,7 @@ __initfunc(void pcibios_fixup(void)) struct pci_dev *dev; for (dev = pci_devices; dev; dev = dev->next) { - if (machine_is_ebsa285() || machine_is_cats()) - pcibios_fixup_ebsa285(dev); - if (machine_is_netwinder()) - pcibios_fixup_vnc(dev); + pcibios_fixup_ebsa285(dev); pcibios_write_config_byte(dev->bus->number, dev->devfn, PCI_INTERRUPT_LINE, dev->irq); @@ -164,18 +162,83 @@ __initfunc(void pcibios_fixup(void)) dev->bus->number, dev->devfn, dev->vendor, dev->device, dev->irq); } - if (machine_is_netwinder()) - hw_init(); + + hw_init(); } __initfunc(void pcibios_init(void)) { - if (machine_is_ebsa285() || machine_is_cats()) - pcibios_init_ebsa285(); - if (machine_is_netwinder()) - pcibios_init_vnc(); - - printk("DEC21285 PCI revision %02X\n", *(unsigned char *)0xfe000008); + unsigned int mem_size = (unsigned int)high_memory - PAGE_OFFSET; + unsigned long cntl; + + *CSR_SDRAMBASEMASK = (mem_size - 1) & 0x0ffc0000; + *CSR_SDRAMBASEOFFSET = 0; + *CSR_ROMBASEMASK = 0x80000000; + *CSR_CSRBASEMASK = 0; + *CSR_CSRBASEOFFSET = 0; + *CSR_PCIADDR_EXTN = 0; + +#ifdef CONFIG_HOST_FOOTBRIDGE + /* + * Against my better judgement, Philip Blundell still seems + * to be saying that we should initialise the PCI stuff here + * when the PCI_CFN bit is not set, dispite my comment below, + * which he decided to remove. If it is not set, then + * the card is in add-in mode, and we're in a machine where + * the bus is set up by 'others'. + * + * We should therefore not mess about with the mapping in + * anyway, and we should not be using the virt_to_bus functions + * that exist in the HOST architecture mode (since they assume + * a fixed mapping). + * + * Instead, you should be using ADDIN mode, which allows for + * this situation. This does assume that you have correctly + * initialised the PCI bus, which you must have done to get + * your PC booted. + * + * Unfortunately, he seems to be blind to this. I guess he'll + * also remove all this. + * + * And THIS COMMENT STAYS, even if this gets patched, thank + * you. + */ + + /* + * Map our SDRAM at a known address in PCI space, just in case + * the firmware had other ideas. Using a nonzero base is + * necessary, since some VGA cards forcefully use PCI addresses + * in the range 0x000a0000 to 0x000c0000. (eg, S3 cards). + * + * NOTE! If you need to chec the PCI_CFN bit in the SA110 + * control register then you've configured the kernel wrong. + * If you're not using host mode, then DO NOT set + * CONFIG_HOST_FOOTBRIDGE, but use CONFIG_ADDIN_FOOTBRIDGE + * instead. In this case, you MUST supply some firmware + * to allow your PC to boot, plus we should not modify the + * mappings that the PC BIOS has set up for us. + */ + *CSR_PCICACHELINESIZE = 0x00002008; + *CSR_PCICSRBASE = 0; + *CSR_PCICSRIOBASE = 0; + *CSR_PCISDRAMBASE = virt_to_bus((void *)PAGE_OFFSET); + *CSR_PCIROMBASE = 0; + *CSR_PCICMD = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | PCI_COMMAND_FAST_BACK | + PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY | + (1 << 31) | (1 << 29) | (1 << 28) | (1 << 24); +#endif + + /* + * Clear any existing errors - we aren't + * interested in historical data... + */ + cntl = *CSR_SA110_CNTL & 0xffffde07; + *CSR_SA110_CNTL = cntl | SA110_CNTL_RXSERR; + + pcibios_init_ebsa285(); + + printk(KERN_DEBUG"PCI: DEC21285 revision %02lX\n", *CSR_CLASSREV & 0xff); } __initfunc(void pcibios_fixup_bus(struct pci_bus *bus)) diff --git a/arch/arm/kernel/dma-a5k.c b/arch/arm/kernel/dma-a5k.c index 18bbf0c9c..df02ea54e 100644 --- a/arch/arm/kernel/dma-a5k.c +++ b/arch/arm/kernel/dma-a5k.c @@ -12,7 +12,6 @@ #include <asm/fiq.h> #include <asm/io.h> #include <asm/hardware.h> -#include <asm/pgtable.h> #include "dma.h" @@ -37,8 +36,9 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) if (channel != DMA_VIRTUAL_FLOPPY) printk("arch_dma_count: invalid channel %d\n", channel); else { - extern int floppy_fiqresidual(void); - return floppy_fiqresidual(); + struct pt_regs regs; + get_fiq_regs(®s); + return regs.ARM_r9; } return 0; } @@ -48,6 +48,7 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) if (channel != DMA_VIRTUAL_FLOPPY) printk("arch_enable_dma: invalid channel %d\n", channel); else { + struct pt_regs regs; void *fiqhandler_start; unsigned int fiqhandler_length; extern void floppy_fiqsetup(unsigned long len, unsigned long addr, @@ -67,8 +68,10 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) return; } memcpy((void *)0x1c, fiqhandler_start, fiqhandler_length); - flush_page_to_ram(0); - floppy_fiqsetup(dma->buf.length, __bus_to_virt(dma->buf.address), (int)PCIO_FLOPPYDMABASE); + regs.ARM_r9 = dma->buf.length; + regs.ARM_r10 = __bus_to_virt(dma->buf.address); + regs.ARM_fp = (int)PCIO_FLOPPYDMABASE; + set_fiq_regs(®s); enable_irq(dma->dma_irq); } } @@ -83,6 +86,11 @@ void arch_disable_dma(dmach_t channel, dma_t *dma) } } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns) +{ + return 0; +} + __initfunc(void arch_dma_init(dma_t *dma)) { dma[DMA_VIRTUAL_FLOPPY].dma_irq = 64; diff --git a/arch/arm/kernel/dma-arc.c b/arch/arm/kernel/dma-arc.c index 27a139ad4..9be27bdae 100644 --- a/arch/arm/kernel/dma-arc.c +++ b/arch/arm/kernel/dma-arc.c @@ -1,10 +1,11 @@ /* * arch/arm/kernel/dma-arc.c * - * Copyright (C) 1998 Dave Gilbert / Russell King + * Copyright (C) 1998-1999 Dave Gilbert / Russell King * * DMA functions specific to Archimedes architecture */ +#include <linux/config.h> #include <linux/sched.h> #include <linux/init.h> @@ -14,7 +15,7 @@ #include "dma.h" -int arch_request_dma(dmach_t channel, dma_t *dma) +int arch_request_dma(dmach_t channel, dma_t *dma, const char * dev_id) { if (channel == DMA_VIRTUAL_FLOPPY0 || channel == DMA_VIRTUAL_FLOPPY1) @@ -25,16 +26,12 @@ int arch_request_dma(dmach_t channel, dma_t *dma) void arch_free_dma(dmach_t channel, dma_t *dma) { - if (channel != DMA_VIRTUAL_FLOPPY0 && - channel != DMA_VIRTUAL_FLOPPY1) - return 0; - else - return -EINVAL; } void arch_enable_dma(dmach_t channel, dma_t *dma) { switch (channel) { +#ifdef CONFIG_BLK_DEV_FD case DMA_VIRTUAL_FLOPPY0: { /* Data DMA */ switch (dma->dma_mode) { case DMA_MODE_READ: /* read */ @@ -96,9 +93,38 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) restore_flags(flags); } break; +#endif } } +int arch_get_dma_residue(dmach_t channel, dma_t *dma) +{ + switch (channel) { +#ifdef CONFIG_BLK_DEV_FD + case DMA_VIRTUAL_FLOPPY0: { /* Data DMA */ + extern unsigned int fdc1772_bytestogo; + + /* 10/1/1999 DAG - I presume its the number of bytes left? */ + return fdc1772_bytestogo; + }; + break; + + case DMA_VIRTUAL_FLOPPY1: { /* Command completed */ + /* 10/1/1999 DAG - Presume whether there is an outstanding command? */ + extern unsigned int fdc1772_fdc_int_done; + + return (fdc1772_fdc_int_done==0)?1:0; /* Explicit! If the int done is 0 then 1 int to go */ + }; + break; + +#endif + + default: + printk("dma-arc.c:arch_get_dma_residue called with unknown/unconfigured DMA channel\n"); + return 0; + }; +} + void arch_disable_dma(dmach_t channel, dma_t *dma) { if (channel != DMA_VIRTUAL_FLOPPY0 && @@ -108,6 +134,11 @@ void arch_disable_dma(dmach_t channel, dma_t *dma) disable_irq(dma->dma_irq); } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns) +{ + return 0; +} + __initfunc(void arch_dma_init(dma_t *dma)) { dma[DMA_VIRTUAL_FLOPPY0].dma_irq = 64; diff --git a/arch/arm/kernel/dma-dummy.c b/arch/arm/kernel/dma-dummy.c index be72a8965..db46ef1c3 100644 --- a/arch/arm/kernel/dma-dummy.c +++ b/arch/arm/kernel/dma-dummy.c @@ -9,6 +9,10 @@ #include <linux/errno.h> #include <linux/init.h> +#include <asm/spinlock.h> + +spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED; + int request_dma(int channel, const char *device_id) { return -EINVAL; diff --git a/arch/arm/kernel/dma-ebsa285.c b/arch/arm/kernel/dma-footbridge.c index f1c42dac2..a355283dc 100644 --- a/arch/arm/kernel/dma-ebsa285.c +++ b/arch/arm/kernel/dma-footbridge.c @@ -6,7 +6,9 @@ * DMA functions specific to EBSA-285/CATS architectures * * Changelog: - * 09/11/1998 RMK Split out ISA DMA functions to dma-isa.c + * 09-Nov-1998 RMK Split out ISA DMA functions to dma-isa.c + * 17-Mar-1999 RMK Allow any EBSA285-like architecture to have + * ISA DMA controllers. */ #include <linux/config.h> @@ -16,7 +18,6 @@ #include <linux/init.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/dma.h> #include <asm/io.h> #include <asm/hardware.h> @@ -24,16 +25,22 @@ #include "dma.h" #include "dma-isa.h" +#ifdef CONFIG_ISA_DMA +static int has_isa_dma; +#else +#define has_isa_dma 0 +#endif + int arch_request_dma(dmach_t channel, dma_t *dma, const char *dev_name) { switch (channel) { - case 0: - case 1: /* 21285 internal channels */ + case _DC21285_DMA(0): + case _DC21285_DMA(1): /* 21285 internal channels */ return 0; - case 2 ... 9: - if (machine_is_cats()) - return isa_request_dma(channel - 2, dma, dev_name); + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + return isa_request_dma(channel - _ISA_DMA(0), dma, dev_name); } return -EINVAL; @@ -49,14 +56,13 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) int residue = 0; switch (channel) { - case 0: - case 1: + case _DC21285_DMA(0): + case _DC21285_DMA(1): break; -#ifdef CONFIG_CATS - case 2 ... 9: - if (machine_is_cats()) - residue = isa_get_dma_residue(channel - 2); -#endif + + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + residue = isa_get_dma_residue(channel - _ISA_DMA(0), dma); } return residue; } @@ -64,38 +70,43 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) void arch_enable_dma(dmach_t channel, dma_t *dma) { switch (channel) { - case 0: - case 1: + case _DC21285_DMA(0): + case _DC21285_DMA(1): /* * Not yet implemented */ break; -#ifdef CONFIG_CATS - case 2 ... 9: - if (machine_is_cats()) - isa_enable_dma(channel - 2, dma); -#endif + + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + isa_enable_dma(channel - _ISA_DMA(0), dma); } } void arch_disable_dma(dmach_t channel, dma_t *dma) { switch (channel) { - case 0: - case 1: + case _DC21285_DMA(0): + case _DC21285_DMA(1): /* * Not yet implemented */ break; -#ifdef CONFIG_CATS - case 2 ... 9: - if (machine_is_cats()) - isa_disable_dma(channel - 2, dma); -#endif + + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + isa_disable_dma(channel - _ISA_DMA(0), dma); } } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns) +{ + return 0; +} + __initfunc(void arch_dma_init(dma_t *dma)) { - /* Nothing to do */ +#ifdef CONFIG_ISA_DMA + has_isa_dma = isa_init_dma(); +#endif } diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c index bdf7c6147..19be50433 100644 --- a/arch/arm/kernel/dma-isa.c +++ b/arch/arm/kernel/dma-isa.c @@ -11,6 +11,7 @@ * Copyright (C) 1998 Phil Blundell */ #include <linux/sched.h> +#include <linux/init.h> #include <asm/dma.h> #include <asm/io.h> @@ -18,6 +19,11 @@ #include "dma.h" #include "dma-isa.h" +#define ISA_DMA_MODE_READ 0x44 +#define ISA_DMA_MODE_WRITE 0x48 +#define ISA_DMA_MODE_CASCADE 0xc0 +#define ISA_DMA_AUTOINIT 0x10 + #define ISA_DMA_MASK 0 #define ISA_DMA_MODE 1 #define ISA_DMA_CLRFF 2 @@ -40,10 +46,7 @@ static unsigned int isa_dma_port[8][7] = { int isa_request_dma(int channel, dma_t *dma, const char *dev_name) { - if (channel != 4) - return 0; - - return -EINVAL; + return 0; } void isa_free_dma(int channel, dma_t *dma) @@ -56,25 +59,27 @@ int isa_get_dma_residue(int channel, dma_t *dma) unsigned int io_port = isa_dma_port[channel][ISA_DMA_COUNT]; int count; - count = 1 + inb(io_port) + (inb(io_port) << 8); + count = 1 + inb(io_port); + count |= inb(io_port) << 8; return channel < 4 ? count : (count << 1); } void isa_enable_dma(int channel, dma_t *dma) { - unsigned long address, length; - if (dma->invalid) { + unsigned long address, length; + unsigned int mode; + address = dma->buf.address; length = dma->buf.length - 1; - outb(address >> 24, isa_dma_port[channel][ISA_DMA_PGHI]); outb(address >> 16, isa_dma_port[channel][ISA_DMA_PGLO]); + outb(address >> 24, isa_dma_port[channel][ISA_DMA_PGHI]); if (channel >= 4) { address >>= 1; - length = (length >> 1) & 0xfe; /* why &0xfe? */ + length >>= 1; } outb(0, isa_dma_port[channel][ISA_DMA_CLRFF]); @@ -85,17 +90,31 @@ void isa_enable_dma(int channel, dma_t *dma) outb(length, isa_dma_port[channel][ISA_DMA_COUNT]); outb(length >> 8, isa_dma_port[channel][ISA_DMA_COUNT]); - outb(dma->dma_mode | (channel & 3), isa_dma_port[channel][ISA_DMA_MODE]); + mode = channel & 3; - switch (dma->dma_mode) { + switch (dma->dma_mode & DMA_MODE_MASK) { case DMA_MODE_READ: + mode |= ISA_DMA_MODE_READ; dma_cache_inv(__bus_to_virt(dma->buf.address), dma->buf.length); break; case DMA_MODE_WRITE: + mode |= ISA_DMA_MODE_WRITE; dma_cache_wback(__bus_to_virt(dma->buf.address), dma->buf.length); break; + + case DMA_MODE_CASCADE: + mode |= ISA_DMA_MODE_CASCADE; + break; + + default: + break; } + + if (dma->dma_mode & DMA_AUTOINIT) + mode |= ISA_DMA_AUTOINIT; + + outb(mode, isa_dma_port[channel][ISA_DMA_MODE]); dma->invalid = 0; } outb(channel & 3, isa_dma_port[channel][ISA_DMA_MASK]); @@ -105,3 +124,56 @@ void isa_disable_dma(int channel, dma_t *dma) { outb(channel | 4, isa_dma_port[channel][ISA_DMA_MASK]); } + +__initfunc(int isa_init_dma(void)) +{ + int dmac_found; + + outb(0xff, 0x0d); + outb(0xff, 0xda); + + outb(0x55, 0x00); + outb(0xaa, 0x00); + + dmac_found = inb(0x00) == 0x55 && inb(0x00) == 0xaa; + + if (dmac_found) { + int channel; + + for (channel = 0; channel < 8; channel++) + isa_disable_dma(channel, NULL); + + outb(0x40, 0x0b); + outb(0x41, 0x0b); + outb(0x42, 0x0b); + outb(0x43, 0x0b); + + outb(0xc0, 0xd6); + outb(0x41, 0xd6); + outb(0x42, 0xd6); + outb(0x43, 0xd6); + + outb(0, 0xd4); + + outb(0x10, 0x08); + outb(0x10, 0xd0); + + /* + * Is this correct? According to + * my documentation, it doesn't + * appear to be. It should be + * outb(0x3f, 0x40b); outb(0x3f, 0x4d6); + */ + outb(0x30, 0x40b); + outb(0x31, 0x40b); + outb(0x32, 0x40b); + outb(0x33, 0x40b); + outb(0x31, 0x4d6); + outb(0x32, 0x4d6); + outb(0x33, 0x4d6); + + request_dma(DMA_ISA_CASCADE, "cascade"); + } + + return dmac_found; +} diff --git a/arch/arm/kernel/dma-isa.h b/arch/arm/kernel/dma-isa.h index 3fcbdb3c7..2640f6c3a 100644 --- a/arch/arm/kernel/dma-isa.h +++ b/arch/arm/kernel/dma-isa.h @@ -23,3 +23,7 @@ void isa_enable_dma(int channel, dma_t *dma); */ void isa_disable_dma(int channel, dma_t *dma); +/* + * Initialise DMA + */ +int isa_init_dma(void); diff --git a/arch/arm/kernel/dma-rpc.c b/arch/arm/kernel/dma-rpc.c index 00cd95e79..d3fcd9116 100644 --- a/arch/arm/kernel/dma-rpc.c +++ b/arch/arm/kernel/dma-rpc.c @@ -11,10 +11,10 @@ #include <linux/init.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/dma.h> #include <asm/fiq.h> #include <asm/io.h> +#include <asm/iomd.h> #include <asm/hardware.h> #include <asm/uaccess.h> @@ -223,8 +223,9 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) break; case DMA_VIRTUAL_FLOPPY: { - extern int floppy_fiqresidual(void); - residue = floppy_fiqresidual(); + struct pt_regs regs; + get_fiq_regs(®s); + return regs.ARM_r9; } break; } @@ -286,7 +287,6 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) set_fiq_handler(fiqhandler_start, fiqhandler_length); set_fiq_regs(®s); enable_irq(dma->dma_irq); - } break; @@ -319,6 +319,46 @@ void arch_disable_dma(dmach_t channel, dma_t *dma) } } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle) +{ + int tcr, speed; + + if (cycle < 188) + speed = 3; + else if (cycle <= 250) + speed = 2; + else if (cycle < 438) + speed = 1; + else + speed = 0; + + tcr = inb(IOMD_DMATCR); + speed &= 3; + + switch (channel) { + case DMA_0: + tcr = (tcr & ~0x03) | speed; + break; + + case DMA_1: + tcr = (tcr & ~0x0c) | (speed << 2); + break; + + case DMA_2: + tcr = (tcr & ~0x30) | (speed << 4); + break; + + case DMA_3: + tcr = (tcr & ~0xc0) | (speed << 6); + break; + + default: + break; + } + + outb(tcr, IOMD_DMATCR); +} + __initfunc(void arch_dma_init(dma_t *dma)) { outb(0, IOMD_IO0CR); @@ -326,7 +366,7 @@ __initfunc(void arch_dma_init(dma_t *dma)) outb(0, IOMD_IO2CR); outb(0, IOMD_IO3CR); -// outb(0xf0, IOMD_DMATCR); + outb(0xa0, IOMD_DMATCR); dma[0].dma_base = ioaddr(IOMD_IO0CURA); dma[0].dma_irq = IRQ_DMA0; diff --git a/arch/arm/kernel/dma-vnc.c b/arch/arm/kernel/dma-vnc.c deleted file mode 100644 index 132fa627a..000000000 --- a/arch/arm/kernel/dma-vnc.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * arch/arm/kernel/dma-vnc.c - * - * Copyright (C) 1998 Russell King - */ -#include <linux/sched.h> -#include <linux/malloc.h> -#include <linux/mman.h> -#include <linux/init.h> - -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/dma.h> -#include <asm/io.h> -#include <asm/hardware.h> - -#include "dma.h" -#include "dma-isa.h" - -int arch_request_dma(dmach_t channel, dma_t *dma, const char *dev_name) -{ - if (channel < 8) - return isa_request_dma(channel, dma, dev_name); - return -EINVAL; -} - -void arch_free_dma(dmach_t channel, dma_t *dma) -{ - isa_free_dma(channel, dma); -} - -int arch_get_dma_residue(dmach_t channel, dma_t *dma) -{ - return isa_get_dma_residue(channel, dma); -} - -void arch_enable_dma(dmach_t channel, dma_t *dma) -{ - isa_enable_dma(channel, dma); -} - -void arch_disable_dma(dmach_t channel, dma_t *dma) -{ - isa_disable_dma(channel, dma); -} - -__initfunc(void arch_dma_init(dma_t *dma)) -{ - /* Nothing to do */ -} - diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index a164073ae..219e1f0f2 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -21,7 +21,6 @@ #include <linux/init.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/irq.h> #include <asm/hardware.h> #include <asm/io.h> @@ -201,6 +200,12 @@ void disable_dma (dmach_t channel) printk (KERN_ERR "Trying to disable free DMA%d\n", channel); } +void set_dma_speed(dmach_t channel, int cycle_ns) +{ + dma_chan[channel].speed = + arch_set_dma_speed(channel, &dma_chan[channel], cycle_ns); +} + int get_dma_residue(dmach_t channel) { return arch_get_dma_residue(channel, &dma_chan[channel]); @@ -214,6 +219,7 @@ EXPORT_SYMBOL(set_dma_count); EXPORT_SYMBOL(set_dma_mode); EXPORT_SYMBOL(get_dma_residue); EXPORT_SYMBOL(set_dma_sg); +EXPORT_SYMBOL(set_dma_speed); __initfunc(void init_dma(void)) { diff --git a/arch/arm/kernel/dma.h b/arch/arm/kernel/dma.h index e4c72c6af..33db3b03b 100644 --- a/arch/arm/kernel/dma.h +++ b/arch/arm/kernel/dma.h @@ -15,6 +15,7 @@ typedef struct { unsigned int active:1; /* Transfer active */ unsigned int invalid:1; /* Address/Count changed */ dmamode_t dma_mode; /* DMA mode */ + int speed; /* DMA speed */ unsigned int lock; /* Device is allocated */ const char *device_id; /* Device name */ @@ -63,6 +64,15 @@ void arch_disable_dma(dmach_t channel, dma_t *dma); */ int arch_get_dma_residue(dmach_t channel, dma_t *dma); +/* Prototype: int arch_set_dma_speed(channel, dma, cycle) + * Purpose : Convert a cycle time to a register setting + * Params : channel - DMA channel number + * : dma - DMA structure for channel + * : cycle - cycle time in NS + * Returns : setting for 'dma->speed' + */ +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle); + /* Prototype: void arch_dma_init(dma) * Purpose : Initialise architecture specific DMA * Params : dma - pointer to array of DMA structures diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index fe1c75f5c..dd4bf670c 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -7,32 +7,43 @@ * * Created from information from Acorns RiscOS3 PRMs * - * 08-Dec-1996 RMK Added code for the 9'th expansion card - the ether podule slot. + * 08-Dec-1996 RMK Added code for the 9'th expansion card - the ether + * podule slot. * 06-May-1997 RMK Added blacklist for cards whose loader doesn't work. - * 12-Sep-1997 RMK Created new handling of interrupt enables/disables - cards can - * now register their own routine to control interrupts (recommended). - * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled on reset from - * Linux. (Caused cards not to respond under RiscOS without hard reset). + * 12-Sep-1997 RMK Created new handling of interrupt enables/disables + * - cards can now register their own routine to control + * interrupts (recommended). + * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled + * on reset from Linux. (Caused cards not to respond + * under RiscOS without hard reset). * 15-Feb-1998 RMK Added DMA support * 12-Sep-1998 RMK Added EASI support + * 10-Jan-1999 RMK Run loaders in a simulated RISC OS environment. + * 17-Apr-1999 RMK Support for EASI Type C cycles. */ #define ECARD_C +#define __KERNEL_SYSCALLS__ #include <linux/config.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/malloc.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/unistd.h> #include <linux/init.h> -#include <asm/io.h> -#include <asm/hardware.h> +#include <asm/dma.h> #include <asm/ecard.h> +#include <asm/hardware.h> +#include <asm/io.h> #include <asm/irq.h> -#include <asm/dma.h> +#include <asm/pgtable.h> #ifdef CONFIG_ARCH_ARC #include <asm/arch/oldlatches.h> @@ -40,45 +51,420 @@ #define oldlatch_init() #endif -#define BLACKLIST_NAME(m,p,s) { m, p, NULL, s } -#define BLACKLIST_LOADER(m,p,l) { m, p, l, NULL } -#define BLACKLIST_NOLOADER(m,p) { m, p, noloader, blacklisted_str } -#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE) +enum req { + req_readbytes, + req_reset +}; -extern unsigned long atomwide_serial_loader[], oak_scsi_loader[], noloader[]; -static const char blacklisted_str[] = "*loader s/w is not 32-bit compliant*"; +struct ecard_request { + enum req req; + ecard_t *ec; + unsigned int address; + unsigned int length; + unsigned int use_loader; + void *buffer; +}; -static const struct expcard_blacklist { +struct expcard_blacklist { unsigned short manufacturer; unsigned short product; - const loader_t loader; const char *type; -} blacklist[] = { -/* Cards without names */ - BLACKLIST_NAME(MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1"), - -/* Cards with corrected loader */ - BLACKLIST_LOADER(MANU_ATOMWIDE, PROD_ATOMWIDE_3PSERIAL, atomwide_serial_loader), - BLACKLIST_LOADER(MANU_OAK, PROD_OAK_SCSI, oak_scsi_loader), +}; -/* Supported cards with broken loader */ - { MANU_ALSYSTEMS, PROD_ALSYS_SCSIATAPI, noloader, "AlSystems PowerTec SCSI" }, +static ecard_t *cards; +static ecard_t *slot_to_expcard[MAX_ECARDS]; +static unsigned int ectcr; +#ifdef HAS_EXPMASK +static unsigned int have_expmask; +#endif -/* Unsupported cards with no loader */ - BLACKLIST_NOLOADER(MANU_MCS, PROD_MCS_CONNECT32) +/* List of descriptions of cards which don't have an extended + * identification, or chunk directories containing a description. + */ +static const struct expcard_blacklist __init blacklist[] = { + { MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" } }; +asmlinkage extern int +ecard_loader_reset(volatile unsigned char *pa, loader_t loader); +asmlinkage extern int +ecard_loader_read(int off, volatile unsigned char *pa, loader_t loader); extern int setup_arm_irq(int, struct irqaction *); +extern void do_ecard_IRQ(int, struct pt_regs *); + + +static void +ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs); + +static struct irqaction irqexpansioncard = { + ecard_irq_noexpmask, SA_INTERRUPT, 0, "expansion cards", NULL, NULL +}; + +static inline unsigned short +ecard_getu16(unsigned char *v) +{ + return v[0] | v[1] << 8; +} +static inline signed long +ecard_gets24(unsigned char *v) +{ + return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0); +} + +static inline ecard_t * +slot_to_ecard(unsigned int slot) +{ + return slot < MAX_ECARDS ? slot_to_expcard[slot] : NULL; +} + +/* ===================== Expansion card daemon ======================== */ /* - * from linux/arch/arm/kernel/irq.c + * Since the loader programs on the expansion cards need to be run + * in a specific environment, create a separate task with this + * environment up, and pass requests to this task as and when we + * need to. + * + * This should allow 99% of loaders to be called from Linux. + * + * From a security standpoint, we trust the card vendors. This + * may be a misplaced trust. */ -extern void do_ecard_IRQ(int irq, struct pt_regs *); +#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE) +#define POD_INT_ADDR(x) ((volatile unsigned char *)\ + ((BUS_ADDR((x)) - IO_BASE) + IO_START)) -static ecard_t expcard[MAX_ECARDS]; -static signed char irqno_to_expcard[16]; -static unsigned int ecard_numcards, ecard_numirqcards; -static unsigned int have_expmask; +static void +ecard_task_reset(struct ecard_request *req) +{ + if (req->ec == NULL) { + ecard_t *ec; + + for (ec = cards; ec; ec = ec->next) { + printk(KERN_DEBUG "Resetting card %d\n", + ec->slot_no); + + if (ec->loader) + ecard_loader_reset(POD_INT_ADDR(ec->podaddr), + ec->loader); + } + printk(KERN_DEBUG "All cards reset\n"); + } else if (req->ec->loader) + ecard_loader_reset(POD_INT_ADDR(req->ec->podaddr), + req->ec->loader); +} + +static void +ecard_task_readbytes(struct ecard_request *req) +{ + unsigned char *buf = (unsigned char *)req->buffer; + volatile unsigned char *base_addr = + (volatile unsigned char *)POD_INT_ADDR(req->ec->podaddr); + unsigned int len = req->length; + + if (req->ec->slot_no == 8) { + /* + * The card maintains an index which + * increments the address into a 4096-byte + * page on each access. We need to keep + * track of the counter. + */ + static unsigned int index; + unsigned int offset, page; + unsigned char byte = 0; /* keep gcc quiet */ + + offset = req->address & 4095; + page = req->address >> 12; + + if (page > 256) + return; + + page *= 4; + + if (offset == 0 || index > offset) { + /* + * We need to reset the index counter. + */ + *base_addr = 0; + index = 0; + } + + while (index <= offset) { + byte = base_addr[page]; + index += 1; + } + + while (len--) { + *buf++ = byte; + if (len) { + byte = base_addr[page]; + index += 1; + } + } + } else { + unsigned int off = req->address; + + if (!req->use_loader || !req->ec->loader) { + off *= 4; + while (len--) { + *buf++ = base_addr[off]; + off += 4; + } + } else { + while(len--) { + /* + * The following is required by some + * expansion card loader programs. + */ + *(unsigned long *)0x108 = 0; + *buf++ = ecard_loader_read(off++, base_addr, + req->ec->loader); + } + } + } + +} + +#ifdef CONFIG_CPU_32 +static pid_t ecard_pid; +static wait_queue_head_t ecard_wait; +static wait_queue_head_t ecard_done; +static struct ecard_request *ecard_req; + +/* + * Set up the expansion card daemon's environment. + */ +static void +ecard_init_task(void) +{ + /* We want to set up the page tables for the following mapping: + * Virtual Physical + * 0x03000000 0x03000000 + * 0x03010000 unmapped + * 0x03210000 0x03210000 + * 0x03400000 unmapped + * 0x08000000 0x08000000 + * 0x10000000 unmapped + * + * FIXME: we don't follow this 100% yet. + */ + pgd_t *src_pgd, *dst_pgd; + unsigned int dst_addr = IO_START; + + src_pgd = pgd_offset(current->mm, IO_BASE); + dst_pgd = pgd_offset(current->mm, dst_addr); + + while (dst_addr < IO_START + IO_SIZE) { + *dst_pgd++ = *src_pgd++; + dst_addr += PGDIR_SIZE; + } + + flush_tlb_range(current->mm, IO_START, IO_START + IO_SIZE); + + dst_addr = EASI_START; + src_pgd = pgd_offset(current->mm, EASI_BASE); + dst_pgd = pgd_offset(current->mm, dst_addr); + + while (dst_addr < EASI_START + EASI_SIZE) { + *dst_pgd++ = *src_pgd++; + dst_addr += PGDIR_SIZE; + } + + flush_tlb_range(current->mm, EASI_START, EASI_START + EASI_SIZE); +} + +static int +ecard_task(void * unused) +{ + current->session = 1; + current->pgrp = 1; + + /* + * We don't want /any/ signals, not even SIGKILL + */ + sigfillset(¤t->blocked); + sigemptyset(¤t->signal); + + strcpy(current->comm, "kecardd"); + + /* + * Set up the environment + */ + ecard_init_task(); + + while (1) { + struct ecard_request *req; + + do { + req = xchg(&ecard_req, NULL); + + if (req == NULL) { + sigemptyset(¤t->signal); + interruptible_sleep_on(&ecard_wait); + } + } while (req == NULL); + + switch (req->req) { + case req_readbytes: + ecard_task_readbytes(req); + break; + + case req_reset: + ecard_task_reset(req); + break; + } + wake_up(&ecard_done); + } +} + +/* + * Wake the expansion card daemon to action our request. + * + * FIXME: The test here is not sufficient to detect if the + * kcardd is running. + */ +static inline void +ecard_call(struct ecard_request *req) +{ + /* + * If we're called from task 0, or from an + * interrupt (will be keyboard interrupt), + * we forcefully set up the memory map, and + * call the loader. We can't schedule, or + * sleep for this call. + */ + if ((current == task[0] || in_interrupt()) && + req->req == req_reset && req->ec == NULL) { + ecard_init_task(); + ecard_task_reset(req); + } else { + if (ecard_pid <= 0) + ecard_pid = kernel_thread(ecard_task, NULL, 0); + + ecard_req = req; + + wake_up(&ecard_wait); + + sleep_on(&ecard_done); + } +} +#else +/* + * On 26-bit processors, we don't need the kcardd thread to access the + * expansion card loaders. We do it directly. + */ +static inline void +ecard_call(struct ecard_request *req) +{ + if (req->req == req_reset) + ecard_task_reset(req); + else + ecard_task_readbytes(req); +} +#endif + +/* ======================= Mid-level card control ===================== */ +/* + * This is called to reset the loaders for each expansion card on reboot. + * + * This is required to make sure that the card is in the correct state + * that RiscOS expects it to be. + */ +void +ecard_reset(int slot) +{ + struct ecard_request req; + + req.req = req_reset; + + if (slot < 0) + req.ec = NULL; + else + req.ec = slot_to_ecard(slot); + + ecard_call(&req); + +#ifdef HAS_EXPMASK + if (have_expmask && slot < 0) { + have_expmask |= ~0; + EXPMASK_ENABLE = have_expmask; + } +#endif +} + +static void +ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld) +{ + struct ecard_request req; + + req.req = req_readbytes; + req.ec = ec; + req.address = off; + req.length = len; + req.use_loader = useld; + req.buffer = addr; + + ecard_call(&req); +} + +int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num) +{ + struct ex_chunk_dir excd; + int index = 16; + int useld = 0; + + if (!ec->cid.cd) + return 0; + + while(1) { + ecard_readbytes(&excd, ec, index, 8, useld); + index += 8; + if (c_id(&excd) == 0) { + if (!useld && ec->loader) { + useld = 1; + index = 0; + continue; + } + return 0; + } + if (c_id(&excd) == 0xf0) { /* link */ + index = c_start(&excd); + continue; + } + if (c_id(&excd) == 0x80) { /* loader */ + if (!ec->loader) { + ec->loader = (loader_t)kmalloc(c_len(&excd), + GFP_KERNEL); + if (ec->loader) + ecard_readbytes(ec->loader, ec, + (int)c_start(&excd), + c_len(&excd), useld); + else + return 0; + } + continue; + } + if (c_id(&excd) == id && num-- == 0) + break; + } + + if (c_id(&excd) & 0x80) { + switch (c_id(&excd) & 0x70) { + case 0x70: + ecard_readbytes((unsigned char *)excd.d.string, ec, + (int)c_start(&excd), c_len(&excd), + useld); + break; + case 0x00: + break; + } + } + cd->start_offset = c_start(&excd); + memcpy(cd->d.string, excd.d.string, 256); + return 1; +} + +/* ======================= Interrupt control ============================ */ static void ecard_def_irq_enable(ecard_t *ec, int irqnr) { @@ -100,6 +486,11 @@ static void ecard_def_irq_disable(ecard_t *ec, int irqnr) #endif } +static int ecard_def_irq_pending(ecard_t *ec) +{ + return !ec->irqmask || ec->irqaddr[0] & ec->irqmask; +} + static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr) { panic("ecard_def_fiq_enable called - impossible"); @@ -110,11 +501,18 @@ static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr) panic("ecard_def_fiq_disable called - impossible"); } +static int ecard_def_fiq_pending(ecard_t *ec) +{ + return !ec->fiqmask || ec->fiqaddr[0] & ec->fiqmask; +} + static expansioncard_ops_t ecard_default_ops = { ecard_def_irq_enable, ecard_def_irq_disable, + ecard_def_irq_pending, ecard_def_fiq_enable, - ecard_def_fiq_disable + ecard_def_fiq_disable, + ecard_def_fiq_pending }; /* @@ -125,10 +523,9 @@ static expansioncard_ops_t ecard_default_ops = { */ void ecard_enableirq(unsigned int irqnr) { - irqnr &= 7; - if (irqnr < MAX_ECARDS && irqno_to_expcard[irqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[irqnr]; + ecard_t *ec = slot_to_ecard(irqnr - 32); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -142,10 +539,9 @@ void ecard_enableirq(unsigned int irqnr) void ecard_disableirq(unsigned int irqnr) { - irqnr &= 7; - if (irqnr < MAX_ECARDS && irqno_to_expcard[irqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[irqnr]; + ecard_t *ec = slot_to_ecard(irqnr - 32); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -156,10 +552,9 @@ void ecard_disableirq(unsigned int irqnr) void ecard_enablefiq(unsigned int fiqnr) { - fiqnr &= 7; - if (fiqnr < MAX_ECARDS && irqno_to_expcard[fiqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[fiqnr]; + ecard_t *ec = slot_to_ecard(fiqnr); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -173,10 +568,9 @@ void ecard_enablefiq(unsigned int fiqnr) void ecard_disablefiq(unsigned int fiqnr) { - fiqnr &= 7; - if (fiqnr < MAX_ECARDS && irqno_to_expcard[fiqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[fiqnr]; + ecard_t *ec = slot_to_ecard(fiqnr); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -185,41 +579,89 @@ void ecard_disablefiq(unsigned int fiqnr) } } -static void ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs) +static void +ecard_dump_irq_state(ecard_t *ec) { - const int num_cards = ecard_numirqcards; - int i, called = 0; + printk(" %d: %sclaimed, ", + ec->slot_no, + ec->claimed ? "" : "not "); + + if (ec->ops && ec->ops->irqpending && + ec->ops != &ecard_default_ops) + printk("irq %spending\n", + ec->ops->irqpending(ec) ? "" : "not "); + else + printk("irqaddr %p, mask = %02X, status = %02X\n", + ec->irqaddr, ec->irqmask, *ec->irqaddr); +} - for (i = 0; i < num_cards; i++) { - if (expcard[i].claimed && expcard[i].irq && - (!expcard[i].irqmask || - expcard[i].irqaddr[0] & expcard[i].irqmask)) { - do_ecard_IRQ(expcard[i].irq, regs); - called ++; +static void +ecard_check_lockup(void) +{ + static int last, lockup; + ecard_t *ec; + + /* + * If the timer interrupt has not run since the last million + * unrecognised expansion card interrupts, then there is + * something seriously wrong. Disable the expansion card + * interrupts so at least we can continue. + * + * Maybe we ought to start a timer to re-enable them some time + * later? + */ + if (last == jiffies) { + lockup += 1; + if (lockup > 1000000) { + printk(KERN_ERR "\nInterrupt lockup detected - " + "disabling all expansion card interrupts\n"); + + disable_irq(IRQ_EXPANSIONCARD); + + printk("Expansion card IRQ state:\n"); + + for (ec = cards; ec; ec = ec->next) + ecard_dump_irq_state(ec); } + } else + lockup = 0; + + /* + * If we did not recognise the source of this interrupt, + * warn the user, but don't flood the user with these messages. + */ + if (!last || time_after(jiffies, last + 5*HZ)) { + last = jiffies; + printk(KERN_WARNING "Unrecognised interrupt from backplane\n"); } - cli(); - if (called == 0) { - static int last, lockup; - - if (last == jiffies) { - lockup += 1; - if (lockup > 1000000) { - printk(KERN_ERR "\nInterrupt lockup detected - disabling expansion card IRQs\n"); - disable_irq(intr_no); - printk("Expansion card IRQ state:\n"); - for (i = 0; i < num_cards; i++) - printk(" %d: %sclaimed, irqaddr = %p, irqmask = %X, status=%X\n", expcard[i].irq - 32, - expcard[i].claimed ? "" : "not", expcard[i].irqaddr, expcard[i].irqmask, *expcard[i].irqaddr); - } - } else - lockup = 0; +} + +static void +ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs) +{ + ecard_t *ec; + int called = 0; + + for (ec = cards; ec; ec = ec->next) { + int pending; + + if (!ec->claimed || ec->irq == NO_IRQ || ec->slot_no == 8) + continue; - if (!last || time_after(jiffies, last + 5*HZ)) { - last = jiffies; - printk(KERN_ERR "\nUnrecognised interrupt from backplane\n"); + if (ec->ops && ec->ops->irqpending) + pending = ec->ops->irqpending(ec); + else + pending = ecard_default_ops.irqpending(ec); + + if (pending) { + do_ecard_IRQ(ec->irq, regs); + called ++; } } + cli(); + + if (called == 0) + ecard_check_lockup(); } #ifdef HAS_EXPMASK @@ -234,31 +676,35 @@ static unsigned char first_set[] = 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 }; -static void ecard_irq_expmask(int intr_no, void *dev_id, struct pt_regs *regs) +static void +ecard_irq_expmask(int intr_no, void *dev_id, struct pt_regs *regs) { const unsigned int statusmask = 15; unsigned int status; status = EXPMASK_STATUS & statusmask; if (status) { - unsigned int irqno; + unsigned int slot; ecard_t *ec; again: - irqno = first_set[status]; - ec = expcard + irqno_to_expcard[irqno]; + slot = first_set[status]; + ec = slot_to_ecard(slot); if (ec->claimed) { unsigned int oldexpmask; /* - * this ugly code is so that we can operate a prioritorising system. + * this ugly code is so that we can operate a + * prioritorising system: + * * Card 0 highest priority * Card 1 * Card 2 * Card 3 lowest priority + * * Serial cards should go in 0/1, ethernet/scsi in 2/3 * otherwise you will lose serial data at high speeds! */ oldexpmask = have_expmask; - EXPMASK_ENABLE = (have_expmask &= priority_masks[irqno]); + EXPMASK_ENABLE = (have_expmask &= priority_masks[slot]); sti(); do_ecard_IRQ(ec->irq, regs); cli(); @@ -267,15 +713,18 @@ again: if (status) goto again; } else { - printk(KERN_WARNING "card%d: interrupt from unclaimed card???\n", irqno); - EXPMASK_ENABLE = (have_expmask &= ~(1 << irqno)); + printk(KERN_WARNING "card%d: interrupt from unclaimed " + "card???\n", slot); + EXPMASK_ENABLE = (have_expmask &= ~(1 << slot)); } } else printk(KERN_WARNING "Wild interrupt from backplane (masks)\n"); } -static int ecard_checkirqhw(void) +__initfunc(static void +ecard_probeirqhw(void)) { + ecard_t *ec; int found; EXPMASK_ENABLE = 0x00; @@ -283,62 +732,80 @@ static int ecard_checkirqhw(void) found = ((EXPMASK_STATUS & 15) == 0); EXPMASK_ENABLE = 0xff; - return found; + if (!found) + return; + + printk(KERN_DEBUG "Expansion card interrupt " + "management hardware found\n"); + + irqexpansioncard.handler = ecard_irq_expmask; + + /* for each card present, set a bit to '1' */ + have_expmask = 0x80000000; + + for (ec = cards; ec; ec = ec->next) + have_expmask |= 1 << ec->slot_no; + + EXPMASK_ENABLE = have_expmask; } +#else +#define ecard_probeirqhw() +#endif + +#ifndef IO_EC_MEMC8_BASE +#define IO_EC_MEMC8_BASE 0 #endif -static void ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld) +unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) { - extern int ecard_loader_read(int off, volatile unsigned int pa, loader_t loader); - unsigned char *a = (unsigned char *)addr; + unsigned long address = 0; + int slot = ec->slot_no; - if (ec->slot_no == 8) { - static unsigned int lowaddress; - unsigned int laddr, haddr; - unsigned char byte = 0; /* keep gcc quiet */ + if (ec->slot_no == 8) + return IO_EC_MEMC8_BASE; - laddr = off & 4095; /* number of bytes to read from offset + base addr */ - haddr = off >> 12; /* offset into card from base addr */ + ectcr &= ~(1 << slot); - if (haddr > 256) - return; + switch (type) { + case ECARD_MEMC: + if (slot < 4) + address = IO_EC_MEMC_BASE + (slot << 12); + break; - /* - * If we require a low address or address 0, then reset, and start again... - */ - if (!off || lowaddress > laddr) { - outb(0, ec->podaddr); - lowaddress = 0; - } - while (lowaddress <= laddr) { - byte = inb(ec->podaddr + haddr); - lowaddress += 1; - } - while (len--) { - *a++ = byte; - if (len) { - byte = inb(ec->podaddr + haddr); - lowaddress += 1; - } - } - } else { - if (!useld || !ec->loader) { - while(len--) - *a++ = inb(ec->podaddr + (off++)); - } else { - while(len--) { - *(unsigned long *)0x108 = 0; /* hack for some loaders!!! */ - *a++ = ecard_loader_read(off++, BUS_ADDR(ec->podaddr), ec->loader); - } - } + case ECARD_IOC: + if (slot < 4) + address = IO_EC_IOC_BASE + (slot << 12); +#ifdef IO_EC_IOC4_BASE + else + address = IO_EC_IOC4_BASE + ((slot - 4) << 12); +#endif + if (address) + address += speed << 17; + break; + +#ifdef IO_EC_EASI_BASE + case ECARD_EASI: + address = IO_EC_EASI_BASE + (slot << 22); + if (speed == ECARD_FAST) + ectcr |= 1 << slot; + break; +#endif } + +#ifdef IOMD_ECTCR + outb(ectcr, IOMD_ECTCR); +#endif + return address; } +static const char *unknown = "*unknown*"; + static int ecard_prints(char *buffer, ecard_t *ec) { char *start = buffer; - buffer += sprintf(buffer, "\n %d: ", ec->slot_no); + buffer += sprintf(buffer, " %d: %s ", ec->slot_no, + ec->type == ECARD_EASI ? "EASI" : " "); if (ec->cid.id == 0) { struct in_chunk_dir incd; @@ -346,28 +813,57 @@ static int ecard_prints(char *buffer, ecard_t *ec) buffer += sprintf(buffer, "[%04X:%04X] ", ec->cid.manufacturer, ec->cid.product); - if (!ec->card_desc && ec->cid.is && ec->cid.cd && - ecard_readchunk(&incd, ec, 0xf5, 0)) - ec->card_desc = incd.d.string; + if (!ec->card_desc && ec->cid.cd && + ecard_readchunk(&incd, ec, 0xf5, 0)) { + ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL); - if (!ec->card_desc) - ec->card_desc = "*unknown*"; + if (ec->card_desc) + strcpy(ec->card_desc, incd.d.string); + } - buffer += sprintf(buffer, "%s", ec->card_desc); + buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*"); } else - buffer += sprintf(buffer, "Simple card %d", ec->cid.id); + buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id); return buffer - start; } -static inline unsigned short ecard_getu16(unsigned char *v) +int get_ecard_dev_info(char *buf, char **start, off_t pos, int count, int wr) { - return v[0] | v[1] << 8; + ecard_t *ec = cards; + off_t at = 0; + int len, cnt; + + cnt = 0; + while (ec && count > cnt) { + len = ecard_prints(buf, ec); + at += len; + if (at >= pos) { + if (!*start) { + *start = buf + (pos - (at - len)); + cnt = at - pos; + } else + cnt += len; + buf += len; + } + ec = ec->next; + } + return (count > cnt) ? cnt : count; } -static inline signed long ecard_gets24(unsigned char *v) +static struct proc_dir_entry proc_ecard_devices = { + PROC_BUS_ECARD_DEVICES, 7, "devices", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations, + get_ecard_dev_info +}; + +static struct proc_dir_entry *proc_bus_ecard_dir; + +static void ecard_proc_init(void) { - return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0); + proc_bus_ecard_dir = create_proc_entry("ecard", S_IFDIR, proc_bus); + proc_register(proc_bus_ecard_dir, &proc_ecard_devices); } /* @@ -376,33 +872,39 @@ static inline signed long ecard_gets24(unsigned char *v) * If bit 1 of the first byte of the card is set, then the * card does not exist. */ -__initfunc(static int ecard_probe(int card, int freeslot, card_type_t type)) +__initfunc(static int +ecard_probe(int slot, card_type_t type)) { - ecard_t *ec = expcard + freeslot; + ecard_t **ecp; + ecard_t *ec; struct ex_ecid cid; char buffer[200]; - int i; + int i, rc = -ENOMEM; + + ec = kmalloc(sizeof(ecard_t), GFP_KERNEL); + + if (!ec) + goto nodev; - irqno_to_expcard[card] = -1; + memset(ec, 0, sizeof(ecard_t)); - ec->slot_no = card; + ec->slot_no = slot; + ec->type = type; ec->irq = NO_IRQ; ec->fiq = NO_IRQ; ec->dma = NO_DMA; ec->card_desc = NULL; ec->ops = &ecard_default_ops; + rc = -ENODEV; if ((ec->podaddr = ecard_address(ec, type, ECARD_SYNC)) == 0) - return 0; + goto nodev; cid.r_zero = 1; ecard_readbytes(&cid, ec, 0, 16, 0); if (cid.r_zero) - return 0; + goto nodev; - irqno_to_expcard[card] = freeslot; - - ec->type = type; ec->cid.id = cid.r_id; ec->cid.cd = cid.r_cd; ec->cid.is = cid.r_is; @@ -415,9 +917,9 @@ __initfunc(static int ecard_probe(int card, int freeslot, card_type_t type)) ec->cid.fiqmask = cid.r_fiqmask; ec->cid.fiqoff = ecard_gets24(cid.r_fiqoff); ec->fiqaddr = - ec->irqaddr = (unsigned char *)BUS_ADDR(ec->podaddr); + ec->irqaddr = (unsigned char *)ioaddr(ec->podaddr); - if (ec->cid.cd && ec->cid.is) { + if (ec->cid.is) { ec->irqmask = ec->cid.irqmask; ec->irqaddr += ec->cid.irqoff; ec->fiqmask = ec->cid.fiqmask; @@ -430,88 +932,69 @@ __initfunc(static int ecard_probe(int card, int freeslot, card_type_t type)) for (i = 0; i < sizeof(blacklist) / sizeof(*blacklist); i++) if (blacklist[i].manufacturer == ec->cid.manufacturer && blacklist[i].product == ec->cid.product) { - ec->loader = blacklist[i].loader; ec->card_desc = blacklist[i].type; break; } - ecard_prints(buffer, ec); - printk("%s", buffer); - - ec->irq = 32 + card; + ec->irq = 32 + slot; #ifdef IO_EC_MEMC8_BASE - if (card == 8) + if (slot == 8) ec->irq = 11; #endif #ifdef CONFIG_ARCH_RPC /* On RiscPC, only first two slots have DMA capability */ - if (card < 2) - ec->dma = 2 + card; + if (slot < 2) + ec->dma = 2 + slot; #endif #if 0 /* We don't support FIQs on expansion cards at the moment */ - ec->fiq = 96 + card; + ec->fiq = 96 + slot; #endif - return 1; -} + rc = 0; -/* - * This is called to reset the loaders for each expansion card on reboot. - * - * This is required to make sure that the card is in the correct state - * that RiscOS expects it to be. - */ -void ecard_reset(int card) -{ - extern int ecard_loader_reset(volatile unsigned int pa, loader_t loader); + for (ecp = &cards; *ecp; ecp = &(*ecp)->next); - if (card >= ecard_numcards) - return; - - if (card < 0) { - for (card = 0; card < ecard_numcards; card++) - if (expcard[card].loader) - ecard_loader_reset(BUS_ADDR(expcard[card].podaddr), - expcard[card].loader); - } else - if (expcard[card].loader) - ecard_loader_reset(BUS_ADDR(expcard[card].podaddr), - expcard[card].loader); + *ecp = ec; -#ifdef HAS_EXPMASK - if (have_expmask) { - have_expmask |= ~0; - EXPMASK_ENABLE = have_expmask; +nodev: + if (rc && ec) + kfree(ec); + else { + slot_to_expcard[slot] = ec; + + ecard_prints(buffer, ec); + printk("%s", buffer); } -#endif + return rc; } -static unsigned int ecard_startcard; +static ecard_t *finding_pos; void ecard_startfind(void) { - ecard_startcard = 0; + finding_pos = NULL; } ecard_t *ecard_find(int cid, const card_ids *cids) { - int card; + if (!finding_pos) + finding_pos = cards; + else + finding_pos = finding_pos->next; + + for (; finding_pos; finding_pos = finding_pos->next) { + if (finding_pos->claimed) + continue; - if (!cids) { - for (card = ecard_startcard; card < ecard_numcards; card++) - if (!expcard[card].claimed && - (expcard[card].cid.id ^ cid) == 0) + if (!cids) { + if ((finding_pos->cid.id ^ cid) == 0) break; - } else { - for (card = ecard_startcard; card < ecard_numcards; card++) { + } else { unsigned int manufacturer, product; int i; - if (expcard[card].claimed) - continue; - - manufacturer = expcard[card].cid.manufacturer; - product = expcard[card].cid.product; + manufacturer = finding_pos->cid.manufacturer; + product = finding_pos->cid.product; for (i = 0; cids[i].manufacturer != 65535; i++) if (manufacturer == cids[i].manufacturer && @@ -523,111 +1006,24 @@ ecard_t *ecard_find(int cid, const card_ids *cids) } } - ecard_startcard = card + 1; - - return card < ecard_numcards ? &expcard[card] : NULL; + return finding_pos; } -int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num) +__initfunc(static void ecard_free_all(void)) { - struct ex_chunk_dir excd; - int index = 16; - int useld = 0; + ecard_t *ec, *ecn; - if (!ec->cid.is || !ec->cid.cd) - return 0; - - while(1) { - ecard_readbytes(&excd, ec, index, 8, useld); - index += 8; - if (c_id(&excd) == 0) { - if (!useld && ec->loader) { - useld = 1; - index = 0; - continue; - } - return 0; - } - if (c_id(&excd) == 0xf0) { /* link */ - index = c_start(&excd); - continue; - } - if (c_id(&excd) == 0x80) { /* loader */ - if (!ec->loader) { - ec->loader = (loader_t)kmalloc(c_len(&excd), GFP_KERNEL); - ecard_readbytes(ec->loader, ec, (int)c_start(&excd), c_len(&excd), useld); - } - continue; - } - if (c_id(&excd) == id && num-- == 0) - break; - } + for (ec = cards; ec; ec = ecn) { + ecn = ec->next; - if (c_id(&excd) & 0x80) { - switch (c_id(&excd) & 0x70) { - case 0x70: - ecard_readbytes((unsigned char *)excd.d.string, ec, - (int)c_start(&excd), c_len(&excd), useld); - break; - case 0x00: - break; - } + kfree(ec); } - cd->start_offset = c_start(&excd); - memcpy(cd->d.string, excd.d.string, 256); - return 1; -} - -unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) -{ - switch (ec->slot_no) { - case 0 ... 3: - switch (type) { - case ECARD_MEMC: - return IO_EC_MEMC_BASE + (ec->slot_no << 12); - - case ECARD_IOC: - return IO_EC_IOC_BASE + (speed << 17) + (ec->slot_no << 12); - -#ifdef IO_EC_EASI_BASE - case ECARD_EASI: - return IO_EC_EASI_BASE + (ec->slot_no << 22); -#endif - } - break; - case 4 ... 7: - switch (type) { -#ifdef IO_EC_IOC4_BASE - case ECARD_IOC: - return IO_EC_IOC4_BASE + (speed << 17) + ((ec->slot_no - 4) << 12); -#endif -#ifdef IO_EC_EASI_BASE - case ECARD_EASI: - return IO_EC_EASI_BASE + (ec->slot_no << 22); -#endif - default: - break; - } - break; + cards = NULL; -#ifdef IO_EC_MEMC8_BASE - case 8: - return IO_EC_MEMC8_BASE; -#endif - } - return 0; + memset(slot_to_expcard, 0, sizeof(slot_to_expcard)); } -static struct irqaction irqexpansioncard = { - ecard_irq_noexpmask, - SA_INTERRUPT, - 0, - "expansion cards", - NULL, - NULL -}; - /* * Initialise the expansion card system. * Locate all hardware - interrupt management and @@ -635,51 +1031,38 @@ static struct irqaction irqexpansioncard = { */ __initfunc(void ecard_init(void)) { - int i, nc = 0; + int slot; - memset(expcard, 0, sizeof(expcard)); + oldlatch_init(); -#ifdef HAS_EXPMASK - if (ecard_checkirqhw()) { - printk(KERN_DEBUG "Expansion card interrupt management hardware found\n"); - irqexpansioncard.handler = ecard_irq_expmask; - irqexpansioncard.flags |= SA_IRQNOMASK; - have_expmask = -1; - } +#ifdef CONFIG_CPU_32 + init_waitqueue_head(&ecard_wait); + init_waitqueue_head(&ecard_done); #endif - printk("Installed expansion cards:"); + printk("Probing expansion cards: (does not imply support)\n"); - /* - * First of all, probe all cards on the expansion card interrupt line - */ - for (i = 0; i < 8; i++) - if (ecard_probe(i, nc, ECARD_IOC) || ecard_probe(i, nc, ECARD_EASI)) - nc += 1; - else - have_expmask &= ~(1<<i); - - ecard_numirqcards = nc; + for (slot = 0; slot < 8; slot ++) { + if (ecard_probe(slot, ECARD_EASI) == -ENODEV) + ecard_probe(slot, ECARD_IOC); + } - /* Now probe other cards with different interrupt lines - */ #ifdef IO_EC_MEMC8_BASE - if (ecard_probe(8, nc, ECARD_IOC)) - nc += 1; + ecard_probe(8, ECARD_IOC); #endif - printk("\n"); - ecard_numcards = nc; + ecard_probeirqhw(); - if (nc && setup_arm_irq(IRQ_EXPANSIONCARD, &irqexpansioncard)) { - printk("Could not allocate interrupt for expansion cards\n"); - return; + if (setup_arm_irq(IRQ_EXPANSIONCARD, &irqexpansioncard)) { + printk(KERN_ERR "Unable to claim IRQ%d for expansion cards\n", + IRQ_EXPANSIONCARD); + ecard_free_all(); } - -#ifdef HAS_EXPMASK - if (nc && have_expmask) - EXPMASK_ENABLE = have_expmask; -#endif - oldlatch_init(); + ecard_proc_init(); } + +EXPORT_SYMBOL(ecard_startfind); +EXPORT_SYMBOL(ecard_find); +EXPORT_SYMBOL(ecard_readchunk); +EXPORT_SYMBOL(ecard_address); diff --git a/arch/arm/kernel/entry-armo.S b/arch/arm/kernel/entry-armo.S index 3ca29cd2c..758163f07 100644 --- a/arch/arm/kernel/entry-armo.S +++ b/arch/arm/kernel/entry-armo.S @@ -159,8 +159,8 @@ irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 .macro restore_user_regs ldmia sp, {r0 - lr}^ mov r0, r0 - add sp, sp, #15*4 - ldr lr, [sp], #8 + ldr lr, [sp, #15*4] + add sp, sp, #15*4+8 movs pc, lr .endm @@ -226,13 +226,6 @@ irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 str r0, [sp, #S_OLD_R0] ;\ mov fp, #0 -#define USER_RESTORE_ALL \ - ldmia sp, {r0 - lr}^ ;\ - mov r0, r0 ;\ - add sp, sp, #15*4 ;\ - ldr lr, [sp], #8 ;\ - movs pc, lr - #define SVC_RESTORE_ALL \ ldmfd sp, {r0 - pc}^ @@ -253,7 +246,7 @@ _unexp_fiq: ldr sp, .LCfiq mov r0, r0 movs pc, lr -Lfiqmsg: .ascii "*** Unexpeced FIQ\n\0" +Lfiqmsg: .ascii "*** Unexpected FIQ\n\0" .align .LCfiq: .word __temp_fiq @@ -315,14 +308,14 @@ fpe_not_present: and r4, r10, #255 @ get offset and r6, r10, #0x000f0000 tst r10, #0x00800000 @ +/- - rsbeq r4, r4, #0 ldr r5, [sp, r6, lsr #14] @ Load reg + rsbeq r4, r4, #0 add r5, r5, r4, lsl #2 str r5, [sp, r6, lsr #14] @ Save reg b ret_from_exception -wfs_mask_data: .word 0x0e200110 @ WFS - .word 0x0fff0fff +wfs_mask_data: .word 0x0e200110 @ WFS/RFS + .word 0x0fef0fff .word 0x0d0d0100 @ LDF [sp]/STF [sp] .word 0x0d0b0100 @ LDF [fp]/STF [fp] .word 0x0f0f0f00 @@ -341,8 +334,7 @@ vector_prefetch: save_user_regs teqp pc, #0x00000003 @ NOT a problem - doesnt change mode mask_pc r0, lr @ Address of abort - mov r1, #FAULT_CODE_PREFETCH|FAULT_CODE_USER @ Error code - mov r2, sp @ Tasks registers + mov r1, sp @ Tasks registers bl SYMBOL_NAME(do_PrefetchAbort) teq r0, #0 @ If non-zero, we believe this abort.. bne ret_from_sys_call @@ -451,6 +443,7 @@ vector_IRQ: ldr r13, .LCirq @ I will leave this one in just in case... adr lr, 1b orr lr, lr, #0x08000003 @ Force SVC bne do_IRQ + mov r4, #0 b ret_with_reschedule irq_prio_table @@ -562,8 +555,8 @@ Ldata_ldrstr_numindex: and r0, r0, #15 << 2 @ Mask out reg. teq r0, #15 << 2 ldr r0, [r3, r0] @ Get register - biceq r0, r0, #PCMASK mov r1, r4, lsl #20 + biceq r0, r0, #PCMASK tst r4, #1 << 23 addne r0, r0, r1, lsr #20 subeq r0, r0, r1, lsr #20 @@ -578,12 +571,12 @@ Ldata_ldrstr_regindex: and r0, r0, #15 << 2 @ Mask out reg. teq r0, #15 << 2 ldr r0, [r3, r0] @ Get register - biceq r0, r0, #PCMASK and r7, r4, #15 + biceq r0, r0, #PCMASK teq r7, #15 @ Check for PC ldr r7, [r3, r7, lsl #2] @ Get Rm - biceq r7, r7, #PCMASK and r8, r4, #0x60 @ Get shift types + biceq r7, r7, #PCMASK mov r9, r4, lsr #7 @ Get shift amount and r9, r9, #31 teq r8, #0 @@ -645,8 +638,8 @@ Ldata_ldcstc_pre: and r0, r0, #15 << 2 @ Mask out reg. teq r0, #15 << 2 ldr r0, [r3, r0] @ Get register - biceq r0, r0, #PCMASK mov r1, r4, lsl #24 @ Get offset + biceq r0, r0, #PCMASK tst r4, #1 << 23 addne r0, r0, r1, lsr #24 subeq r0, r0, r1, lsr #24 @@ -656,9 +649,54 @@ Ldata_ldcstc_pre: #endif b SYMBOL_NAME(do_DataAbort) -#include "entry-common.S" +/* + *============================================================================= + * Low-level interface code + *----------------------------------------------------------------------------- + * Trap initialisation + *----------------------------------------------------------------------------- + * + * Note - FIQ code has changed. The default is a couple of words in 0x1c, 0x20 + * that call _unexp_fiq. Nowever, we now copy the FIQ routine to 0x1c (removes + * some excess cycles). + * + * What we need to put into 0-0x1c are branches to branch to the kernel. + */ - .data + .section ".text.init",#alloc,#execinstr + +.Ljump_addresses: + swi SYS_ERROR0 + .word vector_undefinstr - 12 + .word vector_swi - 16 + .word vector_prefetch - 20 + .word vector_data - 24 + .word vector_addrexcptn - 28 + .word vector_IRQ - 32 + .word _unexp_fiq - 36 + b . + 8 +/* + * initialise the trap system + */ +ENTRY(trap_init) + stmfd sp!, {r4 - r7, lr} + adr r1, .Ljump_addresses + ldmia r1, {r1 - r7, ip, lr} + orr r2, lr, r2, lsr #2 + orr r3, lr, r3, lsr #2 + orr r4, lr, r4, lsr #2 + orr r5, lr, r5, lsr #2 + orr r6, lr, r6, lsr #2 + orr r7, lr, r7, lsr #2 + orr ip, lr, ip, lsr #2 + mov r0, #0 + stmia r0, {r1 - r7, ip} + ldmfd sp!, {r4 - r7, pc}^ + + .text + +#include "entry-common.S" -__temp_irq: .word 0 @ saved lr_irq + .bss +__temp_irq: .space 4 @ saved lr_irq __temp_fiq: .space 128 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bcc938b32..9456abe33 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -61,8 +61,12 @@ #define S_R1 4 #define S_R0 0 +#define OFF_CR_ALIGNMENT(x) cr_alignment - x + #ifdef IOC_BASE /* IOC / IOMD based hardware */ +#include <asm/iomd.h> + .equ ioc_base_high, IOC_BASE & 0xff000000 .equ ioc_base_low, IOC_BASE & 0x00ff0000 .macro disable_fiq @@ -186,113 +190,109 @@ irq_prio_ebsa110: .byte 6, 6, 6, 6, 2, 2, 2, 2, 3, 3, 6, 6, 2, 2, 2, 2 .endm -#elif defined(CONFIG_ARCH_EBSA285) +#elif defined(CONFIG_HOST_FOOTBRIDGE) || defined(CONFIG_ADDIN_FOOTBRIDGE) +#include <asm/dec21285.h> .macro disable_fiq .endm + .equ irq_mask_pci_err_high, IRQ_MASK_PCI_ERR & 0xff000000 + .equ irq_mask_pci_err_low, IRQ_MASK_PCI_ERR & 0x00ffffff + .equ dc21285_high, ARMCSR_BASE & 0xff000000 + .equ dc21285_low, ARMCSR_BASE & 0x00ffffff + .macro get_irqnr_and_base, irqnr, irqstat, base - mov r4, #0xfe000000 + mov r4, #dc21285_high + .if dc21285_low + orr r4, r4, #dc21285_low + .endif ldr \irqstat, [r4, #0x180] @ get interrupts - mov \irqnr, #0 -1001: tst \irqstat, #1 - addeq \irqnr, \irqnr, #1 - moveq \irqstat, \irqstat, lsr #1 - tsteq \irqnr, #32 - beq 1001b - teq \irqnr, #32 - .endm - .macro irq_prio_table - .endm - -#elif defined(CONFIG_ARCH_NEXUSPCI) + mov \irqnr, #IRQ_SDRAMPARITY + tst \irqstat, #IRQ_MASK_SDRAMPARITY + bne 1001f - .macro disable_fiq - .endm + tst \irqstat, #IRQ_MASK_UART_RX + movne \irqnr, #IRQ_CONRX + bne 1001f - .macro get_irqnr_and_base, irqnr, irqstat, base - ldr r4, =0xffe00000 - ldr \irqstat, [r4, #0x180] @ get interrupts - mov \irqnr, #0 -1001: tst \irqstat, #1 - addeq \irqnr, \irqnr, #1 - moveq \irqstat, \irqstat, lsr #1 - tsteq \irqnr, #32 - beq 1001b - teq \irqnr, #32 - .endm + tst \irqstat, #IRQ_MASK_DMA1 + movne \irqnr, #IRQ_DMA1 + bne 1001f - .macro irq_prio_table - .endm + tst \irqstat, #IRQ_MASK_DMA2 + movne \irqnr, #IRQ_DMA2 + bne 1001f -#elif defined(CONFIG_ARCH_VNC) + tst \irqstat, #IRQ_MASK_IN0 + movne \irqnr, #IRQ_IN0 + bne 1001f - .macro disable_fiq - .endm + tst \irqstat, #IRQ_MASK_IN1 + movne \irqnr, #IRQ_IN1 + bne 1001f - .equ pci_iack_high, PCI_IACK & 0xff000000 - .equ pci_iack_low, PCI_IACK & 0x00ff0000 + tst \irqstat, #IRQ_MASK_IN2 + movne \irqnr, #IRQ_IN2 + bne 1001f - .macro get_irqnr_and_base, irqnr, irqstat, base - mov r4, #IO_BASE_ARM_CSR - ldr \irqstat, [r4, #CSR_IRQ_STATUS] @ just show us the unmasked ones + tst \irqstat, #IRQ_MASK_IN3 + movne \irqnr, #IRQ_IN3 + bne 1001f - @ run through hard priorities - @ timer - tst \irqstat, #IRQ_MASK_TIMER0 - movne \irqnr, #IRQ_TIMER0 + tst \irqstat, #IRQ_MASK_PCI + movne \irqnr, #IRQ_PCI bne 1001f - @ ether10 - tst \irqstat, #IRQ_MASK_ETHER10 - movne \irqnr, #IRQ_ETHER10 + tst \irqstat, #IRQ_MASK_I2OINPOST + movne \irqnr, #IRQ_I2OINPOST bne 1001f - @ ether100 - tst \irqstat, #IRQ_MASK_ETHER100 - movne \irqnr, #IRQ_ETHER100 + tst \irqstat, #IRQ_MASK_TIMER1 + movne \irqnr, #IRQ_TIMER1 bne 1001f - @ video compressor - tst \irqstat, #IRQ_MASK_VIDCOMP - movne \irqnr, #IRQ_VIDCOMP + tst \irqstat, #IRQ_MASK_TIMER2 + movne \irqnr, #IRQ_TIMER2 bne 1001f - @ now try all the PIC sources - @ determine whether we have an irq - tst \irqstat, #IRQ_MASK_EXTERN_IRQ - beq 1002f - mov r4, #pci_iack_high - orr r4, r4, #pci_iack_low - ldrb \irqnr, [r4] @ get the IACK byte - b 1001f - -1002: @ PCI errors - tst \irqstat, #IRQ_MASK_PCI_ERR - movne \irqnr, #IRQ_PCI_ERR + tst \irqstat, #IRQ_MASK_TIMER3 + movne \irqnr, #IRQ_TIMER3 bne 1001f - @ softint - tst \irqstat, #IRQ_MASK_SOFTIRQ - movne \irqnr, #IRQ_SOFTIRQ + tst \irqstat, #IRQ_MASK_UART_TX + movne \irqnr, #IRQ_CONTX bne 1001f - @ debug uart - tst \irqstat, #IRQ_MASK_UART_DEBUG - movne \irqnr, #IRQ_CONRX + tst \irqstat, #irq_mask_pci_err_high + tsteq \irqstat, #irq_mask_pci_err_low + movne \irqnr, #IRQ_PCI_ERR bne 1001f +1001: + .endm - @ watchdog - tst \irqstat, #IRQ_MASK_WATCHDOG - movne \irqnr, #IRQ_WATCHDOG + .macro irq_prio_table + .endm -1001: @ If Z is set, then we will not enter an interrupt +#elif defined(CONFIG_ARCH_NEXUSPCI) + + .macro disable_fiq .endm - .macro irq_prio_table + .macro get_irqnr_and_base, irqnr, irqstat, base + ldr r4, =0xffe00000 + ldr \irqstat, [r4, #0x180] @ get interrupts + mov \irqnr, #0 +1001: tst \irqstat, #1 + addeq \irqnr, \irqnr, #1 + moveq \irqstat, \irqstat, lsr #1 + tsteq \irqnr, #32 + beq 1001b + teq \irqnr, #32 .endm + .macro irq_prio_table + .endm #else #error Unknown architecture #endif @@ -306,22 +306,22 @@ irq_prio_ebsa110: stmia sp, {r0 - r12} @ Calling r0 - r12 add r8, sp, #S_PC stmdb r8, {sp, lr}^ @ Calling sp, lr - mov r7, r0 + str lr, [r8, #0] @ Save calling PC mrs r6, spsr - mov r5, lr - stmia r8, {r5, r6, r7} @ Save calling PC, CPSR, OLD_R0 + str r6, [r8, #4] @ Save CPSR + str r0, [r8, #8] @ Save OLD_R0 .endm .macro restore_user_regs - mrs r0, cpsr @ disable IRQs - orr r0, r0, #I_BIT - msr cpsr, r0 + mrs r1, cpsr @ disable IRQs + orr r1, r1, #I_BIT ldr r0, [sp, #S_PSR] @ Get calling cpsr + msr cpsr, r1 msr spsr, r0 @ save in spsr_svc ldmia sp, {r0 - lr}^ @ Get calling r0 - lr mov r0, r0 - add sp, sp, #S_PC - ldr lr, [sp], #S_FRAME_SIZE - S_PC @ Get PC and jump over PC, PSR, OLD_R0 + ldr lr, [sp, #S_PC] @ Get PC + add sp, sp, #S_FRAME_SIZE movs pc, lr @ return & move spsr_svc into cpsr .endm @@ -348,25 +348,6 @@ irq_prio_ebsa110: msr cpsr, \temp .endm - .macro initialise_traps_extra - mrs r0, cpsr - bic r0, r0, #31 - orr r0, r0, #0xd3 - msr cpsr, r0 - .endm - - -#ifndef __ARM_ARCH_4__ -.Larm700bug: str lr, [r8] - ldr r0, [sp, #S_PSR] @ Get calling cpsr - msr spsr, r0 - ldmia sp, {r0 - lr}^ @ Get calling r0 - lr - mov r0, r0 - add sp, sp, #S_PC - ldr lr, [sp], #S_FRAME_SIZE - S_PC @ Get PC and jump over PC, PSR, OLD_R0 - movs pc, lr -#endif - .macro get_current_task, rd mov \rd, sp, lsr #13 mov \rd, \rd, lsl #13 @@ -379,231 +360,89 @@ irq_prio_ebsa110: adr\cond \reg, \label .endm -/*============================================================================= - * Address exception handler - *----------------------------------------------------------------------------- - * These aren't too critical. - * (they're not supposed to happen, and won't happen in 32-bit mode). - */ - -vector_addrexcptn: - b vector_addrexcptn - -/*============================================================================= - * Undefined FIQs - *----------------------------------------------------------------------------- - * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC - * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg. - * Basically to switch modes, we *HAVE* to clobber one register... brain - * damage alert! I don't think that we can execute any code in here in any - * other mode than FIQ... Ok you can switch to another mode, but you can't - * get out of that mode without clobbering one register. - */ -_unexp_fiq: disable_fiq - subs pc, lr, #4 - -/*============================================================================= - * Interrupt entry dispatcher - *----------------------------------------------------------------------------- - * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - */ -vector_IRQ: @ - @ save mode specific registers - @ - ldr r13, .LCirq - sub lr, lr, #4 - str lr, [r13] @ save lr_IRQ - mrs lr, spsr - str lr, [r13, #4] @ save spsr_IRQ - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr @ switch to SVC mode - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - cmp lr, #4 - addlts pc, pc, lr, lsl #2 @ Changes mode and branches - b __irq_invalid @ 4 - 15 - b __irq_usr @ 0 (USR_26 / USR_32) - b __irq_invalid @ 1 (FIQ_26 / FIQ_32) - b __irq_invalid @ 2 (IRQ_26 / IRQ_32) - b __irq_svc @ 3 (SVC_26 / SVC_32) -/* - *------------------------------------------------------------------------------------------------ - * Undef instr entry dispatcher - dispatches it to the correct handler for the processor mode - *------------------------------------------------------------------------------------------------ - * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - */ -.LCirq: .word __temp_irq -.LCund: .word __temp_und -.LCabt: .word __temp_abt - -vector_undefinstr: - @ - @ save mode specific registers - @ - ldr r13, [pc, #.LCund - . - 8] - str lr, [r13] - mrs lr, spsr - str lr, [r13, #4] - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - cmp lr, #4 - addlts pc, pc, lr, lsl #2 @ Changes mode and branches - b __und_invalid @ 4 - 15 - b __und_usr @ 0 (USR_26 / USR_32) - b __und_invalid @ 1 (FIQ_26 / FIQ_32) - b __und_invalid @ 2 (IRQ_26 / IRQ_32) - b __und_svc @ 3 (SVC_26 / SVC_32) -/* - *------------------------------------------------------------------------------------------------ - * Prefetch abort dispatcher - dispatches it to the correct handler for the processor mode - *------------------------------------------------------------------------------------------------ - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC - */ -vector_prefetch: - @ - @ save mode specific registers - @ - sub lr, lr, #4 - ldr r13, .LCabt - str lr, [r13] - mrs lr, spsr - str lr, [r13, #4] - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - adds pc, pc, lr, lsl #2 @ Changes mode and branches - b __pabt_invalid @ 4 - 15 - b __pabt_usr @ 0 (USR_26 / USR_32) - b __pabt_invalid @ 1 (FIQ_26 / FIQ_32) - b __pabt_invalid @ 2 (IRQ_26 / IRQ_32) - b __pabt_invalid @ 3 (SVC_26 / SVC_32) /* - *------------------------------------------------------------------------------------------------ - * Data abort dispatcher - dispatches it to the correct handler for the processor mode - *------------------------------------------------------------------------------------------------ - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC + * Invalid mode handlers */ -vector_data: @ - @ save mode specific registers - @ - sub lr, lr, #8 - ldr r13, .LCabt - str lr, [r13] - mrs lr, spsr - str lr, [r13, #4] - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - cmp lr, #4 - addlts pc, pc, lr, lsl #2 @ Changes mode & branches - b __dabt_invalid @ 4 - 15 - b __dabt_usr @ 0 (USR_26 / USR_32) - b __dabt_invalid @ 1 (FIQ_26 / FIQ_32) - b __dabt_invalid @ 2 (IRQ_26 / IRQ_32) - b __dabt_svc @ 3 (SVC_26 / SVC_32) +__pabt_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go + stmia sp, {r0 - lr} @ Save XXX r0 - lr + ldr r4, .LCabt + mov r1, #BAD_PREFETCH + b 1f -/*============================================================================= - * Prefetch abort handler - *----------------------------------------------------------------------------- - */ -pabtmsg: .ascii "Pabt: %08lX\n\0" - .align -__pabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go - stmia sp, {r0 - r12} @ Save r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ @ Save sp_usr lr_usr +__dabt_invalid: sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - lr} @ Save SVC r0 - lr [lr *should* be intact] ldr r4, .LCabt - ldmia r4, {r5 - r7} @ Get USR pc, cpsr - stmia r8, {r5 - r7} @ Save USR pc, cpsr, old_r0 + mov r1, #BAD_DATA + b 1f - mrs r7, cpsr @ Enable interrupts if they were - bic r7, r7, #I_BIT @ previously - msr cpsr, r7 - mov r0, r5 @ address (pc) - mov r1, sp @ regs - bl SYMBOL_NAME(do_PrefetchAbort) @ call abort handler - teq r0, #0 @ Does this still apply??? - bne ret_from_exception @ Return from exception -#ifdef DEBUG_UNDEF - adr r0, t - bl SYMBOL_NAME(printk) -#endif - mov r0, r5 - mov r1, sp - and r2, r6, #31 - bl SYMBOL_NAME(do_undefinstr) - ldr lr, [sp, #S_PSR] @ Get USR cpsr - msr spsr, lr - ldmia sp, {r0 - pc}^ @ Restore USR registers +__irq_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate space on stack for frame + stmfd sp, {r0 - lr} @ Save r0 - lr + ldr r4, .LCirq + mov r1, #BAD_IRQ + b 1f -__pabt_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go - stmia sp, {r0 - lr} @ Save XXX r0 - lr - mov r7, r0 @ OLD R0 - ldr r4, .LCabt - ldmia r4, {r5 - r7} @ Get XXX pc, cpsr +__und_invalid: sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - lr} + ldr r4, .LCund + mov r1, #BAD_UNDEFINSTR @ int reason + +1: mov fp, #0 + ldmia r4, {r5 - r7} @ Get XXX pc, cpsr, old_r0 add r4, sp, #S_PC stmia r4, {r5 - r7} @ Save XXX pc, cpsr, old_r0 - mov r0, sp @ Prefetch aborts are definitely *not* - mov r1, #BAD_PREFETCH @ allowed in non-user modes. We cant - and r2, r6, #31 @ recover from this problem. + mov r0, sp + and r2, r6, #31 @ int mode b SYMBOL_NAME(bad_mode) -#ifdef DEBUG_UNDEF -t: .ascii "*** undef ***\r\n\0" - .align -#endif -/*============================================================================= - * Data abort handler code - *----------------------------------------------------------------------------- - */ -.LCprocfns: .word SYMBOL_NAME(processor) +wfs_mask_data: .word 0x0e200110 @ WFS/RFS + .word 0x0fef0fff + .word 0x0d0d0100 @ LDF [sp]/STF [sp] + .word 0x0d0b0100 @ LDF [fp]/STF [fp] + .word 0x0f0f0f00 -__dabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go - stmia sp, {r0 - r12} @ save r0 - r12 - add r3, sp, #S_PC - stmdb r3, {sp, lr}^ - ldr r0, .LCabt - ldmia r0, {r0 - r2} @ Get USR pc, cpsr - stmia r3, {r0 - r2} @ Save USR pc, cpsr, old_r0 - mov fp, #0 - mrs r2, cpsr @ Enable interrupts if they were - bic r2, r2, #I_BIT @ previously - msr cpsr, r2 - ldr r2, .LCprocfns - mov lr, pc - ldr pc, [r2, #8] @ call processor specific code - mov r3, sp - bl SYMBOL_NAME(do_DataAbort) - b ret_from_sys_call +/* We get here if an undefined instruction happens and the floating + * point emulator is not present. If the offending instruction was + * a WFS, we just perform a normal return as if we had emulated the + * operation. This is a hack to allow some basic userland binaries + * to run so that the emulator module proper can be loaded. --philb + */ +fpe_not_present: + adr r10, wfs_mask_data + ldmia r10, {r4, r5, r6, r7, r8} + ldr r10, [sp, #S_PC] @ Load PC + sub r10, r10, #-4 + mask_pc r10, r10 + ldrt r10, [r10] @ get instruction + and r5, r10, r5 + teq r5, r4 @ Is it WFS? + moveq pc, r9 + and r5, r10, r8 + teq r5, r6 @ Is it LDF/STF on sp or fp? + teqne r5, r7 + movne pc, lr + tst r10, #0x00200000 @ Does it have WB + moveq pc, r9 + and r4, r10, #255 @ get offset + and r6, r10, #0x000f0000 + tst r10, #0x00800000 @ +/- + ldr r5, [sp, r6, lsr #14] @ Load reg + rsbeq r4, r4, #0 + add r5, r5, r4, lsl #2 + str r5, [sp, r6, lsr #14] @ Save reg + mov pc, r9 +/* + * SVC mode handlers + */ + .align 5 __dabt_svc: sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ save r0 - r12 ldr r2, .LCabt add r0, sp, #S_FRAME_SIZE + ldmia r2, {r2 - r4} @ get pc, cpsr add r5, sp, #S_SP mov r1, lr - ldmia r2, {r2 - r4} @ get pc, cpsr stmia r5, {r0 - r4} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro tst r3, #I_BIT mrseq r0, cpsr @ Enable interrupts if they were @@ -619,29 +458,15 @@ __dabt_svc: sub sp, sp, #S_FRAME_SIZE msr spsr, r0 ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -__dabt_invalid: sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - lr} @ Save SVC r0 - lr [lr *should* be intact] - mov r7, r0 - ldr r4, .LCabt - ldmia r4, {r5, r6} @ Get SVC pc, cpsr - add r4, sp, #S_PC - stmia r4, {r5, r6, r7} @ Save SVC pc, cpsr, old_r0 - mov r0, sp - mov r1, #BAD_DATA - and r2, r6, #31 - b SYMBOL_NAME(bad_mode) - -/*============================================================================= - * Interrupt (IRQ) handler - *----------------------------------------------------------------------------- - */ -__irq_usr: sub sp, sp, #S_FRAME_SIZE + .align 5 +__irq_svc: sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ save r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ - ldr r4, .LCirq - ldmia r4, {r5 - r7} @ get saved PC, SPSR - stmia r8, {r5 - r7} @ save pc, psr, old_r0 + ldr r7, .LCirq + add r5, sp, #S_FRAME_SIZE + ldmia r7, {r7 - r9} + add r4, sp, #S_SP + mov r6, lr + stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro 1: get_irqnr_and_base r0, r6, r5 movne r1, sp @ @@ -649,148 +474,414 @@ __irq_usr: sub sp, sp, #S_FRAME_SIZE @ adrsvc ne, lr, 1b bne do_IRQ - b ret_with_reschedule - - irq_prio_table + ldr r0, [sp, #S_PSR] + msr spsr, r0 + ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -__irq_svc: sub sp, sp, #S_FRAME_SIZE + .align 5 +__und_svc: sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ save r0 - r12 + ldr r7, .LCund mov r6, lr - ldr r7, .LCirq ldmia r7, {r7 - r9} add r5, sp, #S_FRAME_SIZE add r4, sp, #S_SP - stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro + stmia r4, {r5 - r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro + + adrsvc al, r9, 1f @ r9 = normal FP return + bl call_fpe @ lr = undefined instr return + + mov r0, r5 @ unsigned long pc + mov r1, sp @ struct pt_regs *regs + bl SYMBOL_NAME(do_undefinstr) + +1: ldr lr, [sp, #S_PSR] @ Get SVC cpsr + msr spsr, lr + ldmia sp, {r0 - pc}^ @ Restore SVC registers + + .align 5 +.LCirq: .word __temp_irq +.LCund: .word __temp_und +.LCabt: .word __temp_abt +.LCprocfns: .word SYMBOL_NAME(processor) +.LCfp: .word SYMBOL_NAME(fp_enter) +#ifdef CONFIG_ALIGNMENT_TRAP +.LCswi: .word SYMBOL_NAME(cr_alignment) +#endif + + irq_prio_table + +/* + * User mode handlers + */ +#ifdef DEBUG_UNDEF +t: .ascii "Prefetch -> undefined instruction\n\0" + .align +#endif + .align 5 +__dabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go + stmia sp, {r0 - r12} @ save r0 - r12 + ldr r4, .LCabt + add r3, sp, #S_PC + ldmia r4, {r0 - r2} @ Get USR pc, cpsr + stmia r3, {r0 - r2} @ Save USR pc, cpsr, old_r0 + stmdb r3, {sp, lr}^ + +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_abt)] + mcr p15, 0, r7, c1, c0 +#endif + + mov fp, #0 + mrs r2, cpsr @ Enable interrupts if they were + bic r2, r2, #I_BIT @ previously + msr cpsr, r2 + ldr r2, .LCprocfns + mov lr, pc + ldr pc, [r2, #8] @ call processor specific code + mov r3, sp + adrsvc al, lr, ret_from_sys_call + b SYMBOL_NAME(do_DataAbort) + + .align 5 +__irq_usr: sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - r12} @ save r0 - r12 + ldr r4, .LCirq + add r8, sp, #S_PC + ldmia r4, {r5 - r7} @ get saved PC, SPSR + stmia r8, {r5 - r7} @ save pc, psr, old_r0 + stmdb r8, {sp, lr}^ + +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_irq)] + mcr p15, 0, r7, c1, c0 +#endif + + mov fp, #0 1: get_irqnr_and_base r0, r6, r5 movne r1, sp + adrsvc ne, lr, 1b @ @ routine called with r0 = irq number, r1 = struct pt_regs * @ - adrsvc ne, lr, 1b bne do_IRQ - ldr r0, [sp, #S_PSR] - msr spsr, r0 - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr - -__irq_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate space on stack for frame - stmfd sp, {r0 - lr} @ Save r0 - lr - mov r7, #-1 - ldr r4, .LCirq - ldmia r4, {r5, r6} @ get saved pc, psr - add r4, sp, #S_PC - stmia r4, {r5, r6, r7} - mov fp, #0 - mov r0, sp - mov r1, #BAD_IRQ - b SYMBOL_NAME(bad_mode) - -/*============================================================================= - * Undefined instruction handler - *----------------------------------------------------------------------------- - * Handles floating point instructions - */ -.LC2: .word SYMBOL_NAME(fp_enter) + mov r4, #0 + b ret_with_reschedule + .align 5 __und_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go stmia sp, {r0 - r12} @ Save r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ @ Save user r0 - r12 ldr r4, .LCund + add r8, sp, #S_PC ldmia r4, {r5 - r7} stmia r8, {r5 - r7} @ Save USR pc, cpsr, old_r0 - mov fp, #0 + stmdb r8, {sp, lr}^ @ Save user r0 - r12 - adrsvc al, r9, ret_from_exception @ r9 = normal FP return +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_und)] + mcr p15, 0, r7, c1, c0 +#endif + + mov fp, #0 + adrsvc al, r9, ret_from_sys_call @ r9 = normal FP return adrsvc al, lr, fpundefinstr @ lr = undefined instr return -1: get_current_task r10 +call_fpe: get_current_task r10 mov r8, #1 strb r8, [r10, #TSK_USED_MATH] @ set current->used_math + ldr r4, .LCfp add r10, r10, #TSS_FPESAVE @ r10 = workspace - ldr r4, .LC2 ldr pc, [r4] @ Call FP module USR entry point -__und_svc: sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - r12} @ save r0 - r12 - mov r6, lr - ldr r7, .LCund - ldmia r7, {r7 - r9} - add r5, sp, #S_FRAME_SIZE - add r4, sp, #S_SP - stmia r4, {r5 - r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro - - adrsvc al, r9, 3f @ r9 = normal FP return - bl 1b @ lr = undefined instr return - - mov r0, r5 @ unsigned long pc - mov r1, sp @ struct pt_regs *regs - bl SYMBOL_NAME(do_undefinstr) - -3: ldr lr, [sp, #S_PSR] @ Get SVC cpsr - msr spsr, lr - ldmia sp, {r0 - pc}^ @ Restore SVC registers - fpundefinstr: mov r0, lr mov r1, sp mrs r4, cpsr @ Enable interrupts bic r4, r4, #I_BIT msr cpsr, r4 - adrsvc al, lr, ret_from_exception + adrsvc al, lr, ret_from_sys_call b SYMBOL_NAME(do_undefinstr) -__und_invalid: sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - lr} - mov r7, r0 - ldr r4, .LCund - ldmia r4, {r5, r6} @ Get UND/IRQ/FIQ/ABT pc, cpsr - add r4, sp, #S_PC - stmia r4, {r5, r6, r7} @ Save UND/IRQ/FIQ/ABT pc, cpsr, old_r0 - mov r0, sp @ struct pt_regs *regs - mov r1, #BAD_UNDEFINSTR @ int reason - and r2, r6, #31 @ int mode - b SYMBOL_NAME(bad_mode) @ Does not ever return... + .align 5 +__pabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go + stmia sp, {r0 - r12} @ Save r0 - r12 + ldr r4, .LCabt + add r8, sp, #S_PC + ldmia r4, {r5 - r7} @ Get USR pc, cpsr + stmia r8, {r5 - r7} @ Save USR pc, cpsr, old_r0 + stmdb r8, {sp, lr}^ @ Save sp_usr lr_usr -/* We get here if an undefined instruction happens and the floating - * point emulator is not present. If the offending instruction was - * a WFS, we just perform a normal return as if we had emulated the - * operation. This is a hack to allow some basic userland binaries - * to run so that the emulator module proper can be loaded. --philb - */ -fpe_not_present: - adr r10, wfs_mask_data - ldmia r10, {r4, r5, r6, r7, r8} - ldr r10, [sp, #S_PC] @ Load PC - sub r10, r10, #4 - mask_pc r10, r10 - ldrt r10, [r10] @ get instruction - and r5, r10, r5 - teq r5, r4 @ Is it WFS? - moveq pc, r9 - and r5, r10, r8 - teq r5, r6 @ Is it LDF/STF on sp or fp? - teqne r5, r7 - movne pc, lr - tst r10, #0x00200000 @ Does it have WB - moveq pc, r9 - and r4, r10, #255 @ get offset - and r6, r10, #0x000f0000 - tst r10, #0x00800000 @ +/- - rsbeq r4, r4, #0 - ldr r5, [sp, r6, lsr #14] @ Load reg - add r5, r5, r4, lsl #2 - str r5, [sp, r6, lsr #14] @ Save reg - mov pc, r9 +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_abt)] + mcr p15, 0, r7, c1, c0 +#endif -wfs_mask_data: .word 0x0e200110 @ WFS - .word 0x0fff0fff - .word 0x0d0d0100 @ LDF [sp]/STF [sp] - .word 0x0d0b0100 @ LDF [fp]/STF [fp] - .word 0x0f0f0f00 + mov fp, #0 + mrs r7, cpsr @ Enable interrupts if they were + bic r7, r7, #I_BIT @ previously + msr cpsr, r7 + mov r0, r5 @ address (pc) + mov r1, sp @ regs + bl SYMBOL_NAME(do_PrefetchAbort) @ call abort handler + teq r0, #0 @ Does this still apply??? + bne ret_from_sys_call @ Return from exception +#ifdef DEBUG_UNDEF + adr r0, t + bl SYMBOL_NAME(printk) +#endif + mov r0, r5 + mov r1, sp + and r2, r6, #31 + bl SYMBOL_NAME(do_undefinstr) + ldr lr, [sp, #S_PSR] @ Get USR cpsr + msr spsr, lr + ldmia sp, {r0 - pc}^ @ Restore USR registers #include "entry-common.S" + .text + +#ifndef __ARM_ARCH_4__ +.Larm700bug: ldr r0, [sp, #S_PSR] @ Get calling cpsr + str lr, [r8] + msr spsr, r0 + ldmia sp, {r0 - lr}^ @ Get calling r0 - lr + mov r0, r0 + ldr lr, [sp, #S_PC] @ Get PC + add sp, sp, #S_FRAME_SIZE + movs pc, lr +#endif + + .section ".text.init",#alloc,#execinstr +/* + * Vector stubs. NOTE that we only align 'vector_IRQ' to a cache line boundary, + * and we rely on each stub being exactly 48 (1.5 cache lines) in size. This + * means that we only ever load two cache lines for this code, or one if we're + * lucky. We also copy this code to 0x200 so that we can use branches in the + * vectors, rather than ldr's. + */ + .align 5 +__stubs_start: +/* + * Interrupt dispatcher + * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC + */ +vector_IRQ: @ + @ save mode specific registers + @ + ldr r13, .LCsirq + sub lr, lr, #4 + str lr, [r13] @ save lr_IRQ + mrs lr, spsr + str lr, [r13, #4] @ save spsr_IRQ + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + and lr, lr, #15 + adr r13, .LCtab_irq + ldr lr, [r13, lr, lsl #2] + movs pc, lr @ Changes mode and branches +/* + * Data abort dispatcher - dispatches it to the correct handler for the processor mode + * Enter in ABT mode, spsr = USR CPSR, lr = USR PC + */ +vector_data: @ + @ save mode specific registers + @ + ldr r13, .LCsabt + sub lr, lr, #8 + str lr, [r13] + mrs lr, spsr + str lr, [r13, #4] + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + and lr, lr, #15 + adr r13, .LCtab_dabt + ldr lr, [r13, lr, lsl #2] + movs pc, lr @ Changes mode and branches + +/* + * Prefetch abort dispatcher - dispatches it to the correct handler for the processor mode + * Enter in ABT mode, spsr = USR CPSR, lr = USR PC + */ +vector_prefetch: + @ + @ save mode specific registers + @ + ldr r13, .LCsabt + sub lr, lr, #4 + str lr, [r13] @ save lr_ABT + mrs lr, spsr + str lr, [r13, #4] @ save spsr_ABT + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + ands lr, lr, #15 + ldreq lr, .LCtab_pabt + ldrne lr, .LCtab_pabt + 4 + movs pc, lr + +/* + * Undef instr entry dispatcher - dispatches it to the correct handler for the processor mode + * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC + */ +vector_undefinstr: + @ + @ save mode specific registers + @ + ldr r13, .LCsund + str lr, [r13] @ save lr_UND + mrs lr, spsr + str lr, [r13, #4] @ save spsr_UND + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + and lr, lr, #15 + adr r13, .LCtab_und + ldr lr, [r13, lr, lsl #2] + movs pc, lr @ Changes mode and branches + +/*============================================================================= + * Undefined FIQs + *----------------------------------------------------------------------------- + * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC + * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg. + * Basically to switch modes, we *HAVE* to clobber one register... brain + * damage alert! I don't think that we can execute any code in here in any + * other mode than FIQ... Ok you can switch to another mode, but you can't + * get out of that mode without clobbering one register. + */ +vector_FIQ: disable_fiq + subs pc, lr, #4 + +/*============================================================================= + * Address exception handler + *----------------------------------------------------------------------------- + * These aren't too critical. + * (they're not supposed to happen, and won't happen in 32-bit data mode). + */ + +vector_addrexcptn: + b vector_addrexcptn + +/* + * We group all the following data together to optimise + * for CPUs with separate I & D caches. + */ + .align 5 + +.LCtab_irq: .word __irq_usr @ 0 (USR_26 / USR_32) + .word __irq_invalid @ 1 (FIQ_26 / FIQ_32) + .word __irq_invalid @ 2 (IRQ_26 / IRQ_32) + .word __irq_svc @ 3 (SVC_26 / SVC_32) + .word __irq_invalid @ 4 + .word __irq_invalid @ 5 + .word __irq_invalid @ 6 + .word __irq_invalid @ 7 + .word __irq_invalid @ 8 + .word __irq_invalid @ 9 + .word __irq_invalid @ a + .word __irq_invalid @ b + .word __irq_invalid @ c + .word __irq_invalid @ d + .word __irq_invalid @ e + .word __irq_invalid @ f + +.LCtab_und: .word __und_usr @ 0 (USR_26 / USR_32) + .word __und_invalid @ 1 (FIQ_26 / FIQ_32) + .word __und_invalid @ 2 (IRQ_26 / IRQ_32) + .word __und_svc @ 3 (SVC_26 / SVC_32) + .word __und_invalid @ 4 + .word __und_invalid @ 5 + .word __und_invalid @ 6 + .word __und_invalid @ 7 + .word __und_invalid @ 8 + .word __und_invalid @ 9 + .word __und_invalid @ a + .word __und_invalid @ b + .word __und_invalid @ c + .word __und_invalid @ d + .word __und_invalid @ e + .word __und_invalid @ f + +.LCtab_dabt: .word __dabt_usr @ 0 (USR_26 / USR_32) + .word __dabt_invalid @ 1 (FIQ_26 / FIQ_32) + .word __dabt_invalid @ 2 (IRQ_26 / IRQ_32) + .word __dabt_svc @ 3 (SVC_26 / SVC_32) + .word __dabt_invalid @ 4 + .word __dabt_invalid @ 5 + .word __dabt_invalid @ 6 + .word __dabt_invalid @ 7 + .word __dabt_invalid @ 8 + .word __dabt_invalid @ 9 + .word __dabt_invalid @ a + .word __dabt_invalid @ b + .word __dabt_invalid @ c + .word __dabt_invalid @ d + .word __dabt_invalid @ e + .word __dabt_invalid @ f + +.LCtab_pabt: .word __pabt_usr + .word __pabt_invalid + +.LCvswi: .word vector_swi + +.LCsirq: .word __temp_irq +.LCsund: .word __temp_und +.LCsabt: .word __temp_abt + +__stubs_end: + + .equ __real_stubs_start, .LCvectors + 0x200 + +.LCvectors: swi SYS_ERROR0 + b __real_stubs_start + (vector_undefinstr - __stubs_start) + ldr pc, __real_stubs_start + (.LCvswi - __stubs_start) + b __real_stubs_start + (vector_prefetch - __stubs_start) + b __real_stubs_start + (vector_data - __stubs_start) + b __real_stubs_start + (vector_addrexcptn - __stubs_start) + b __real_stubs_start + (vector_IRQ - __stubs_start) + b __real_stubs_start + (vector_FIQ - __stubs_start) + +ENTRY(trap_init) + stmfd sp!, {r4 - r6, lr} + + adr r1, .LCvectors @ set up the vectors + mov r0, #0 + ldmia r1, {r1, r2, r3, r4, r5, r6, ip, lr} + stmia r0, {r1, r2, r3, r4, r5, r6, ip, lr} + + add r2, r0, #0x200 + adr r0, __stubs_start @ copy stubs to 0x200 + adr r1, __stubs_end +1: ldr r3, [r0], #4 + str r3, [r2], #4 + cmp r0, r1 + blt 1b + LOADREGS(fd, sp!, {r4 - r6, pc}) + .data +/* + * Do not reorder these, and do not insert extra data between... + */ + __temp_irq: .word 0 @ saved lr_irq .word 0 @ saved spsr_irq .word -1 @ old_r0 @@ -800,3 +891,10 @@ __temp_und: .word 0 @ Saved lr_und __temp_abt: .word 0 @ Saved lr_abt .word 0 @ Saved spsr_abt .word -1 @ old_r0 + + .globl SYMBOL_NAME(cr_alignment) + .globl SYMBOL_NAME(cr_no_alignment) +SYMBOL_NAME(cr_alignment): + .space 4 +SYMBOL_NAME(cr_no_alignment): + .space 4 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index c77c0ea51..2fc0fdddc 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -1,51 +1,54 @@ /*============================================================================ * All exits to user mode from the kernel go through this code. */ - -#include <linux/config.h> - .globl ret_from_sys_call -ret_from_exception: - adr r0, 1f - ldmia r0, {r0, r1} + .align 5 +fast_syscall_return: + str r0, [sp, #S_R0 + 4] @ returned r0 +slow_syscall_return: + add sp, sp, #4 +ret_from_sys_call: + adr r0, bh_data + ldmia r0, {r0, r4} ldr r0, [r0] - ldr r1, [r1] + ldr r1, [r4] tst r0, r1 blne SYMBOL_NAME(do_bottom_half) -ret_from_intr: ldr r0, [sp, #S_PSR] - tst r0, #3 - beq ret_with_reschedule - b ret_from_all +ret_with_reschedule: + get_current_task r1 @ check for scheduling + ldr r0, [r1, #TSK_NEED_RESCHED] + teq r0, #0 + bne ret_reschedule + ldr r1, [r1, #TSK_SIGPENDING] + teq r1, #0 @ check for signals + bne ret_signal + +ret_from_all: restore_user_regs ret_signal: mov r1, sp adrsvc al, lr, ret_from_all + mov r2, r4 b SYMBOL_NAME(do_signal) -2: bl SYMBOL_NAME(schedule) +ret_reschedule: adrsvc al, lr, ret_with_reschedule + b SYMBOL_NAME(schedule) -ret_from_sys_call: - adr r0, 1f + .globl ret_from_exception +ret_from_exception: + adr r0, bh_data ldmia r0, {r0, r1} ldr r0, [r0] ldr r1, [r1] + mov r4, #0 tst r0, r1 - adrsvc ne, lr, ret_from_intr - bne SYMBOL_NAME(do_bottom_half) - -ret_with_reschedule: - get_current_task r1 - ldr r0, [r1, #TSK_NEED_RESCHED] - teq r0, #0 - bne 2b - ldr r1, [r1, #TSK_SIGPENDING] - teq r1, #0 - bne ret_signal - -ret_from_all: restore_user_regs + blne SYMBOL_NAME(do_bottom_half) + ldr r0, [sp, #S_PSR] + tst r0, #3 @ returning to user mode? + beq ret_with_reschedule + b ret_from_all -1: .word SYMBOL_NAME(bh_mask) - .word SYMBOL_NAME(bh_active) +#include "calls.S" /*============================================================================= * SWI handler @@ -57,84 +60,65 @@ ret_from_all: restore_user_regs * too worried. */ -#include "calls.S" - + .align 5 vector_swi: save_user_regs - mov fp, #0 mask_pc lr, lr - ldr r6, [lr, #-4]! @ get SWI instruction + mov fp, #0 + ldr r6, [lr, #-4] @ get SWI instruction arm700_bug_check r6, r7 +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, .LCswi + ldr r7, [r7] + mcr p15, 0, r7, c1, c0 +#endif enable_irqs r7 - + + str r4, [sp, #-4]! @ new style: (r0 = arg1, r4 = arg5) + adrsvc al, lr, fast_syscall_return + bic r6, r6, #0xff000000 @ mask off SWI op-code eor r6, r6, #OS_NUMBER<<20 @ check OS number cmp r6, #NR_syscalls @ check upper syscall limit bcs 2f - get_current_task r5 - ldr ip, [r5, #TSK_FLAGS] @ check for syscall tracing + get_current_task r7 + ldr ip, [r7, #TSK_FLAGS] @ check for syscall tracing + adr r5, SYMBOL_NAME(sys_call_table) tst ip, #PF_TRACESYS - bne 1f + ldreq pc, [r5, r6, lsl #2] @ call sys routine - adr ip, SYMBOL_NAME(sys_call_table) - str r4, [sp, #-4]! @ new style: (r0 = arg1, r5 = arg5) - mov lr, pc - ldr pc, [ip, r6, lsl #2] @ call sys routine - add sp, sp, #4 - str r0, [sp, #S_R0] @ returned r0 - b ret_from_sys_call - -1: ldr r7, [sp, #S_IP] @ save old IP + ldr r7, [sp, #S_IP + 4] @ save old IP mov r0, #0 - str r0, [sp, #S_IP] @ trace entry [IP = 0] + str r0, [sp, #S_IP + 4] @ trace entry [IP = 0] bl SYMBOL_NAME(syscall_trace) - str r7, [sp, #S_IP] - ldmia sp, {r0 - r3} @ have to reload r0 - r3 - adr ip, SYMBOL_NAME(sys_call_table) - str r4, [sp, #-4]! @ new style: (r0 = arg1, r5 = arg5) + str r7, [sp, #S_IP + 4] + + ldmib sp, {r0 - r3} @ have to reload r0 - r3 mov lr, pc - ldr pc, [ip, r6, lsl #2] @ call sys routine - add sp, sp, #4 - str r0, [sp, #S_R0] @ returned r0 + ldr pc, [r5, r6, lsl #2] @ call sys routine + str r0, [sp, #S_R0 + 4] @ returned r0 + mov r0, #1 - str r0, [sp, #S_IP] @ trace exit [IP = 1] + str r0, [sp, #S_IP + 4] @ trace exit [IP = 1] bl SYMBOL_NAME(syscall_trace) - str r7, [sp, #S_IP] - b ret_from_sys_call + str r7, [sp, #S_IP + 4] + b slow_syscall_return -2: tst r6, #0x00f00000 @ is it a Unix SWI? +2: add r1, sp, #4 + tst r6, #0x00f00000 @ is it a Unix SWI? bne 3f - cmp r6, #(KSWI_SYS_BASE - KSWI_BASE) - bcc 4f @ not private func - bic r0, r6, #0x000f0000 - mov r1, sp - bl SYMBOL_NAME(arm_syscall) - b ret_from_sys_call - -3: eor r0, r6, #OS_NUMBER<<20 @ Put OS number back - mov r1, sp - bl SYMBOL_NAME(deferred) - ldmfd sp, {r0 - r3} - b ret_from_sys_call - -4: bl SYMBOL_NAME(sys_ni_syscall) - str r0, [sp, #0] @ returned r0 - b ret_from_sys_call + subs r0, r6, #(KSWI_SYS_BASE - KSWI_BASE) + bcs SYMBOL_NAME(arm_syscall) + b SYMBOL_NAME(sys_ni_syscall) @ not private func -@ r0 = syscall number -@ r1 = syscall r0 -@ r5 = syscall r4 -@ ip = syscall table -SYMBOL_NAME(sys_syscall): - mov r6, r0 - eor r6, r6, #OS_NUMBER << 20 - cmp r6, #NR_syscalls @ check range - movgt r0, #-ENOSYS - movgt pc, lr - add sp, sp, #4 @ take of the save of our r4 - ldmib sp, {r0 - r4} @ get our args - str r4, [sp, #-4]! @ Put our arg on the stack - ldr pc, [ip, r6, lsl #2] +3: eor r0, r6, #OS_NUMBER <<20 @ Put OS number back + adrsvc al, lr, slow_syscall_return + b SYMBOL_NAME(deferred) + + .align 5 + +bh_data: .word SYMBOL_NAME(bh_mask) + .word SYMBOL_NAME(bh_active) ENTRY(sys_call_table) #include "calls.S" @@ -142,10 +126,25 @@ ENTRY(sys_call_table) /*============================================================================ * Special system call wrappers */ +@ r0 = syscall number +@ r5 = syscall table +SYMBOL_NAME(sys_syscall): + eor r6, r0, #OS_NUMBER << 20 + cmp r6, #NR_syscalls @ check range + ldmleib sp, {r0 - r4} @ get our args + strle r4, [sp] @ Put our arg on the stack + ldrle pc, [r5, r6, lsl #2] + mov r0, #-ENOSYS + mov pc, lr + sys_fork_wrapper: add r0, sp, #4 b SYMBOL_NAME(sys_fork) +sys_vfork_wrapper: + add r0, sp, #4 + b SYMBOL_NAME(sys_vfork) + sys_execve_wrapper: add r3, sp, #4 b SYMBOL_NAME(sys_execve) @@ -192,99 +191,6 @@ sys_sigaltstack_wrapper: ldr r2, [sp, #4 + S_SP] b do_sigaltstack -/* - *============================================================================= - * Low-level interface code - *----------------------------------------------------------------------------- - * Trap initialisation - *----------------------------------------------------------------------------- - * - * Note - FIQ code has changed. The default is a couple of words in 0x1c, 0x20 - * that call _unexp_fiq. Nowever, we now copy the FIQ routine to 0x1c (removes - * some excess cycles). - * - * What we need to put into 0-0x1c are ldrs to branch to 0xC0000000 - * (the kernel). - * 0x1c onwards is reserved for FIQ, so I think that I will allocate 0xe0 onwards for - * the actual address to jump to. - */ - - .section ".text.init",#alloc,#execinstr - -#if defined(CONFIG_CPU_32) -/* - * these go into 0x00 - */ -.Lbranches: swi SYS_ERROR0 - ldr pc, .Lbranches + 0xe4 - ldr pc, .Lbranches + 0xe8 - ldr pc, .Lbranches + 0xec - ldr pc, .Lbranches + 0xf0 - ldr pc, .Lbranches + 0xf4 - ldr pc, .Lbranches + 0xf8 - ldr pc, .Lbranches + 0xfc -/* - * this is put into 0xe4 and above - */ -.Ljump_addresses: - .word vector_undefinstr @ 0xe4 - .word vector_swi @ 0xe8 - .word vector_prefetch @ 0xec - .word vector_data @ 0xf0 - .word vector_addrexcptn @ 0xf4 - .word vector_IRQ @ 0xf8 - .word _unexp_fiq @ 0xfc -/* - * initialise the trap system - */ -ENTRY(trap_init) - stmfd sp!, {r4 - r7, lr} - initialise_traps_extra - mov r0, #0xe4 - adr r1, .Ljump_addresses - ldmia r1, {r1 - r7} - stmia r0, {r1 - r7} - mov r0, #0 - adr r1, .Lbranches - ldmia r1, {r1 - r7} - stmia r0, {r1 - r7} - LOADREGS(fd, sp!, {r4 - r7, pc}) -#elif defined(CONFIG_CPU_26) -.Ljump_addresses: - swi SYS_ERROR0 - .word vector_undefinstr - 12 - .word vector_swi - 16 - .word vector_prefetch - 20 - .word vector_data - 24 - .word vector_addrexcptn - 28 - .word vector_IRQ - 32 - .word _unexp_fiq - 36 - b . + 8 -/* - * initialise the trap system - */ -ENTRY(trap_init) - stmfd sp!, {r4 - r7, lr} - adr r1, .Ljump_addresses - ldmia r1, {r1 - r7, ip, lr} - orr r2, lr, r2, lsr #2 - orr r3, lr, r3, lsr #2 - orr r4, lr, r4, lsr #2 - orr r5, lr, r5, lsr #2 - orr r6, lr, r6, lsr #2 - orr r7, lr, r7, lsr #2 - orr ip, lr, ip, lsr #2 - mov r0, #0 - stmia r0, {r1 - r7, ip} - ldmfd sp!, {r4 - r7, pc}^ -#endif - - .previous - -/*============================================================================ - * FP support - */ - .data ENTRY(fp_enter) diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index a5da15c7f..e3e87469f 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -2,6 +2,8 @@ * linux/arch/arm/kernel/fiq.c * * Copyright (C) 1998 Russell King + * Copyright (C) 1998, 1999 Phil Blundell + * * FIQ support written by Philip Blundell <philb@gnu.org>, 1998. * * FIQ support re-written by Russell King to be more generic @@ -78,7 +80,7 @@ int fiq_def_op(void *ref, int relinquish) unprotect_page_0(); *(unsigned long *)FIQ_VECTOR = no_fiq_insn; protect_page_0(); - __flush_entry_to_ram(FIQ_VECTOR); + flush_icache_range(FIQ_VECTOR, FIQ_VECTOR + 4); } return 0; @@ -106,28 +108,77 @@ void set_fiq_handler(void *start, unsigned int length) memcpy((void *)FIQ_VECTOR, start, length); protect_page_0(); -#ifdef CONFIG_CPU_32 - processor.u.armv3v4._flush_cache_area(FIQ_VECTOR, FIQ_VECTOR + length, 1); -#endif + flush_icache_range(FIQ_VECTOR, FIQ_VECTOR + length); } +/* + * Taking an interrupt in FIQ mode is death, so both these functions + * disable irqs for the duration. + */ void set_fiq_regs(struct pt_regs *regs) { - /* not yet - - * this is temporary to get the floppy working - * again on RiscPC. It *will* become more - * generic. - */ -#ifdef CONFIG_ARCH_ACORN - extern void floppy_fiqsetup(unsigned long len, unsigned long addr, - unsigned long port); - floppy_fiqsetup(regs->ARM_r9, regs->ARM_r10, regs->ARM_fp); + register unsigned long tmp, tmp2; + __asm__ volatile ( +#ifdef CONFIG_CPU_26 + "mov %0, pc + bic %1, %0, #0x3 + orr %1, %1, #0x0c000001 + teqp %1, #0 @ select FIQ mode + mov r0, r0 + ldmia %2, {r8 - r14} + teqp %0, #0 @ return to SVC mode + mov r0, r0" #endif +#ifdef CONFIG_CPU_32 + "mrs %0, cpsr + bic %1, %0, #0xf + orr %1, %1, #0xc1 + msr cpsr, %1 @ select FIQ mode + mov r0, r0 + ldmia %2, {r8 - r14} + msr cpsr, %0 @ return to SVC mode + mov r0, r0" +#endif + : "=r" (tmp), "=r" (tmp2) + : "r" (®s->ARM_r8) + /* These registers aren't modified by the above code in a way + visible to the compiler, but we mark them as clobbers anyway + so that GCC won't put any of the input or output operands in + them. */ + : "r8", "r9", "r10", "r11", "r12", "r13", "r14"); } void get_fiq_regs(struct pt_regs *regs) { - /* not yet */ + register unsigned long tmp, tmp2; + __asm__ volatile ( +#ifdef CONFIG_CPU_26 + "mov %0, pc + bic %1, %0, #0x3 + orr %1, %1, #0x0c000001 + teqp %1, #0 @ select FIQ mode + mov r0, r0 + stmia %2, {r8 - r14} + teqp %0, #0 @ return to SVC mode + mov r0, r0" +#endif +#ifdef CONFIG_CPU_32 + "mrs %0, cpsr + bic %1, %0, #0xf + orr %1, %1, #0xc1 + msr cpsr, %1 @ select FIQ mode + mov r0, r0 + stmia %2, {r8 - r14} + msr cpsr, %0 @ return to SVC mode + mov r0, r0" +#endif + : "=r" (tmp), "=r" (tmp2) + : "r" (®s->ARM_r8) + /* These registers aren't modified by the above code in a way + visible to the compiler, but we mark them as clobbers anyway + so that GCC won't put any of the input or output operands in + them. */ + : "r8", "r9", "r10", "r11", "r12", "r13", "r14"); } int claim_fiq(struct fiq_handler *f) diff --git a/arch/arm/kernel/head-armv.S b/arch/arm/kernel/head-armv.S index cd4be86cb..2e13f0818 100644 --- a/arch/arm/kernel/head-armv.S +++ b/arch/arm/kernel/head-armv.S @@ -7,13 +7,21 @@ */ #include <linux/config.h> #include <linux/linkage.h> +#include <asm/hardware.h> +#include <asm/dec21285.h> + + .globl SYMBOL_NAME(swapper_pg_dir) + .equ SYMBOL_NAME(swapper_pg_dir), TEXTADDR - 0x4000 + + .section ".text.init",#alloc,#execinstr +ENTRY(stext) +ENTRY(_stext) -#ifndef CONFIG_ARCH_VNC #if (TEXTADDR & 0xffff) != 0x8000 #error TEXTADDR must start at 0xXXXX8000 #endif -#else - .text + +#ifdef CONFIG_ARCH_NETWINDER mov r0, r0 mov r0, r0 mov r0, r0 @@ -22,16 +30,34 @@ mov r0, r0 mov r0, r0 mov r0, r0 + + adr r2, 1f + ldmdb r2, {r7, r8} + and r3, r2, #0x0000c000 + teq r3, #0x00008000 + beq __entry + bic r3, r2, #0xc000 + orr r3, r3, #0x8000 + mov r0, r3 + mov r4, #32 + sub r5, r8, r7 + b 1f + + .word _stext + .word _end + +1: ldmia r2!, {r6, r7, r8, r9} + stmia r3!, {r6, r7, r8, r9} + subs r4, r4, #16 + bcs 1b + movs r4, r5 + mov r5, #0 + movne pc, r0 + mov r0, #0 mov r1, #5 #endif -#define DEBUG - - .globl SYMBOL_NAME(swapper_pg_dir) - .equ SYMBOL_NAME(swapper_pg_dir), TEXTADDR - 0x4000 - - .text /* * Entry point and restart point. Entry *must* be called with r0 == 0, * MMU off. Note! These should be unique!!! Please read Documentation/ARM-README @@ -45,16 +71,15 @@ * r1 = 5 -> Corel Netwinder * r1 = 6 -> CATS * r1 = 7 -> tbox + * r1 = 8 -> SA110/21285 as co-processor */ -ENTRY(stext) -ENTRY(_stext) __entry: teq r0, #0 @ check for illegal entry... bne .Lerror @ loop indefinitely - cmp r1, #8 @ Unknown machine architecture + cmp r1, #9 @ Unknown machine architecture bge .Lerror -/* First thing to do is to get the page tables set up so that we can call the kernel - * in the correct place. This is relocatable code... +/* First thing to do is to get the page tables set up so that we can call + * the kernel in the correct place. This is relocatable code... * - Read processor ID register (CP#15, CR0). */ mrc p15, 0, r9, c0, c0 @ get Processor ID @@ -74,7 +99,7 @@ __entry: teq r0, #0 @ check for illegal entry... adr r4, .LCMachTypes add r4, r4, r1, lsl #4 - ldmia r4, {r4, r5, r6} + ldmia r4, {r4, r5, r6, r7} /* * r4 = page dir in physical ram * r5 = physical address of start of RAM @@ -99,26 +124,28 @@ __entry: teq r0, #0 @ check for illegal entry... add r3, r3, #1 << 20 str r3, [r0], #4 add r3, r3, #1 << 20 -#ifdef DEBUG +#ifdef CONFIG_DEBUG_LL /* Map in IO space * This allows debug messages to be output via a serial * before/while paging_init. */ - add r0, r4, #0x3800 + add r0, r4, r7 orr r3, r6, r8 add r2, r0, #0x0800 1: str r3, [r0], #4 add r3, r3, #1 << 20 teq r0, r2 bne 1b -#ifdef CONFIG_ARCH_VNC - add r0, r4, #0x3f00 - add r0, r0, #0x00f8 +#ifdef CONFIG_ARCH_NETWINDER + teq r1, #5 + bne 1f + add r0, r4, #0x3fc0 mov r3, #0x7c000000 orr r3, r3, r8 str r3, [r0], #4 add r3, r3, #1 << 20 str r3, [r0], #4 +1: #endif #endif #ifdef CONFIG_ARCH_RPC @@ -168,49 +195,55 @@ __entry: teq r0, #0 @ check for illegal entry... .LCMachTypes: .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0xe0000000 @ I/O address - .long 0 + .long 0x3800 @ Acorn RiscPC .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x10000000 .long 0x10000000 .long 0x03000000 - .long 0 + .long 0x3800 @ EBSIT ??? .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 .long 0 .long 0xe0000000 - .long 0 + .long 0x3800 @ NexusPCI .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x40000000 .long 0x40000000 .long 0x10000000 - .long 0 + .long 0x3800 @ DEC EBSA285 .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0x24000000 @ I/O base address (0x42000000 -> 0xFE000000) - .long 0 + .long 0x3800 @ Corel VNC .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0x24000000 @ I/O base address (0x42000000 -> 0xfe000000) - .long 0 + .long 0x3800 @ CATS .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0x24000000 @ I/O base address (0x42000000 -> 0xfe000000) - .long 0 + .long 0x3800 @ tbox .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x80000000 .long 0x80000000 @ Address of RAM .long 0x00400000 @ Uart - .long 0 + .long 0x3800 + + @ DEC EBSA285 as co-processor + .long 0x4000 @ Address of page tables (physical) + .long 0 @ Address of RAM + .long DC21285_ARMCSR_BASE @ Physical I/O base address + .long 0x7cf00000 >> 18 @ Virtual I/O base address .LCProcTypes: @ ARM6 / 610 .long 0x41560600 @@ -250,7 +283,11 @@ __entry: teq r0, #0 @ check for illegal entry... mcr p15, 0, r4, c2, c0 @ load page table pointer mov r0, #0x1f @ Domains 0, 1 = client mcr p15, 0, r0, c3, c0 @ load domain access register +#ifdef CONFIG_ALIGNMENT_TRAP + mov r0, #0x3f @ ....S..DPWCAM +#else mov r0, #0x3d @ ....S..DPWC.M +#endif orr r0, r0, #0x100 mov pc, lr @@ -261,7 +298,11 @@ __entry: teq r0, #0 @ check for illegal entry... mcr p15, 0, r4, c2, c0 @ load page table pointer mov r0, #0x1f @ Domains 0, 1 = client mcr p15, 0, r0, c3, c0 @ load domain access register +#ifdef CONFIG_ALIGNMENT_TRAP + mov r0, #0x7f @ ....S.LDPWCAM +#else mov r0, #0x7d @ ....S.LDPWC.M +#endif orr r0, r0, #0x100 mov pc, lr @@ -276,32 +317,38 @@ __entry: teq r0, #0 @ check for illegal entry... mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, #0x0e00 bic r0, r0, #0x0002 +#ifdef CONFIG_ALIGNMENT_TRAP + orr r0, r0, #0x003f @ I...S..DPWCAM +#else orr r0, r0, #0x003d @ I...S..DPWC.M +#endif orr r0, r0, #0x1100 @ v4 supports separate I cache mov pc, lr - .section ".text.init",#alloc,#execinstr - .Lsa_fastclock: mcr p15, 0, r4, c15, c1, 2 @ Enable clock switching mov pc, lr .LC0: .long SYMBOL_NAME(__entry) - .long SYMBOL_NAME(machine_type) + .long SYMBOL_NAME(__machine_arch_type) .long SYMBOL_NAME(__bss_start) .long SYMBOL_NAME(processor_id) .long SYMBOL_NAME(_end) + .long SYMBOL_NAME(cr_alignment) .long SYMBOL_NAME(init_task_union)+8192 .align .Lalready_done_mmap: adr r4, .LC0 - ldmia r4, {r3, r4, r5, r6, r8, sp} @ Setup stack + ldmia r4, {r3, r4, r5, r6, r7, r8, sp} @ Setup stack add r10, r10, r3 @ Add base back in mov fp, #0 -1: cmp r5, r8 @ Clear BSS +1: cmp r5, r7 @ Clear BSS strcc fp, [r5],#4 bcc 1b + bic r2, r0, #2 @ Clear 'A' bit + stmia r8, {r0, r2} @ Save control register values + str r1, [r4] @ Save machine type str r9, [r6] @ Save processor ID mov lr, pc @@ -310,10 +357,12 @@ __entry: teq r0, #0 @ check for illegal entry... b SYMBOL_NAME(start_kernel) .text -#ifdef DEBUG + +#ifdef CONFIG_DEBUG_LL /* * Some debugging routines (useful if you've got MM problems and - * printk isn't working). For DEBUGGING ONLY!!! + * printk isn't working). For DEBUGGING ONLY!!! Do not leave + * references to these in a production kernel! */ #if defined(CONFIG_ARCH_RPC) .macro addruart,rx @@ -362,64 +411,71 @@ __entry: teq r0, #0 @ check for illegal entry... beq 1001b .endm -#elif defined(CONFIG_ARCH_EBSA285) +#elif defined(CONFIG_HOST_FOOTBRIDGE) || defined(CONFIG_ADDIN_FOOTBRIDGE) +#ifndef CONFIG_DEBUG_DC21285_PORT + /* For NetWinder debugging */ .macro addruart,rx - mov \rx, #0xfe000000 + mov \rx, #0xff000000 + orr \rx, \rx, #0x000003f8 .endm .macro senduart,rd,rx - str \rd, [\rx, #0x160] @ UARTDR + strb \rd, [\rx] .endm .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x178] @ UARTFLG - tst \rd, #1 << 3 - bne 1001b +1002: ldrb \rd, [\rx, #0x5] + and \rd, \rd, #0x60 + teq \rd, #0x60 + bne 1002b .endm .macro waituart,rd,rx +1001: ldrb \rd, [\rx, #0x6] + tst \rd, #0x10 + beq 1001b .endm +#else + /* For EBSA285 debugging */ + .equ dc21285_high, ARMCSR_BASE & 0xff000000 + .equ dc21285_low, ARMCSR_BASE & 0x00ffffff -#elif defined(CONFIG_ARCH_NEXUSPCI) .macro addruart,rx - ldr \rx, =0xfff00000 + mov \rx, #dc21285_high + .if dc21285_low + orr \rx, \rx, #dc21285_low + .endif .endm .macro senduart,rd,rx - str \rd, [\rx, #0xc] + str \rd, [\rx, #0x160] @ UARTDR .endm .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x4] - tst \rd, #1 << 0 +1001: ldr \rd, [\rx, #0x178] @ UARTFLG + tst \rd, #1 << 3 bne 1001b .endm .macro waituart,rd,rx .endm - -#elif defined(CONFIG_ARCH_VNC) +#endif +#elif defined(CONFIG_ARCH_NEXUSPCI) .macro addruart,rx - mov \rx, #0xff000000 - orr \rx, \rx, #0x00e00000 - orr \rx, \rx, #0x000003f8 + ldr \rx, =0xfff00000 .endm .macro senduart,rd,rx - strb \rd, [\rx] + str \rd, [\rx, #0xc] .endm .macro busyuart,rd,rx -1002: ldrb \rd, [\rx, #0x5] - and \rd, \rd, #0x60 - teq \rd, #0x60 - bne 1002b +1001: ldr \rd, [\rx, #0x4] + tst \rd, #1 << 0 + bne 1001b .endm .macro waituart,rd,rx -1001: ldrb \rd, [\rx, #0x6] - tst \rd, #0x10 - beq 1001b .endm #else #error Unknown architecture @@ -476,8 +532,6 @@ ENTRY(printch) mov r0, #0 b 1b - .ltorg - .bss hexbuf: .space 16 diff --git a/arch/arm/kernel/hw-ebsa285.c b/arch/arm/kernel/hw-ebsa285.c deleted file mode 100644 index e3385696b..000000000 --- a/arch/arm/kernel/hw-ebsa285.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * arch/arm/kernel/hw-ebsa286.c - * - * EBSA285 hardware specific functions - * - * Copyright (C) 1998 Russell King, Phil Blundel - */ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/ptrace.h> -#include <linux/interrupt.h> -#include <linux/init.h> - -#include <asm/irq.h> -#include <asm/system.h> - -extern int setup_arm_irq(int, struct irqaction *); - -extern void pci_set_cmd(struct pci_dev *dev, unsigned short clear, unsigned short set); -extern void pci_set_base_addr(struct pci_dev *dev, int idx, unsigned int addr); -extern void pci_set_irq_line(struct pci_dev *dev, unsigned int irq); - -static int irqmap_ebsa[] __initdata = { 9, 8, 18, 11 }; -static int irqmap_cats[] __initdata = { 18, 8, 9, 11 }; - -__initfunc(static int ebsa_irqval(struct pci_dev *dev)) -{ - unsigned char pin; - - pcibios_read_config_byte(dev->bus->number, - dev->devfn, - PCI_INTERRUPT_PIN, - &pin); - - return irqmap_ebsa[(PCI_SLOT(dev->devfn) + pin) & 3]; -} - -__initfunc(static int cats_irqval(struct pci_dev *dev)) -{ - if (dev->irq >= 128) - return 32 + (dev->irq & 0x1f); - - switch (dev->irq) { - case 1: - case 2: - case 3: - case 4: - return irqmap_cats[dev->irq - 1]; - case 0: - return 0; - } - - printk("PCI: device %02x:%02x has unknown irq line %x\n", - dev->bus->number, dev->devfn, dev->irq); - return 0; -} - -__initfunc(void pcibios_fixup_ebsa285(struct pci_dev *dev)) -{ - char cmd; - - /* sort out the irq mapping for this device */ - switch (machine_type) { - case MACH_TYPE_EBSA285: - dev->irq = ebsa_irqval(dev); - break; - case MACH_TYPE_CATS: - dev->irq = cats_irqval(dev); - break; - } - - /* Turn on bus mastering - boot loader doesn't - * - perhaps it should! - dag - */ - pci_read_config_byte(dev, PCI_COMMAND, &cmd); - pci_write_config_byte(dev, PCI_COMMAND, cmd | PCI_COMMAND_MASTER); -} - -static void irq_pci_err(int irq, void *dev_id, struct pt_regs *regs) -{ - const char *err = "unknown"; - unsigned long cmd = *(unsigned long *)0xfe000004 & 0xffff; - unsigned long ctrl = *(unsigned long *)0xfe00013c & 0xffffde07; - static unsigned long next_warn[7]; - int idx = 6; - - switch(irq) { - case IRQ_PCIPARITY: - *(unsigned long *)0xfe000004 = cmd | 1 << 31; - idx = 0; - err = "parity"; - break; - - case IRQ_PCITARGETABORT: - *(unsigned long *)0xfe000004 = cmd | 1 << 28; - idx = 1; - err = "target abort"; - break; - - case IRQ_PCIMASTERABORT: - *(unsigned long *)0xfe000004 = cmd | 1 << 29; - idx = 2; - err = "master abort"; - break; - - case IRQ_PCIDATAPARITY: - *(unsigned long *)0xfe000004 = cmd | 1 << 24; - idx = 3; - err = "data parity"; - break; - - case IRQ_DISCARDTIMER: - *(unsigned long *)0xfe00013c = ctrl | 1 << 8; - idx = 4; - err = "discard timer"; - break; - - case IRQ_SERR: - *(unsigned long *)0xfe00013c = ctrl | 1 << 3; - idx = 5; - err = "system"; - break; - } - if (time_after_eq(jiffies, next_warn[idx])) { - next_warn[idx] = jiffies + 3 * HZ / 100; - printk(KERN_ERR "PCI %s error detected\n", err); - } -} - -static struct irqaction irq_pci_error = { - irq_pci_err, SA_INTERRUPT, 0, "PCI error", NULL, NULL -}; - -__initfunc(void pcibios_init_ebsa285(void)) -{ - setup_arm_irq(IRQ_PCIPARITY, &irq_pci_error); - setup_arm_irq(IRQ_PCITARGETABORT, &irq_pci_error); - setup_arm_irq(IRQ_PCIMASTERABORT, &irq_pci_error); - setup_arm_irq(IRQ_PCIDATAPARITY, &irq_pci_error); - setup_arm_irq(IRQ_DISCARDTIMER, &irq_pci_error); - setup_arm_irq(IRQ_SERR, &irq_pci_error); - - /* - * Map our SDRAM at a known address in PCI space, just in case - * the firmware had other ideas. Using a nonzero base is slightly - * bizarre but apparently necessary to avoid problems with some - * video cards. - * - * We should really only do this if the central function is enabled. - */ - *(unsigned long *)0xfe000010 = 0; - *(unsigned long *)0xfe000018 = 0xe0000000; - *(unsigned long *)0xfe0000f8 = 0; - *(unsigned long *)0xfe0000fc = 0; - *(unsigned long *)0xfe000100 = 0x01fc0000; - *(unsigned long *)0xfe000104 = 0; - *(unsigned long *)0xfe000108 = 0x80000000; - *(unsigned long *)0xfe000004 = 0x17; -} diff --git a/arch/arm/kernel/hw-footbridge.c b/arch/arm/kernel/hw-footbridge.c new file mode 100644 index 000000000..857f120e1 --- /dev/null +++ b/arch/arm/kernel/hw-footbridge.c @@ -0,0 +1,893 @@ +/* + * arch/arm/kernel/hw-footbridge.c + * + * Footbridge-dependent machine fixup + * + * Copyright (C) 1998, 1999 Russell King, Phil Blundell + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/ptrace.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/init.h> + +#include <asm/dec21285.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/leds.h> +#include <asm/system.h> + +#define IRDA_IO_BASE 0x180 +#define ETHER10_IO_BASE 0x301 +#define GP1_IO_BASE 0x338 +#define GP2_IO_BASE 0x33a +#define DEC21143_IO_BASE 0x401 +#define DEC21143_MEM_BASE 0x00800000 +#define CYBER2000_MEM_BASE 0x01000000 + +int have_isa_bridge; + +extern int setup_arm_irq(int, struct irqaction *); +extern void pci_set_cmd(struct pci_dev *dev, unsigned short clear, unsigned short set); +extern void pci_set_base_addr(struct pci_dev *dev, int idx, unsigned int addr); +extern void pci_set_irq_line(struct pci_dev *dev, unsigned int irq); +extern void (*kd_mksound)(unsigned int hz, unsigned int ticks); + +#ifdef CONFIG_PCI + +static int irqmap_ebsa[] __initdata = { IRQ_IN1, IRQ_IN0, IRQ_PCI, IRQ_IN3 }; + +__initfunc(static int ebsa_irqval(struct pci_dev *dev)) +{ + unsigned char pin; + + pcibios_read_config_byte(dev->bus->number, + dev->devfn, + PCI_INTERRUPT_PIN, + &pin); + + return irqmap_ebsa[(PCI_SLOT(dev->devfn) + pin) & 3]; +} + +#ifdef CONFIG_CATS +static int irqmap_cats[] __initdata = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; + +__initfunc(static int cats_irqval(struct pci_dev *dev)) +{ + if (dev->irq >= 128) + return 16 + (dev->irq & 0x1f); + + switch (dev->irq) { + case 1: + case 2: + case 3: + case 4: + return irqmap_cats[dev->irq - 1]; + case 0: + return 0; + } + + printk("PCI: device %02x:%02x has unknown irq line %x\n", + dev->bus->number, dev->devfn, dev->irq); + return 0; +} +#endif + +__initfunc(void pcibios_fixup_ebsa285(struct pci_dev *dev)) +{ + /* Latency timer of 32 */ + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 32); + + /* 32-byte cache line size */ + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 8); + + /* Set SysErr enable, Parity enable */ + pci_set_cmd(dev, 0, PCI_COMMAND_FAST_BACK | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); + + /* If this device is an ISA bridge, set the + * have_isa_bridge flag. We will then go looking + * for things like keyboard, etc + */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA || + (dev->class >> 8) == PCI_CLASS_BRIDGE_EISA) + have_isa_bridge = !0; + + /* sort out the irq mapping for this device */ + switch (machine_arch_type) { + case MACH_TYPE_EBSA285: + dev->irq = ebsa_irqval(dev); + /* Turn on bus mastering - boot loader doesn't + * - perhaps it should! - dag + */ + pci_set_cmd(dev, 0, PCI_COMMAND_MASTER); + break; + +#ifdef CONFIG_CATS + case MACH_TYPE_CATS: + dev->irq = cats_irqval(dev); + /* Turn on bus mastering - boot loader doesn't + * - perhaps it should! - dag + */ + pci_set_cmd(dev, 0, PCI_COMMAND_MASTER); + break; +#endif +#ifdef CONFIG_ARCH_NETWINDER + case MACH_TYPE_NETWINDER: + /* disable ROM */ + pci_write_config_dword(dev, PCI_ROM_ADDRESS, 0); + +#define DEV(v,d) ((v)<<16|(d)) + switch (DEV(dev->vendor, dev->device)) { + /* Ether 100 */ + case DEV(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21142): + pci_set_base_addr(dev, 0, DEC21143_IO_BASE); + pci_set_base_addr(dev, 1, DEC21143_MEM_BASE); + pci_set_cmd(dev, 0, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY | PCI_COMMAND_IO); + /* Put the chip to sleep in case the driver isn't loaded */ + pci_write_config_dword(dev, 0x40, 0x80000000); + dev->irq = IRQ_NETWINDER_ETHER100; + break; + + /* Ether 10 */ + case DEV(PCI_VENDOR_ID_WINBOND2,0x5a5a): + pci_set_base_addr(dev, 0, ETHER10_IO_BASE); + pci_set_cmd(dev, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY, PCI_COMMAND_IO); + dev->irq = IRQ_NETWINDER_ETHER10; + break; + + /* ISA bridge */ + case DEV(PCI_VENDOR_ID_WINBOND,PCI_DEVICE_ID_WINBOND_83C553): + pci_set_base_addr(dev, 0, 0); + pci_set_cmd(dev, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY, PCI_COMMAND_IO); + /* + * Enable all memory requests from ISA to be channeled to PCI + */ + pci_write_config_byte(dev, 0x48, 255); + /* + * Disable ping-pong (as per errata) + */ + pci_write_config_byte(dev, 0x42, 0); + /* + * Enable PCI packet retry + */ + pci_write_config_byte(dev, 0x40, 0x22); + /* + * Do not use PCI CPU park enable, park on + * last master, disable GAT bit + */ + pci_write_config_byte(dev, 0x83, 0x02); + /* + * Default rotating priorities + */ + pci_write_config_byte(dev, 0x80, 0xe0); + /* + * Rotate bank 4 + */ + pci_write_config_byte(dev, 0x81, 0x01); + break; + + /* IDE */ + case DEV(PCI_VENDOR_ID_WINBOND,PCI_DEVICE_ID_WINBOND_82C105): + pci_set_base_addr(dev, 0, 0x1f1); + pci_set_base_addr(dev, 1, 0x3f5); + pci_set_base_addr(dev, 2, 0x171); + pci_set_base_addr(dev, 3, 0x375); + pci_set_base_addr(dev, 4, 0xe801); + pci_set_cmd(dev, PCI_COMMAND_MEMORY, PCI_COMMAND_MASTER | PCI_COMMAND_IO); + dev->irq = IRQ_ISA_HARDDISK1; + break; + + /* VGA */ + case DEV(PCI_VENDOR_ID_INTERG,0x2000): + pci_set_base_addr(dev, 0, CYBER2000_MEM_BASE); + pci_set_cmd(dev, PCI_COMMAND_MASTER, PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + dev->irq = IRQ_NETWINDER_VGA; + break; + } +#endif + } +} + +static inline void +report_pci_dev_error(void) +{ + struct pci_dev *dev; + + for (dev = pci_devices; dev; dev = dev->next) { + unsigned short status; + + pci_read_config_word(dev, PCI_STATUS, &status); + if (status & 0xf900) { + printk(KERN_DEBUG "PCI: [%04X:%04X] status = %X\n", + dev->vendor, dev->device, status); + + pci_write_config_word(dev, PCI_STATUS, status & 0xf900); + } + } +} +#else +#define report_pci_dev_error() +#endif + +/* + * Warn on PCI errors. Please report any occurances! + */ +static void +irq_pci_err(int irq, void *dev_id, struct pt_regs *regs) +{ + static unsigned long next_warn; + unsigned long cmd = *CSR_PCICMD & 0x0000ffff; + unsigned long ctrl = (*CSR_SA110_CNTL) & 0xffffde07; + unsigned long irqstatus = *CSR_IRQ_RAWSTATUS; + int warn = time_after_eq(jiffies, next_warn); + + ctrl |= SA110_CNTL_DISCARDTIMER; + + if (warn) { + next_warn = jiffies + 3 * HZ / 100; + printk(KERN_DEBUG "PCI: "); + } + + if (irqstatus & (1 << 31)) { + if (warn) + printk("parity error "); + cmd |= 1 << 31; + } + + if (irqstatus & (1 << 30)) { + if (warn) + printk("target abort "); + cmd |= 1 << 28; + } + + if (irqstatus & (1 << 29)) { + if (warn) + printk("master abort "); + cmd |= 1 << 29; + } + + if (irqstatus & (1 << 28)) { + if (warn) + printk("data parity error "); + cmd |= 1 << 24; + } + + if (irqstatus & (1 << 27)) { + if (warn) + printk("discard timer expired "); + ctrl &= ~SA110_CNTL_DISCARDTIMER; + } + + if (irqstatus & (1 << 23)) { + if (warn) + printk("system error "); + ctrl |= SA110_CNTL_RXSERR; + } + + if (warn) + printk("pc=[<%08lX>]\n", instruction_pointer(regs)); + + report_pci_dev_error(); + + *CSR_PCICMD = cmd; + *CSR_SA110_CNTL = ctrl; +} + +static struct irqaction irq_pci_error = { + irq_pci_err, SA_INTERRUPT, 0, "PCI error", NULL, NULL +}; + +__initfunc(void pcibios_init_ebsa285(void)) +{ + setup_arm_irq(IRQ_PCI_ERR, &irq_pci_error); +} + +/* + * Netwinder stuff + */ +#ifdef CONFIG_ARCH_NETWINDER + +/* + * Winbond WB83977F accessibility stuff + */ +static inline void wb977_open(void) +{ + outb(0x87, 0x370); + outb(0x87, 0x370); +} + +static inline void wb977_close(void) +{ + outb(0xaa, 0x370); +} + +static inline void wb977_wb(int reg, int val) +{ + outb(reg, 0x370); + outb(val, 0x371); +} + +static inline void wb977_ww(int reg, int val) +{ + outb(reg, 0x370); + outb(val >> 8, 0x371); + outb(reg + 1, 0x370); + outb(val, 0x371); +} + +#define wb977_device_select(dev) wb977_wb(0x07, dev) +#define wb977_device_disable() wb977_wb(0x30, 0x00) +#define wb977_device_enable() wb977_wb(0x30, 0x01) + +/* + * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE + */ +spinlock_t __netwinder_data gpio_lock = SPIN_LOCK_UNLOCKED; + +static unsigned int __netwinder_data current_gpio_op = 0; +static unsigned int __netwinder_data current_gpio_io = 0; +static unsigned int __netwinder_data current_cpld = 0; + +void __netwinder_text gpio_modify_op(int mask, int set) +{ + unsigned int new_gpio, changed; + + new_gpio = (current_gpio_op & ~mask) | set; + changed = new_gpio ^ current_gpio_op; + current_gpio_op = new_gpio; + + if (changed & 0xff) + outb(new_gpio, GP1_IO_BASE); + if (changed & 0xff00) + outb(new_gpio >> 8, GP2_IO_BASE); +} + +static inline void __gpio_modify_io(int mask, int in) +{ + unsigned int new_gpio, changed; + int port; + + new_gpio = (current_gpio_io & ~mask) | in; + changed = new_gpio ^ current_gpio_io; + current_gpio_io = new_gpio; + + changed >>= 1; + new_gpio >>= 1; + + wb977_device_select(7); + + for (port = 0xe1; changed && port < 0xe8; changed >>= 1) { + wb977_wb(port, new_gpio & 1); + + port += 1; + new_gpio >>= 1; + } + + wb977_device_select(8); + + for (port = 0xe8; changed && port < 0xec; changed >>= 1) { + wb977_wb(port, new_gpio & 1); + + port += 1; + new_gpio >>= 1; + } +} + +void __netwinder_text gpio_modify_io(int mask, int in) +{ + /* Open up the SuperIO chip */ + wb977_open(); + + __gpio_modify_io(mask, in); + + /* Close up the EFER gate */ + wb977_close(); +} + +int __netwinder_text gpio_read(void) +{ + return inb(GP1_IO_BASE) | inb(GP2_IO_BASE) << 8; +} + +/* + * Initialise the Winbond W83977F global registers + */ +static inline void wb977_init_global(void) +{ + /* + * Enable R/W config registers + */ + wb977_wb(0x26, 0x40); + + /* + * Power down FDC (not used) + */ + wb977_wb(0x22, 0xfe); + + /* + * GP12, GP11, CIRRX, IRRXH, GP10 + */ + wb977_wb(0x2a, 0xc1); + + /* + * GP23, GP22, GP21, GP20, GP13 + */ + wb977_wb(0x2b, 0x6b); + + /* + * GP17, GP16, GP15, GP14 + */ + wb977_wb(0x2c, 0x55); +} + +/* + * Initialise the Winbond W83977F printer port + */ +static inline void wb977_init_printer(void) +{ + wb977_device_select(1); + + /* + * mode 1 == EPP + */ + wb977_wb(0xf0, 0x01); +} + +/* + * Initialise the Winbond W83977F keyboard controller + */ +static inline void wb977_init_keyboard(void) +{ + wb977_device_select(5); + + /* + * Keyboard controller address + */ + wb977_ww(0x60, 0x0060); + wb977_ww(0x62, 0x0064); + + /* + * Keyboard IRQ 1, active high, edge trigger + */ + wb977_wb(0x70, 1); + wb977_wb(0x71, 0x02); + + /* + * Mouse IRQ 5, active high, edge trigger + */ + wb977_wb(0x72, 5); + wb977_wb(0x73, 0x02); + + /* + * KBC 8MHz + */ + wb977_wb(0xf0, 0x40); + + /* + * Enable device + */ + wb977_device_enable(); +} + +/* + * Initialise the Winbond W83977F Infra-Red device + */ +static inline void wb977_init_irda(void) +{ + wb977_device_select(6); + + /* + * IR base address + */ + wb977_ww(0x60, IRDA_IO_BASE); + + /* + * IRDA IRQ 6, active high, edge trigger + */ + wb977_wb(0x70, 6); + wb977_wb(0x71, 0x02); + + /* + * RX DMA - ISA DMA 0 + */ + wb977_wb(0x74, 0x00); + + /* + * TX DMA - Disable Tx DMA + */ + wb977_wb(0x75, 0x04); + + /* + * Append CRC, Enable bank selection + */ + wb977_wb(0xf0, 0x03); + + /* + * Enable device + */ + wb977_device_enable(); +} + +/* + * Initialise Winbond W83977F general purpose IO + */ +static inline void wb977_init_gpio(void) +{ + unsigned long flags; + + /* + * Set up initial I/O definitions + */ + current_gpio_io = -1; + __gpio_modify_io(-1, GPIO_DONE | GPIO_WDTIMER); + + wb977_device_select(7); + + /* + * Group1 base address + */ + wb977_ww(0x60, GP1_IO_BASE); + wb977_ww(0x62, 0); + wb977_ww(0x64, 0); + + /* + * GP10 (Orage button) IRQ 10, active high, edge trigger + */ + wb977_wb(0x70, 10); + wb977_wb(0x71, 0x02); + + /* + * GP10: Debounce filter enabled, IRQ, input + */ + wb977_wb(0xe0, 0x19); + + /* + * Enable Group1 + */ + wb977_device_enable(); + + wb977_device_select(8); + + /* + * Group2 base address + */ + wb977_ww(0x60, GP2_IO_BASE); + + /* + * Clear watchdog timer regs + * - timer disable + */ + wb977_wb(0xf2, 0x00); + + /* + * - disable LED, no mouse nor keyboard IRQ + */ + wb977_wb(0xf3, 0x00); + + /* + * - timer counting, disable power LED, disable timeouot + */ + wb977_wb(0xf4, 0x00); + + /* + * Enable group2 + */ + wb977_device_enable(); + + /* + * Set Group1/Group2 outputs + */ + spin_lock_irqsave(&gpio_lock, flags); + gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN); + spin_unlock_irqrestore(&gpio_loc, flags); +} + +/* + * Initialise the Winbond W83977F chip. + */ +__initfunc(static void wb977_init(void)) +{ + request_region(0x370, 2, "W83977AF configuration"); + + /* + * Open up the SuperIO chip + */ + wb977_open(); + + /* + * Initialise the global registers + */ + wb977_init_global(); + + /* + * Initialise the various devices in + * the multi-IO chip. + */ + wb977_init_printer(); + wb977_init_keyboard(); + wb977_init_irda(); + wb977_init_gpio(); + + /* + * Close up the EFER gate + */ + wb977_close(); +} + +void __netwinder_text cpld_modify(int mask, int set) +{ + int msk; + + current_cpld = (current_cpld & ~mask) | set; + + gpio_modify_io(GPIO_DATA, 0); + gpio_modify_op(GPIO_IOLOAD, 0); + + for (msk = 8; msk; msk >>= 1) { + int bit = current_cpld & msk; + + gpio_modify_op(GPIO_DATA | GPIO_IOCLK, bit ? GPIO_DATA : 0); + gpio_modify_op(GPIO_IOCLK, GPIO_IOCLK); + } + + gpio_modify_op(GPIO_IOCLK|GPIO_DATA, 0); + gpio_modify_op(GPIO_IOLOAD|GPIO_DSCLK, GPIO_IOLOAD|GPIO_DSCLK); + gpio_modify_op(GPIO_IOLOAD, 0); +} + +__initfunc(static void cpld_init(void)) +{ + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + cpld_modify(-1, CPLD_UNMUTE | 4); + spin_unlock_irqrestore(&gpio_lock, flags); +} + +static unsigned char rwa_unlock[] __initdata = +{ 0x00, 0x00, 0x6a, 0xb5, 0xda, 0xed, 0xf6, 0xfb, 0x7d, 0xbe, 0xdf, 0x6f, 0x37, 0x1b, + 0x0d, 0x86, 0xc3, 0x61, 0xb0, 0x58, 0x2c, 0x16, 0x8b, 0x45, 0xa2, 0xd1, 0xe8, 0x74, + 0x3a, 0x9d, 0xce, 0xe7, 0x73, 0x39 }; + +#ifndef DEBUG +#define dprintk if (0) printk +#else +#define dprintk printk +#endif + +#define WRITE_RWA(r,v) do { outb((r), 0x279); outb((v), 0xa79); } while (0) + +static inline void rwa010_unlock(void) +{ + int i; + + WRITE_RWA(2, 2); + mdelay(10); + + for (i = 0; i < sizeof(rwa_unlock); i++) + outb(rwa_unlock[i], 0x279); +} + +static inline void rwa010_read_ident(void) +{ + unsigned char si[9]; + int i, j; + + WRITE_RWA(3, 0); + WRITE_RWA(0, 128); + + outb(1, 0x279); + + mdelay(10); + + dprintk("Identifier: "); + for (i = 0; i < 9; i++) { + si[i] = 0; + for (j = 0; j < 8; j++) { + int bit; + mdelay(1); + inb(0x203); + mdelay(1); + bit = inb(0x203); + dprintk("%02X ", bit); + si[i] |= bit << j; + } + mdelay(10); + dprintk("%02X ", si[i]); + } + dprintk("\n"); +} + +static inline void rwa010_global_init(void) +{ + WRITE_RWA(6, 2); // Assign a card no = 2 + + dprintk("Card no = %d\n", inb(0x203)); + + WRITE_RWA(7, 3); + WRITE_RWA(0x30, 0); + + WRITE_RWA(7, 4); + WRITE_RWA(0x30, 0); + + WRITE_RWA(7, 2); + WRITE_RWA(0x30, 0); +} + +static inline void rwa010_game_port_init(void) +{ + int i; + + WRITE_RWA(7, 5); + + dprintk("Slider base: "); + WRITE_RWA(0x61, 1); + i = inb(0x203); + + WRITE_RWA(0x60, 2); + dprintk("%02X%02X (201)\n", inb(0x203), i); + + WRITE_RWA(0x30, 1); +} + +static inline void rwa010_waveartist_init(int base, int irq, int dma) +{ + int i; + + WRITE_RWA(7, 0); + + dprintk("WaveArtist base: "); + WRITE_RWA(0x61, base); + i = inb(0x203); + + WRITE_RWA(0x60, base >> 8); + dprintk("%02X%02X (%X),", inb(0x203), i, base); + + WRITE_RWA(0x70, irq); + dprintk(" irq: %d (%d),", inb(0x203), irq); + + WRITE_RWA(0x74, dma); + dprintk(" dma: %d (%d)\n", inb(0x203), dma); + + WRITE_RWA(0x30, 1); +} + +static inline void rwa010_soundblaster_init(int sb_base, int al_base, int irq, int dma) +{ + int i; + + WRITE_RWA(7, 1); + + dprintk("SoundBlaster base: "); + WRITE_RWA(0x61, sb_base); + i = inb(0x203); + + WRITE_RWA(0x60, sb_base >> 8); + dprintk("%02X%02X (%X),", inb(0x203), i, sb_base); + + dprintk(" irq: "); + WRITE_RWA(0x70, irq); + dprintk("%d (%d),", inb(0x203), irq); + + dprintk(" 8-bit DMA: "); + WRITE_RWA(0x74, dma); + dprintk("%d (%d)\n", inb(0x203), dma); + + dprintk("AdLib base: "); + WRITE_RWA(0x63, al_base); + i = inb(0x203); + + WRITE_RWA(0x62, al_base >> 8); + dprintk("%02X%02X (%X)\n", inb(0x203), i, al_base); + + WRITE_RWA(0x30, 1); +} + +static void rwa010_soundblaster_reset(void) +{ + int i; + + outb(1, 0x226); + udelay(3); + outb(0, 0x226); + + for (i = 0; i < 5; i++) { + if (inb(0x22e) & 0x80) + break; + mdelay(1); + } + if (i == 5) + printk("SoundBlaster: DSP reset failed\n"); + + dprintk("SoundBlaster DSP reset: %02X (AA)\n", inb(0x22a)); + + for (i = 0; i < 5; i++) { + if ((inb(0x22c) & 0x80) == 0) + break; + mdelay(1); + } + + if (i == 5) + printk("SoundBlaster: DSP not ready\n"); + else { + outb(0xe1, 0x22c); + + dprintk("SoundBlaster DSP id: "); + i = inb(0x22a); + udelay(1); + i |= inb(0x22a) << 8; + dprintk("%04X\n", i); + + for (i = 0; i < 5; i++) { + if ((inb(0x22c) & 0x80) == 0) + break; + mdelay(1); + } + + if (i == 5) + printk("SoundBlaster: could not turn speaker off\n"); + + outb(0xd3, 0x22c); + } + + /* turn on OPL3 */ + outb(5, 0x38a); + outb(1, 0x38b); +} + +__initfunc(static void rwa010_init(void)) +{ + rwa010_unlock(); + rwa010_read_ident(); + rwa010_global_init(); + rwa010_game_port_init(); + rwa010_waveartist_init(0x250, 3, 7); + rwa010_soundblaster_init(0x220, 0x388, 3, 1); + rwa010_soundblaster_reset(); +} + +EXPORT_SYMBOL(gpio_lock); +EXPORT_SYMBOL(gpio_modify_op); +EXPORT_SYMBOL(gpio_modify_io); +EXPORT_SYMBOL(cpld_modify); + +#endif + +#ifdef CONFIG_LEDS +#define DEFAULT_LEDS 0 +#else +#define DEFAULT_LEDS GPIO_GREEN_LED +#endif + +__initfunc(void hw_init(void)) +{ +#ifdef CONFIG_ARCH_NETWINDER + /* + * this ought to have a better home... + * Since this calls the above routines, which are + * compiled only if CONFIG_ARCH_NETWINDER is set, + * these should only be parsed by the compiler + * in the same circumstance. + */ + if (machine_is_netwinder()) { + unsigned long flags; + + wb977_init(); + cpld_init(); + rwa010_init(); + + spin_lock_irqsave(&gpio_lock, flags); + gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS); + spin_unlock_irqrestore(&gpio_lock, flags); + } +#endif + + leds_event(led_start); +} diff --git a/arch/arm/kernel/iic.c b/arch/arm/kernel/iic.c index 6eb0122e8..c9a672a32 100644 --- a/arch/arm/kernel/iic.c +++ b/arch/arm/kernel/iic.c @@ -7,20 +7,24 @@ */ #include <linux/delay.h> +#include <linux/errno.h> #include <asm/system.h> -#include <asm/io.h> #include <asm/hardware.h> +#include <asm/io.h> +#include <asm/ioc.h> + +#define FORCE_ONES 0xdc /* * if delay loop has been calibrated then us that, * else use IOC timer 1. */ -static void iic_delay (void) +static void iic_delay(void) { extern unsigned long loops_per_sec; if (loops_per_sec != (1 << 12)) { - udelay(10); + udelay(100); /* was 10 */ return; } else { unsigned long flags; @@ -30,7 +34,7 @@ static void iic_delay (void) outb(255, IOC_T1LTCHH); outb(0, IOC_T1GO); outb(1<<6, IOC_IRQCLRA); /* clear T1 irq */ - outb(4, IOC_T1LTCHL); + outb(10, IOC_T1LTCHL); /* was 4 */ outb(0, IOC_T1LTCHH); outb(0, IOC_T1GO); while ((inb(IOC_IRQSTATA) & (1<<6)) == 0); @@ -38,124 +42,207 @@ static void iic_delay (void) } } -static inline void iic_start (void) +#define IIC_INIT() dat = (inb(IOC_CONTROL) | FORCE_ONES) & ~3 +#define IIC_SET_DAT outb(dat|=1, IOC_CONTROL); +#define IIC_CLR_DAT outb(dat&=~1, IOC_CONTROL); +#define IIC_SET_CLK outb(dat|=2, IOC_CONTROL); +#define IIC_CLR_CLK outb(dat&=~2, IOC_CONTROL); +#define IIC_DELAY iic_delay(); +#define IIC_READ_DATA() (inb(IOC_CONTROL) & 1) + +static inline void iic_set_lines(int clk, int dat) { - unsigned char out; + int old; - out = inb(IOC_CONTROL) | 0xc2; + old = inb(IOC_CONTROL) | FORCE_ONES; - outb(out, IOC_CONTROL); - iic_delay(); + old &= ~3; + + if (clk) + old |= 2; + if (dat) + old |= 1; + + outb(old, IOC_CONTROL); - outb(out ^ 1, IOC_CONTROL); iic_delay(); } -static inline void iic_stop (void) +static inline unsigned int iic_read_data(void) { - unsigned char out; + return inb(IOC_CONTROL) & 1; +} - out = inb(IOC_CONTROL) | 0xc3; +/* + * C: ==~~_ + * D: =~~__ + */ +static inline void iic_start(void) +{ + unsigned int dat; - iic_delay(); - outb(out ^ 1, IOC_CONTROL); + IIC_INIT(); - iic_delay(); - outb(out, IOC_CONTROL); + IIC_SET_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY + + IIC_CLR_DAT + IIC_DELAY + IIC_CLR_CLK + IIC_DELAY } -static int iic_sendbyte (unsigned char b) +/* + * C: __~~ + * D: =__~ + */ +static inline void iic_stop(void) { - unsigned char out, in; - int i; + unsigned int dat; - out = (inb(IOC_CONTROL) & 0xfc) | 0xc0; + IIC_INIT(); - outb(out, IOC_CONTROL); - for (i = 7; i >= 0; i--) { - unsigned char c; - c = out | ((b & (1 << i)) ? 1 : 0); + IIC_CLR_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY + IIC_SET_DAT + IIC_DELAY +} - outb(c, IOC_CONTROL); - iic_delay(); +/* + * C: __~_ + * D: =___ + */ +static inline void iic_acknowledge(void) +{ + unsigned int dat; - outb(c | 2, IOC_CONTROL); - iic_delay(); + IIC_INIT(); - outb(c, IOC_CONTROL); - } - outb(out | 1, IOC_CONTROL); - iic_delay(); + IIC_CLR_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY + IIC_CLR_CLK + IIC_DELAY +} - outb(out | 3, IOC_CONTROL); - iic_delay(); +/* + * C: __~_ + * D: =~H~ + */ +static inline int iic_is_acknowledged(void) +{ + unsigned int dat, ack_bit; - in = inb(IOC_CONTROL) & 1; + IIC_INIT(); - outb(out | 1, IOC_CONTROL); - iic_delay(); + IIC_SET_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY - outb(out, IOC_CONTROL); - iic_delay(); + ack_bit = IIC_READ_DATA(); + + IIC_CLR_CLK + IIC_DELAY + + return ack_bit == 0; +} + +/* + * C: _~__~__~__~__~__~__~__~_ + * D: =DDXDDXDDXDDXDDXDDXDDXDD + */ +static void iic_sendbyte(unsigned int b) +{ + unsigned int dat, i; + + IIC_INIT(); + + for (i = 0; i < 8; i++) { + if (b & 128) + IIC_SET_DAT + else + IIC_CLR_DAT + IIC_DELAY + + IIC_SET_CLK + IIC_DELAY + IIC_CLR_CLK + IIC_DELAY - if(in) { - printk("No acknowledge from RTC\n"); - return 1; - } else - return 0; + b <<= 1; + } } -static unsigned char iic_recvbyte (void) +/* + * C: __~_~_~_~_~_~_~_~_ + * D: =~HHHHHHHHHHHHHHHH + */ +static unsigned char iic_recvbyte(void) { - unsigned char out, in; - int i; + unsigned int dat, i, in; - out = (inb(IOC_CONTROL) & 0xfc) | 0xc0; + IIC_INIT(); + + IIC_SET_DAT + IIC_DELAY - outb(out, IOC_CONTROL); in = 0; - for (i = 7; i >= 0; i--) { - outb(out | 1, IOC_CONTROL); - iic_delay(); - outb(out | 3, IOC_CONTROL); - iic_delay(); - in = (in << 1) | (inb(IOC_CONTROL) & 1); - outb(out | 1, IOC_CONTROL); - iic_delay(); + for (i = 0; i < 8; i++) { + IIC_SET_CLK + IIC_DELAY + + in = (in << 1) | IIC_READ_DATA(); + + IIC_CLR_CLK + IIC_DELAY } - outb(out, IOC_CONTROL); - iic_delay(); - outb(out | 2, IOC_CONTROL); - iic_delay(); return in; } -void iic_control (unsigned char addr, unsigned char loc, unsigned char *buf, int len) +int iic_control (unsigned char addr, unsigned char loc, unsigned char *buf, int len) { - iic_start(); + int i, err = -EIO; - if (iic_sendbyte(addr & 0xfe)) + iic_start(); + iic_sendbyte(addr & 0xfe); + if (!iic_is_acknowledged()) goto error; - if (iic_sendbyte(loc)) + iic_sendbyte(loc); + if (!iic_is_acknowledged()) goto error; if (addr & 1) { - int i; - - for (i = 0; i < len; i++) - if (iic_sendbyte (buf[i])) - break; - } else { - int i; - iic_stop(); iic_start(); iic_sendbyte(addr|1); - for (i = 0; i < len; i++) - buf[i] = iic_recvbyte (); + if (!iic_is_acknowledged()) + goto error; + + for (i = 0; i < len - 1; i++) { + buf[i] = iic_recvbyte(); + iic_acknowledge(); + } + buf[i] = iic_recvbyte(); + } else { + for (i = 0; i < len; i++) { + iic_sendbyte(buf[i]); + + if (!iic_is_acknowledged()) + goto error; + } } + + err = 0; error: iic_stop(); + + return err; } diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index 99577f1b7..5d09ea540 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -6,9 +6,10 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; +static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; -struct mm_struct init_mm = INIT_MM; +struct mm_struct init_mm = INIT_MM(init_mm); /* * Initial task structure. @@ -20,4 +21,5 @@ struct mm_struct init_mm = INIT_MM; * * The things we do for performance.. */ -union task_union init_task_union __attribute__((__section__(".init.task"))) = { INIT_TASK }; +union task_union init_task_union __attribute__((__section__(".init.task"))) = + { INIT_TASK(init_task_union.task) }; diff --git a/arch/arm/kernel/ioport.c b/arch/arm/kernel/ioport.c deleted file mode 100644 index d375dcbdd..000000000 --- a/arch/arm/kernel/ioport.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * linux/arch/arm/kernel/ioport.c - * - * Io-port support is not used for ARM - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/ioport.h> - -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -/*asmlinkage void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) -{ -}*/ - -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) -{ - return -ENOSYS; -} - -asmlinkage int sys_iopl(long ebx,long ecx,long edx, - long esi, long edi, long ebp, long eax, long ds, - long es, long fs, long gs, long orig_eax, - long eip,long cs,long eflags,long esp,long ss) -{ - return -ENOSYS; -} diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 332e8940d..ee6e07c6c 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -23,7 +23,6 @@ #include <linux/sched.h> #include <linux/ioport.h> #include <linux/interrupt.h> -#include <linux/timex.h> #include <linux/malloc.h> #include <linux/random.h> #include <linux/smp.h> @@ -32,7 +31,6 @@ #include <asm/hardware.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/system.h> #ifndef SMP @@ -46,10 +44,22 @@ #define cliIF() #endif +/* + * Maximum IRQ count. Currently, this is arbitary. + * However, it should not be set too low to prevent + * false triggering. Conversely, if it is set too + * high, then you could miss a stuck IRQ. + * + * Maybe we ought to set a timer and re-enable the + * IRQ at a later time? + */ +#define MAX_IRQ_CNT 100000 + unsigned int local_bh_count[NR_CPUS]; unsigned int local_irq_count[NR_CPUS]; spinlock_t irq_controller_lock; +int setup_arm_irq(int, struct irqaction *); extern int get_fiq_list(char *); extern void init_FIQ(void); @@ -60,17 +70,29 @@ struct irqdesc { unsigned int probing : 1; /* IRQ in use for a probe */ unsigned int probe_ok : 1; /* IRQ can be used for probe */ unsigned int valid : 1; /* IRQ claimable */ - unsigned int unused :26; + unsigned int noautoenable : 1; /* don't automatically enable IRQ */ + unsigned int unused :25; void (*mask_ack)(unsigned int irq); /* Mask and acknowledge IRQ */ void (*mask)(unsigned int irq); /* Mask IRQ */ void (*unmask)(unsigned int irq); /* Unmask IRQ */ struct irqaction *action; - unsigned int unused2[3]; + /* + * IRQ lock detection + */ + unsigned int lck_cnt; + unsigned int lck_pc; + unsigned int lck_jif; }; static struct irqdesc irq_desc[NR_IRQS]; /* + * Get architecture specific interrupt handlers + * and interrupt initialisation. + */ +#include <asm/arch/irq.h> + +/* * Dummy mask/unmask handler */ static void dummy_mask_unmask_irq(unsigned int irq) @@ -94,10 +116,12 @@ void enable_irq(unsigned int irq) spin_lock_irqsave(&irq_controller_lock, flags); cliIF(); - irq_desc[irq].enabled = 1; irq_desc[irq].probing = 0; irq_desc[irq].triggered = 0; - irq_desc[irq].unmask(irq); + if (!irq_desc[irq].noautoenable) { + irq_desc[irq].enabled = 1; + irq_desc[irq].unmask(irq); + } spin_unlock_irqrestore(&irq_controller_lock, flags); } @@ -119,21 +143,52 @@ int get_irq_list(char *buf) *p++ = '\n'; } -#ifdef CONFIG_ACORN +#ifdef CONFIG_ARCH_ACORN p += get_fiq_list(p); #endif return p - buf; } /* + * IRQ lock detection. + * + * Hopefully, this should get us out of a few locked situations. + * However, it may take a while for this to happen, since we need + * a large number if IRQs to appear in the same jiffie with the + * same instruction pointer (or within 2 instructions). + */ +static void check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs) +{ + unsigned long instr_ptr = instruction_pointer(regs); + + if (desc->lck_jif == jiffies && + desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) { + desc->lck_cnt += 1; + + if (desc->lck_cnt > MAX_IRQ_CNT) { + printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq); + disable_irq(irq); + } + } else { + desc->lck_cnt = 0; + desc->lck_pc = instruction_pointer(regs); + desc->lck_jif = jiffies; + } +} + +/* * do_IRQ handles all normal device IRQ's */ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { - struct irqdesc * desc = irq_desc + irq; + struct irqdesc * desc; struct irqaction * action; int status, cpu; + irq = fixup_irq(irq); + + desc = irq_desc + irq; + spin_lock(&irq_controller_lock); desc->mask_ack(irq); spin_unlock(&irq_controller_lock); @@ -174,6 +229,12 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) } } + /* + * Debug measure - hopefully we can continue if an + * IRQ lockup problem occurs... + */ + check_irq_lock(desc, irq, regs); + irq_exit(cpu, irq); /* @@ -181,15 +242,10 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) * a return code from the irq handler to tell us * whether the handler wants us to do software bottom * half handling or not.. - * - * ** IMPORTANT NOTE: do_bottom_half() ENABLES IRQS!!! ** - * ** WE MUST DISABLE THEM AGAIN, ELSE IDE DISKS GO ** - * ** AWOL ** */ if (1) { if (bh_active & bh_mask) do_bottom_half(); - __cli(); } } @@ -227,11 +283,27 @@ int setup_arm_irq(int irq, struct irqaction * new) struct irqaction *old, **p; unsigned long flags; - if (new->flags & SA_SAMPLE_RANDOM) + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ rand_initialize_irq(irq); + } + /* + * The following block of code has to be executed atomically + */ spin_lock_irqsave(&irq_controller_lock, flags); - p = &irq_desc[irq].action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ @@ -252,28 +324,24 @@ int setup_arm_irq(int irq, struct irqaction * new) if (!shared) { irq_desc[irq].nomask = (new->flags & SA_IRQNOMASK) ? 1 : 0; - irq_desc[irq].enabled = 1; irq_desc[irq].probing = 0; - irq_desc[irq].unmask(irq); + if (!irq_desc[irq].noautoenable) { + irq_desc[irq].enabled = 1; + irq_desc[irq].unmask(irq); + } } spin_unlock_irqrestore(&irq_controller_lock, flags); return 0; } -/* - * Using "struct sigaction" is slightly silly, but there - * are historical reasons and it works well, so.. - */ int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long irq_flags, const char * devname, void *dev_id) { unsigned long retval; struct irqaction *action; - if (!irq_desc[irq].valid) - return -EINVAL; - if (!handler) + if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler) return -EINVAL; action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL); @@ -299,28 +367,30 @@ void free_irq(unsigned int irq, void *dev_id) struct irqaction * action, **p; unsigned long flags; - if (!irq_desc[irq].valid) { + if (irq >= NR_IRQS || !irq_desc[irq].valid) { printk(KERN_ERR "Trying to free IRQ%d\n",irq); #ifdef CONFIG_DEBUG_ERRORS __backtrace(); #endif return; } + + spin_lock_irqsave(&irq_controller_lock, flags); for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { if (action->dev_id != dev_id) continue; /* Found it - now free it */ - save_flags_cli (flags); *p = action->next; - restore_flags (flags); kfree(action); - return; + goto out; } printk(KERN_ERR "Trying to free free IRQ%d\n",irq); #ifdef CONFIG_DEBUG_ERRORS __backtrace(); #endif +out: + spin_unlock_irqrestore(&irq_controller_lock, flags); } /* Start the interrupt probing. Unlike other architectures, @@ -346,7 +416,6 @@ unsigned long probe_irq_on(void) continue; irq_desc[i].probing = 1; - irq_desc[i].enabled = 1; irq_desc[i].triggered = 0; irq_desc[i].unmask(i); irqs += 1; @@ -364,7 +433,8 @@ unsigned long probe_irq_on(void) */ spin_lock_irq(&irq_controller_lock); for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && irq_desc[i].triggered) { + if (irq_desc[i].probing && + irq_desc[i].triggered) { irq_desc[i].probing = 0; irqs -= 1; } @@ -383,7 +453,7 @@ unsigned long probe_irq_on(void) int probe_irq_off(unsigned long irqs) { unsigned int i; - int irq_found = -1; + int irq_found = NO_IRQ; /* * look at the interrupts, and find exactly one @@ -393,7 +463,7 @@ int probe_irq_off(unsigned long irqs) for (i = 0; i < NR_IRQS; i++) { if (irq_desc[i].probing && irq_desc[i].triggered) { - if (irq_found != -1) { + if (irq_found != NO_IRQ) { irq_found = NO_IRQ; goto out; } @@ -405,21 +475,19 @@ int probe_irq_off(unsigned long irqs) irq_found = NO_IRQ; out: spin_unlock_irq(&irq_controller_lock); + return irq_found; } -/* - * Get architecture specific interrupt handlers - * and interrupt initialisation. - */ -#include <asm/arch/irq.h> - __initfunc(void init_IRQ(void)) { extern void init_dma(void); int irq; for (irq = 0; irq < NR_IRQS; irq++) { + irq_desc[irq].probe_ok = 0; + irq_desc[irq].valid = 0; + irq_desc[irq].noautoenable = 0; irq_desc[irq].mask_ack = dummy_mask_unmask_irq; irq_desc[irq].mask = dummy_mask_unmask_irq; irq_desc[irq].unmask = dummy_mask_unmask_irq; diff --git a/arch/arm/kernel/leds-ebsa110.c b/arch/arm/kernel/leds-ebsa110.c index cc2f7a91d..eb286347b 100644 --- a/arch/arm/kernel/leds-ebsa110.c +++ b/arch/arm/kernel/leds-ebsa110.c @@ -7,11 +7,13 @@ * * - Red - toggles state every 50 timer interrupts */ +#include <linux/module.h> + #include <asm/hardware.h> #include <asm/leds.h> #include <asm/system.h> -void leds_event(led_event_t ledevt) +void ebsa110_leds_event(led_event_t ledevt) { unsigned long flags; @@ -28,3 +30,7 @@ void leds_event(led_event_t ledevt) restore_flags(flags); } + +void (*leds_event)(led_event_t) = ebsa110_leds_event; + +EXPORT_SYMBOL(leds_event); diff --git a/arch/arm/kernel/leds-ebsa285.c b/arch/arm/kernel/leds-ebsa285.c deleted file mode 100644 index a8cf2e775..000000000 --- a/arch/arm/kernel/leds-ebsa285.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * arch/arm/kernel/leds-ebsa285.c - * - * Copyright (C) 1998 Russell King - * - * EBSA-285 LED control routines. We use the leds as follows: - * - * - Green - toggles state every 50 timer interrupts - * - Amber - On if system is not idle - * - Red - currently unused - */ -#include <asm/hardware.h> -#include <asm/leds.h> -#include <asm/system.h> - -static char led_state = XBUS_LED_RED | XBUS_LED_GREEN; - -void leds_event(led_event_t ledevt) -{ - unsigned long flags; - - save_flags_cli(flags); - - switch(ledevt) { - case led_idle_start: - led_state |= XBUS_LED_AMBER; - break; - - case led_idle_end: - led_state &= ~XBUS_LED_AMBER; - break; - - case led_timer: - led_state ^= XBUS_LED_GREEN; - break; - - default: - break; - } - - restore_flags(flags); - - *XBUS_LEDS = led_state; -} diff --git a/arch/arm/kernel/leds-footbridge.c b/arch/arm/kernel/leds-footbridge.c new file mode 100644 index 000000000..cb6c7f4b4 --- /dev/null +++ b/arch/arm/kernel/leds-footbridge.c @@ -0,0 +1,249 @@ +/* + * arch/arm/kernel/leds-footbridge.c + * + * Copyright (C) 1998-1999 Russell King + * + * EBSA-285 and NetWinder LED control routines. + * + * The EBSA-285 uses the leds as follows: + * - Green - toggles state every 50 timer interrupts + * - Amber - On if system is not idle + * - Red - currently unused + * + * The Netwinder uses the leds as follows: + * - Green - toggles state every 50 timer interrupts + * - Red - On if the system is not idle + * + * Changelog: + * 02-05-1999 RMK Various cleanups + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <asm/hardware.h> +#include <asm/leds.h> +#include <asm/spinlock.h> +#include <asm/system.h> + +#define LED_STATE_ENABLED 1 +#define LED_STATE_CLAIMED 2 +static char led_state; +static char hw_led_state; + +static spinlock_t leds_lock = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_ARCH_EBSA285 + +static void __ebsa285_text ebsa285_leds_event(led_event_t evt) +{ + unsigned long flags; + + spin_lock_irqsave(&leds_lock, flags); + + switch (evt) { + case led_start: + hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN; +#ifndef CONFIG_LEDS_IDLE + hw_led_state |= XBUS_LED_AMBER; +#endif + led_state |= LED_STATE_ENABLED; + break; + + case led_stop: + led_state &= ~LED_STATE_ENABLED; + break; + + case led_claim: + led_state |= LED_STATE_CLAIMED; + hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN | XBUS_LED_AMBER; + break; + + case led_release: + led_state &= ~LED_STATE_CLAIMED; + hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN | XBUS_LED_AMBER; + break; + +#ifdef CONFIG_LEDS_TIMER + case led_timer: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state ^= XBUS_LED_GREEN; + break; +#endif + +#ifdef CONFIG_LEDS_CPU + case led_idle_start: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state |= XBUS_LED_RED; + break; + + case led_idle_end: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state &= ~XBUS_LED_RED; + break; +#endif + + case led_green_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~XBUS_LED_GREEN; + break; + + case led_green_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= XBUS_LED_GREEN; + break; + + case led_amber_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~XBUS_LED_AMBER; + break; + + case led_amber_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= XBUS_LED_AMBER; + break; + + case led_red_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~XBUS_LED_RED; + break; + + case led_red_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= XBUS_LED_RED; + break; + + default: + break; + } + + if (led_state & LED_STATE_ENABLED) + *XBUS_LEDS = hw_led_state; + + spin_unlock_irqrestore(&leds_lock, flags); +} + +#endif + +#ifdef CONFIG_ARCH_NETWINDER + +static void __netwinder_text netwinder_leds_event(led_event_t evt) +{ + unsigned long flags; + + spin_lock_irqsave(&leds_lock, flags); + + switch (evt) { + case led_start: + led_state |= LED_STATE_ENABLED; + hw_led_state = 0; + break; + + case led_stop: + led_state &= ~LED_STATE_ENABLED; + break; + + case led_claim: + led_state |= LED_STATE_CLAIMED; + hw_led_state = 0; + break; + + case led_release: + led_state &= ~LED_STATE_CLAIMED; + hw_led_state = 0; + break; + +#ifdef CONFIG_LEDS_TIMER + case led_timer: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state ^= GPIO_GREEN_LED; + break; +#endif + +#ifdef CONFIG_LEDS_CPU + case led_idle_start: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state &= ~GPIO_RED_LED; + break; + + case led_idle_end: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state |= GPIO_RED_LED; + break; +#endif + + case led_green_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= GPIO_GREEN_LED; + break; + + case led_green_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~GPIO_GREEN_LED; + break; + + case led_amber_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= GPIO_GREEN_LED | GPIO_RED_LED; + break; + + case led_amber_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~(GPIO_GREEN_LED | GPIO_RED_LED); + break; + + case led_red_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= GPIO_RED_LED; + break; + + case led_red_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~GPIO_RED_LED; + break; + + default: + break; + } + + spin_unlock_irqrestore(&leds_lock, flags); + + if (led_state & LED_STATE_ENABLED) { + spin_lock_irqsave(&gpio_lock, flags); + gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state); + spin_unlock_irqrestore(&gpio_lock, flags); + } +} + +#endif + +static void dummy_leds_event(led_event_t evt) +{ +} + +__initfunc(void +init_leds_event(led_event_t evt)) +{ + switch (machine_arch_type) { +#ifdef CONFIG_ARCH_EBSA285 + case MACH_TYPE_EBSA285: + leds_event = ebsa285_leds_event; + break; +#endif +#ifdef CONFIG_ARCH_NETWINDER + case MACH_TYPE_NETWINDER: + leds_event = netwinder_leds_event; + break; +#endif + + default: + leds_event = dummy_leds_event; + } + + leds_event(evt); +} + +void (*leds_event)(led_event_t) = init_leds_event; + +EXPORT_SYMBOL(leds_event); diff --git a/arch/arm/kernel/oldlatches.c b/arch/arm/kernel/oldlatches.c index c4674cd35..a908241d2 100644 --- a/arch/arm/kernel/oldlatches.c +++ b/arch/arm/kernel/oldlatches.c @@ -4,6 +4,7 @@ * (c) David Alan Gilbert 1995/1996 */ #include <linux/kernel.h> +#include <linux/init.h> #include <asm/io.h> #include <asm/hardware.h> @@ -40,7 +41,7 @@ void oldlatch_bupdate(unsigned char mask,unsigned char newdata) } #endif -void oldlatch_init(void) +void __init oldlatch_init(void) { printk("oldlatch: init\n"); #ifdef LATCHAADDR diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 6ea02d891..68bf5aa1f 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -34,7 +34,6 @@ #include <linux/init.h> #include <asm/uaccess.h> -#include <asm/pgtable.h> #include <asm/system.h> #include <asm/io.h> @@ -55,46 +54,37 @@ void enable_hlt(void) } /* - * The idle loop on an arm.. + * The idle loop on an ARM... */ asmlinkage int sys_idle(void) { - int ret = -EPERM; - - lock_kernel(); if (current->pid != 0) - goto out; + return -EPERM; + /* endless idle loop with no priority at all */ - current->priority = -100; - for (;;) - { + while (1) { + if (!current->need_resched && !hlt_counter) + proc_idle(); + current->policy = SCHED_YIELD; + schedule(); +#ifndef CONFIG_NO_PGT_CACHE check_pgt_cache(); -#if 0 //def ARCH_IDLE_OK - if (!hlt_counter && !current->need_resched) - proc_idle (); #endif - run_task_queue(&tq_scheduler); - schedule(); } - ret = 0; -out: - unlock_kernel(); - return ret; } +static char reboot_mode = 'h'; + __initfunc(void reboot_setup(char *str, int *ints)) { + reboot_mode = str[0]; } -/* - * This routine reboots the machine by resetting the expansion cards via - * their loaders, turning off the processor cache (if ARM3), copying the - * first instruction of the ROM to 0, and executing it there. - */ void machine_restart(char * __unused) { - proc_hard_reset (); - arch_hard_reset (); + arch_reset(reboot_mode); + panic("Reboot failed\n"); + while (1); } void machine_halt(void) @@ -150,6 +140,67 @@ void show_regs(struct pt_regs * regs) } /* + * Task structure and kernel stack allocation. + * + * Taken from the i386 version. + */ +#ifdef CONFIG_CPU_32 +#define EXTRA_TASK_STRUCT 8 +static struct task_struct *task_struct_stack[EXTRA_TASK_STRUCT]; +static int task_struct_stack_ptr = -1; +#endif + +struct task_struct *alloc_task_struct(void) +{ + struct task_struct *tsk; + +#ifndef EXTRA_TASK_STRUCT + tsk = ll_alloc_task_struct(); +#else + int index; + + index = task_struct_stack_ptr; + if (index >= EXTRA_TASK_STRUCT/2) + goto use_cache; + + tsk = ll_alloc_task_struct(); + + if (!tsk) { + index = task_struct_stack_ptr; + + if (index >= 0) { +use_cache: tsk = task_struct_stack[index]; + task_struct_stack_ptr = index - 1; + } + } +#endif +#ifdef CONFIG_SYSRQ + /* You need this if you want SYSRQ-T to give sensible stack + * usage information + */ + if (tsk) { + char *p = (char *)tsk; + memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE); + } +#endif + + return tsk; +} + +void free_task_struct(struct task_struct *p) +{ +#ifdef EXTRA_TASK_STRUCT + int index = task_struct_stack_ptr + 1; + + if (index < EXTRA_TASK_STRUCT) { + task_struct_stack[index] = p; + task_struct_stack_ptr = index; + } else +#endif + ll_free_task_struct(p); +} + +/* * Free current thread data structures etc.. */ void exit_thread(void) @@ -179,9 +230,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, childregs = ((struct pt_regs *)((unsigned long)p + 8192)) - 1; *childregs = *regs; childregs->ARM_r0 = 0; + childregs->ARM_sp = esp; save = ((struct context_save_struct *)(childregs)) - 1; - copy_thread_css(save); + init_thread_css(save); p->tss.save = save; return 0; @@ -224,3 +276,29 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs = *regs; dump->u_fpvalid = dump_fpu (regs, &dump->u_fp); } + +/* + * This is the mechanism for creating a new kernel thread. + * + * NOTE! Only a kernel-only process(ie the swapper or direct descendants + * who haven't done an "execve()") should use this: it will work within + * a system call from a "real" process, but the process memory space will + * not be free'd until both the parent and the child have exited. + */ +pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + extern int sys_exit(int) __attribute__((noreturn)); + pid_t __ret; + + __asm__ __volatile__( + "mov r0, %1 @ kernel_thread sys_clone\n" +" mov r1, #0\n" + __syscall(clone)"\n" +" mov %0, r0" + : "=r" (__ret) + : "Ir" (flags | CLONE_VM) : "r0", "r1"); + if (__ret == 0) + sys_exit((fn)(arg)); + return __ret; +} + diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 5e3bdfe3b..fbc3a2187 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -34,11 +34,11 @@ */ static inline long get_stack_long(struct task_struct *task, int offset) { - unsigned char *stack; + struct pt_regs *regs; - stack = (unsigned char *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); - stack += offset << 2; - return *(unsigned long *)stack; + regs = (struct pt_regs *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); + + return regs->uregs[offset]; } /* @@ -50,11 +50,12 @@ static inline long get_stack_long(struct task_struct *task, int offset) static inline long put_stack_long(struct task_struct *task, int offset, unsigned long data) { - unsigned char *stack; + struct pt_regs *regs; + + regs = (struct pt_regs *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); + + regs->uregs[offset] = data; - stack = (unsigned char *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); - stack += offset << 2; - *(unsigned long *) stack = data; return 0; } @@ -157,11 +158,16 @@ repeat: if (MAP_NR(page) < max_mapnr) { page += addr & ~PAGE_MASK; + + flush_cache_range(vma->vm_mm, addr, addr + sizeof(unsigned long)); + *(unsigned long *)page = data; - __flush_entry_to_ram(page); + + clean_cache_area(page, sizeof(unsigned long)); + + set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + flush_tlb_page(vma, addr & PAGE_MASK); } - set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); - flush_tlb(); } /* @@ -343,8 +349,7 @@ printk ("op2=r%02ldsh%dx%d", insn & 15, shift, type); printk ("=%08lX ", val); return val; } -#undef pc_pointer -#define pc_pointer(x) ((x) & 0x03fffffc) + int ptrace_set_bpt (struct task_struct *child) { unsigned long insn, pc, alt; @@ -651,7 +656,6 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) return 0; wake_up_process (child); child->exit_code = SIGKILL; - ptrace_cancel_bpt (child); /* make sure single-step breakpoint is gone. */ ptrace_cancel_bpt (child); ret = 0; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index cddc3fab3..0b0a70087 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -56,12 +56,17 @@ #define SUPPORT_CPU_SA110 #endif -#ifndef CONFIG_CMDLINE -#define CONFIG_CMDLINE "root=/dev/nfs rw" -#endif #define MEM_SIZE (16*1024*1024) #define COMMAND_LINE_SIZE 256 +#ifndef CONFIG_CMDLINE +#define CONFIG_CMDLINE "" +#endif + +extern void reboot_setup(char *str, int *ints); +extern void fpe_init(void); +extern void disable_hlt(void); + struct drive_info_struct { char dummy[32]; } drive_info; struct screen_info screen_info = { orig_video_lines: 30, @@ -87,20 +92,26 @@ const struct armversions armidlist[] = { /*-- Match -- --- Mask -- -- Manu -- Processor uname -m --- ELF STUFF --- --- processor asm funcs --- */ #if defined(CONFIG_CPU_26) + /* ARM2 fake ident */ { 0x41560200, 0xfffffff0, "ARM/VLSI", "arm2" , "armv1" , "v1", 0, &arm2_processor_functions }, + /* ARM250 fake ident */ { 0x41560250, 0xfffffff0, "ARM/VLSI", "arm250" , "armv2" , "v2", HWCAP_SWP, &arm250_processor_functions }, + /* ARM3 processors */ { 0x41560300, 0xfffffff0, "ARM/VLSI", "arm3" , "armv2" , "v2", HWCAP_SWP, &arm3_processor_functions }, #elif defined(CONFIG_CPU_32) #ifdef SUPPORT_CPU_ARM6 + /* ARM6 */ { 0x41560600, 0xfffffff0, "ARM/VLSI", "arm6" , "armv3" , "v3", HWCAP_SWP, &arm6_processor_functions }, + /* ARM610 */ { 0x41560610, 0xfffffff0, "ARM/VLSI", "arm610" , "armv3" , "v3", HWCAP_SWP, &arm6_processor_functions }, #endif #ifdef SUPPORT_CPU_ARM7 + /* ARM7's have a strange numbering */ { 0x41007000, 0xffffff00, "ARM/VLSI", "arm7" , "armv3" , "v3", HWCAP_SWP, &arm7_processor_functions }, /* ARM710 IDs are non-standard */ @@ -108,10 +119,16 @@ const struct armversions armidlist[] = { &arm7_processor_functions }, #endif #ifdef SUPPORT_CPU_SA110 - { 0x4401a100, 0xfffffff0, "DEC", "sa110" , "armv4" , "v3", HWCAP_SWP|HWCAP_HALF, +#ifdef CONFIG_ARCH_RPC + /* Acorn RiscPC's can't handle ARMv4 half-word instructions */ + { 0x4401a100, 0xfffffff0, "Intel", "sa110" , "armv4" , "v4", HWCAP_SWP, + &sa110_processor_functions }, +#else + { 0x4401a100, 0xfffffff0, "Intel", "sa110" , "armv4" , "v4", HWCAP_SWP|HWCAP_HALF, &sa110_processor_functions }, #endif #endif +#endif { 0x00000000, 0x00000000, "***", "unknown", "unknown", "**", 0, NULL } }; @@ -119,7 +136,7 @@ const struct armversions armidlist[] = { * From head-armv.S */ unsigned int processor_id; -unsigned int machine_type; +unsigned int __machine_arch_type; int armidindex; extern int root_mountflags; @@ -132,139 +149,10 @@ extern int _etext, _edata, _end; */ /* - * Risc-PC specific initialisation - */ -#ifdef CONFIG_ARCH_RPC - -#include <asm/arch/mmu.h> - -unsigned int vram_half_sam; - -static void -setup_rpc(struct param_struct *params) -{ - extern void init_dram_banks(const struct param_struct *params); - - init_dram_banks(params); - - switch (params->u1.s.pages_in_vram) { - case 256: - vram_half_sam = 1024; - break; - case 512: - default: - vram_half_sam = 2048; - } -} -#else -#define setup_rpc(x) -#endif - -#ifdef PARAMS_BASE - -#ifdef CONFIG_ARCH_ACORN -int memc_ctrl_reg; -int number_ide_drives; -int number_mfm_drives; -#endif - -static struct param_struct *params = (struct param_struct *)PARAMS_BASE; - -__initfunc(static char * -setup_params(unsigned long *mem_end_p)) -{ - ROOT_DEV = to_kdev_t(params->u1.s.rootdev); - ORIG_X = params->u1.s.video_x; - ORIG_Y = params->u1.s.video_y; - ORIG_VIDEO_COLS = params->u1.s.video_num_cols; - ORIG_VIDEO_LINES = params->u1.s.video_num_rows; - -#ifdef CONFIG_ARCH_ACORN -#ifndef CONFIG_FB - { - extern int bytes_per_char_h; - extern int bytes_per_char_v; - - bytes_per_char_h = params->u1.s.bytes_per_char_h; - bytes_per_char_v = params->u1.s.bytes_per_char_v; - } -#endif - memc_ctrl_reg = params->u1.s.memc_control_reg; - number_ide_drives = (params->u1.s.adfsdrives >> 6) & 3; - number_mfm_drives = (params->u1.s.adfsdrives >> 3) & 3; - - setup_rpc(params); - - if (!(params->u1.s.flags & FLAG_READONLY)) - root_mountflags &= ~MS_RDONLY; -#endif -#ifdef CONFIG_BLK_DEV_RAM - { - extern int rd_doload; - extern int rd_prompt; - extern int rd_image_start; - - rd_image_start = params->u1.s.rd_start; - rd_prompt = (params->u1.s.flags & FLAG_RDPROMPT) == 0; - rd_doload = (params->u1.s.flags & FLAG_RDLOAD) == 0; - } -#endif - -#ifdef CONFIG_ARCH_ACORN - *mem_end_p = GET_MEMORY_END(params); -#elif defined(CONFIG_ARCH_EBSA285) - *mem_end_p = PAGE_OFFSET + params->u1.s.page_size * params->u1.s.nr_pages; -#else - *mem_end_p = PAGE_OFFSET + MEM_SIZE; -#endif - - return params->commandline; -} - -#else - -static char default_command_line[] __initdata = CONFIG_CMDLINE; - -__initfunc(static char * -setup_params(unsigned long *mem_end_p)) -{ - ROOT_DEV = 0x00ff; - -#ifdef CONFIG_BLK_DEV_RAM - { - extern int rd_doload; - extern int rd_prompt; - extern int rd_image_start; - - rd_image_start = 0; - rd_prompt = 1; - rd_doload = 1; - } -#endif - - *mem_end_p = PAGE_OFFSET + MEM_SIZE; - - return default_command_line; -} -#endif - -/* * initial ram disk */ #ifdef CONFIG_BLK_DEV_INITRD __initfunc(static void -setup_initrd(const struct param_struct *params)) -{ - if (params->u1.s.initrd_start) { - initrd_start = params->u1.s.initrd_start; - initrd_end = initrd_start + params->u1.s.initrd_size; - } else { - initrd_start = 0; - initrd_end = 0; - } -} - -__initfunc(static void check_initrd(unsigned long mem_start, unsigned long mem_end)) { if (initrd_end > mem_end) { @@ -276,7 +164,6 @@ check_initrd(unsigned long mem_start, unsigned long mem_end)) } #else -#define setup_initrd(p) #define check_initrd(ms,me) #endif @@ -289,48 +176,47 @@ setup_processor(void)) armidlist[armidindex].mask) armidindex += 1; - if (armidlist[armidindex].id == 0) { -#ifdef CONFIG_ARCH_ACORN - int i; - - for (i = 0; i < 3200; i++) - ((unsigned long *)SCREEN2_BASE)[i] = 0x77113322; -#endif + if (armidlist[armidindex].id == 0) while (1); - } processor = *armidlist[armidindex].proc; processor._proc_init(); } +static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; static char command_line[COMMAND_LINE_SIZE] = { 0, }; char saved_command_line[COMMAND_LINE_SIZE]; __initfunc(static void -setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_end)) +setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_sz)) { - char c, *to = command_line; + char c = ' ', *to = command_line; int len = 0; *mem_start = (unsigned long)&_end; for (;;) { - if (cmd_line[0] == ' ' && - cmd_line[1] == 'm' && - cmd_line[2] == 'e' && - cmd_line[3] == 'm' && - cmd_line[4] == '=') { - *mem_end = simple_strtoul(cmd_line+5, &cmd_line, 0); - switch(*cmd_line) { - case 'M': - case 'm': - *mem_end <<= 10; - case 'K': - case 'k': - *mem_end <<= 10; + if (c == ' ') { + if (cmd_line[0] == 'm' && + cmd_line[1] == 'e' && + cmd_line[2] == 'm' && + cmd_line[3] == '=') { + *mem_sz = simple_strtoul(cmd_line+4, &cmd_line, 0); + switch(*cmd_line) { + case 'M': + case 'm': + *mem_sz <<= 10; + case 'K': + case 'k': + *mem_sz <<= 10; + cmd_line++; + } + } + /* if there are two spaces, remove one */ + if (*cmd_line == ' ') { cmd_line++; + continue; } - *mem_end = *mem_end + PAGE_OFFSET; } c = *cmd_line++; if (!c) @@ -341,42 +227,222 @@ setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_end)) } *to = '\0'; + + /* remove trailing spaces */ + while (*--to == ' ' && to != command_line) + *to = '\0'; +} + +__initfunc(static void +setup_ram(int doload, int prompt, int image_start)) +{ +#ifdef CONFIG_BLK_DEV_RAM + extern int rd_doload; + extern int rd_prompt; + extern int rd_image_start; + + rd_image_start = image_start; + rd_prompt = prompt; + rd_doload = doload; +#endif } +/* + * initial ram disk + */ +__initfunc(static void +setup_initrd(unsigned int start, unsigned int size)) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (start) { + initrd_start = start; + initrd_end = start + size; + } else { + initrd_start = 0; + initrd_end = 0; + } +#endif +} + +#ifdef CONFIG_ARCH_ACORN +int memc_ctrl_reg; +int number_mfm_drives; +unsigned int vram_size; +#endif + +#ifndef PARAMS_BASE +#define PARAMS_BASE NULL +#endif + +static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } }; +#define ENDIANNESS ((char)endian_test.l) + __initfunc(void setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p)) { + struct param_struct *params = (struct param_struct *)PARAMS_BASE; static unsigned char smptrap; - unsigned long memory_end; - char endian = 'l'; - char *from; + unsigned long memory_end = 0; + char *from = NULL; if (smptrap == 1) return; smptrap = 1; +#if defined(CONFIG_ARCH_ARC) + __machine_arch_type = MACH_TYPE_ARCHIMEDES; +#elif defined(CONFIG_ARCH_A5K) + __machine_arch_type = MACH_TYPE_A5K; +#endif + setup_processor(); - from = setup_params(&memory_end); - setup_initrd(params); + init_task.mm->start_code = TASK_SIZE; + init_task.mm->end_code = TASK_SIZE + (unsigned long) &_etext; + init_task.mm->end_data = TASK_SIZE + (unsigned long) &_edata; + init_task.mm->brk = TASK_SIZE + (unsigned long) &_end; + + /* + * Add your machine dependencies here + */ + switch (machine_arch_type) { + case MACH_TYPE_EBSA110: + /* EBSA110 locks if we execute 'wait for interrupt' */ + disable_hlt(); + params = NULL; + break; + + case MACH_TYPE_EBSA285: + if (params) { + ORIG_X = params->u1.s.video_x; + ORIG_Y = params->u1.s.video_y; + ORIG_VIDEO_COLS = params->u1.s.video_num_cols; + ORIG_VIDEO_LINES = params->u1.s.video_num_rows; + } + break; + + case MACH_TYPE_CO285: + { +#if 0 + extern unsigned long boot_memory_end; + extern char boot_command_line[]; + + from = boot_command_line; + memory_end = boot_memory_end; +#endif + params = NULL; + } + break; + + case MACH_TYPE_CATS: + /* CATS must use soft-reboot */ + reboot_setup("s", NULL); + break; + + case MACH_TYPE_NETWINDER: + /* + * to be fixed in a future NeTTrom + */ + if (params->u1.s.page_size == 4096) { + if (params->u1.s.nr_pages != 0x2000 && + params->u1.s.nr_pages != 0x4000) { + printk("Warning: bad NeTTrom parameters detected, using defaults\n"); + /* + * This stuff doesn't appear to be initialised + * properly by NeTTrom 2.0.6 and 2.0.7 + */ + params->u1.s.nr_pages = 0x2000; /* 32MB */ + params->u1.s.ramdisk_size = 0; + params->u1.s.flags = FLAG_READONLY; + params->u1.s.initrd_start = 0; + params->u1.s.initrd_size = 0; + params->u1.s.rd_start = 0; + params->u1.s.video_x = 0; + params->u1.s.video_y = 0; + params->u1.s.video_num_cols = 80; + params->u1.s.video_num_rows = 30; + } + } else { + printk("Warning: no NeTTrom parameter page detected, using " + "compiled-in settings\n"); + params = NULL; + } + break; + + default: + break; + } + + if (params) { + memory_end = params->u1.s.page_size * + params->u1.s.nr_pages; + + ROOT_DEV = to_kdev_t(params->u1.s.rootdev); + + setup_ram((params->u1.s.flags & FLAG_RDLOAD) == 0, + (params->u1.s.flags & FLAG_RDPROMPT) == 0, + params->u1.s.rd_start); + + setup_initrd(params->u1.s.initrd_start, + params->u1.s.initrd_size); + + if (!(params->u1.s.flags & FLAG_READONLY)) + root_mountflags &= ~MS_RDONLY; + +#ifdef CONFIG_ARCH_ACORN +#ifdef CONFIG_ARCH_RPC + { + extern void init_dram_banks(struct param_struct *); + init_dram_banks(params); + } +#endif + + memc_ctrl_reg = params->u1.s.memc_control_reg; + number_mfm_drives = (params->u1.s.adfsdrives >> 3) & 3; + vram_size = 0; + + switch (params->u1.s.pages_in_vram) { + case 512: + vram_size += PAGE_SIZE * 256; + case 256: + vram_size += PAGE_SIZE * 256; + default: + break; + } + + memory_end -= vram_size; +#endif + + from = params->commandline; + } else { + ROOT_DEV = 0x00ff; + + setup_ram(1, 1, 0); + setup_initrd(0, 0); + } + + if (!memory_end) + memory_end = MEM_SIZE; + + if (!from) + from = default_command_line; + +#ifdef CONFIG_NWFPE + fpe_init(); +#endif /* Save unparsed command line copy for /proc/cmdline */ memcpy(saved_command_line, from, COMMAND_LINE_SIZE); saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; setup_mem(from, memory_start_p, &memory_end); - check_initrd(*memory_start_p, memory_end); - init_task.mm->start_code = TASK_SIZE; - init_task.mm->end_code = TASK_SIZE + (unsigned long) &_etext; - init_task.mm->end_data = TASK_SIZE + (unsigned long) &_edata; - init_task.mm->brk = TASK_SIZE + (unsigned long) &_end; + memory_end += PAGE_OFFSET; - *cmdline_p = command_line; - *memory_end_p = memory_end; + check_initrd(*memory_start_p, memory_end); - sprintf(system_utsname.machine, "%s%c", armidlist[armidindex].arch_vsn, endian); - sprintf(elf_platform, "%s%c", armidlist[armidindex].elf_vsn, endian); + sprintf(system_utsname.machine, "%s%c", armidlist[armidindex].arch_vsn, ENDIANNESS); + sprintf(elf_platform, "%s%c", armidlist[armidindex].elf_vsn, ENDIANNESS); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -385,43 +451,26 @@ setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * mem conswitchp = &dummy_con; #endif #endif + + *cmdline_p = command_line; + *memory_end_p = memory_end; } -static const struct { - char *machine_name; - char *bus_name; -} machine_desc[] = { - { "DEC-EBSA110", "DEC" }, - { "Acorn-RiscPC", "Acorn" }, - { "Nexus-NexusPCI", "PCI" }, - { "DEC-EBSA285", "PCI" }, - { "Corel-Netwinder", "PCI/ISA" }, - { "Chalice-CATS", "PCI" }, - { "unknown-TBOX", "PCI" } +static const char *machine_desc[] = { + "EBSA110", + "Acorn-RiscPC", + "unknown", + "Nexus-FTV/PCI", + "EBSA285", + "Corel-NetWinder", + "Chalice-CATS", + "unknown-TBOX", + "co-EBSA285", + "CL-PS7110", + "Acorn-Archimedes", + "Acorn-A5000" }; -#if defined(CONFIG_ARCH_ARC) -#define HARDWARE "Acorn-Archimedes" -#define IO_BUS "Acorn" -#elif defined(CONFIG_ARCH_A5K) -#define HARDWARE "Acorn-A5000" -#define IO_BUS "Acorn" -#endif - -#if defined(CONFIG_CPU_ARM2) -#define OPTIMISATION "ARM2" -#elif defined(CONFIG_CPU_ARM3) -#define OPTIMISATION "ARM3" -#elif defined(CONFIG_CPU_ARM6) -#define OPTIMISATION "ARM6" -#elif defined(CONFIG_CPU_ARM7) -#define OPTIMISATION "ARM7" -#elif defined(CONFIG_CPU_SA110) -#define OPTIMISATION "StrongARM" -#else -#define OPTIMISATION "unknown" -#endif - int get_cpuinfo(char * buffer) { int len; @@ -429,25 +478,12 @@ int get_cpuinfo(char * buffer) len = sprintf(buffer, "Processor\t: %s %s rev %d\n" "BogoMips\t: %lu.%02lu\n" - "Hardware\t: %s\n" - "Optimisation\t: %s\n" - "IO Bus\t\t: %s\n", + "Hardware\t: %s\n", armidlist[armidindex].manu, armidlist[armidindex].name, (int)processor_id & 15, (loops_per_sec+2500) / 500000, ((loops_per_sec+2500) / 5000) % 100, -#ifdef HARDWARE - HARDWARE, -#else - machine_desc[machine_type].machine_name, -#endif - OPTIMISATION, -#ifdef IO_BUS - IO_BUS -#else - machine_desc[machine_type].bus_name -#endif - ); + machine_desc[machine_arch_type]); return len; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 51e6bcb17..5ec48f752 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -28,7 +28,7 @@ asmlinkage int sys_wait4(pid_t pid, unsigned long * stat_addr, int options, unsigned long *ru); -asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs); +asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs, int syscall); extern int ptrace_cancel_bpt (struct task_struct *); extern int ptrace_set_bpt (struct task_struct *); @@ -50,7 +50,7 @@ asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t m while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(&saveset, regs)) + if (do_signal(&saveset, regs, 0)) return regs->ARM_r0; } } @@ -78,7 +78,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs) while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(&saveset, regs)) + if (do_signal(&saveset, regs, 0)) return regs->ARM_r0; } } @@ -158,12 +158,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) #ifdef CONFIG_CPU_32 err |= __get_user(regs->ARM_cpsr, &sc->arm_cpsr); #endif - if (!valid_user_regs(regs)) - return 1; - /* send SIGTRAP if we're single-stepping */ - if (ptrace_cancel_bpt (current)) - send_sig (SIGTRAP, current, 1); + err |= !valid_user_regs(regs); return err; } @@ -173,6 +169,14 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) struct sigframe *frame; sigset_t set; + /* + * Since we stacked the signal on a word boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->ARM_sp & 3) + goto badframe; + frame = (struct sigframe *)regs->ARM_sp; if (verify_area(VERIFY_READ, frame, sizeof (*frame))) @@ -192,6 +196,10 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->sc)) goto badframe; + /* Send SIGTRAP if we're single-stepping */ + if (ptrace_cancel_bpt (current)) + send_sig(SIGTRAP, current, 1); + return regs->ARM_r0; badframe: @@ -204,6 +212,14 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe *frame; sigset_t set; + /* + * Since we stacked the signal on a word boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->ARM_sp & 3) + goto badframe; + frame = (struct rt_sigframe *)regs->ARM_sp; if (verify_area(VERIFY_READ, frame, sizeof (*frame))) @@ -220,6 +236,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; + /* Send SIGTRAP if we're single-stepping */ + if (ptrace_cancel_bpt (current)) + send_sig(SIGTRAP, current, 1); + return regs->ARM_r0; badframe: @@ -260,6 +280,26 @@ setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/ return err; } +static inline void *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + unsigned long framesize) +{ + unsigned long sp = regs->ARM_sp; + + /* + * This is the X/Open sanctioned signal stack switching. + */ + if ((ka->sa.sa_flags & SA_ONSTACK) && ! on_sig_stack(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + /* + * No matter what happens, 'sp' must be word + * aligned otherwise nasty things could happen + */ + sp &= ~3; + + return (void *)(sp - framesize); +} + static void setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) { @@ -267,9 +307,9 @@ static void setup_frame(int sig, struct k_sigaction *ka, unsigned long retcode; int err = 0; - frame = (struct sigframe *)regs->ARM_sp - 1; + frame = get_sigframe(ka, regs, sizeof(*frame)); - if (!access_ok(VERIFT_WRITE, frame, sizeof (*frame))) + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto segv_and_exit; err |= setup_sigcontext(&frame->sc, /*&frame->fpstate,*/ regs, set->sig[0]); @@ -286,7 +326,7 @@ static void setup_frame(int sig, struct k_sigaction *ka, } else { retcode = (unsigned long)&frame->retcode; err |= __put_user(SWI_SYS_SIGRETURN, &frame->retcode); - __flush_entry_to_ram (&frame->retcode); + flush_icache_range(retcode, retcode + 4); } if (err) @@ -299,6 +339,11 @@ static void setup_frame(int sig, struct k_sigaction *ka, regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = (unsigned long)ka->sa.sa_handler; +#if defined(CONFIG_CPU_32) + /* Maybe we need to deliver a 32-bit signal to a 26-bit task. */ + if (ka->sa.sa_flags & SA_THIRTYTWO) + regs->ARM_cpsr = USR_MODE; +#endif if (valid_user_regs(regs)) return; @@ -315,7 +360,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, unsigned long retcode; int err = 0; - frame = (struct rt_sigframe *)regs->ARM_sp - 1; + frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe)); + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto segv_and_exit; @@ -337,7 +383,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } else { retcode = (unsigned long)&frame->retcode; err |= __put_user(SWI_SYS_RT_SIGRETURN, &frame->retcode); - __flush_entry_to_ram (&frame->retcode); + flush_icache_range(retcode, retcode + 4); } if (err) @@ -350,6 +396,11 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = (unsigned long)ka->sa.sa_handler; +#if defined(CONFIG_CPU_32) + /* Maybe we need to deliver a 32-bit signal to a 26-bit task. */ + if (ka->sa.sa_flags & SA_THIRTYTWO) + regs->ARM_cpsr = USR_MODE; +#endif if (valid_user_regs(regs)) return; @@ -393,18 +444,25 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) +asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) { - unsigned long instr, *pc = (unsigned long *)(instruction_pointer(regs)-4); struct k_sigaction *ka; siginfo_t info; - int single_stepping, swi_instr; + int single_stepping; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if (!user_mode(regs)) + return 0; if (!oldset) oldset = ¤t->blocked; single_stepping = ptrace_cancel_bpt (current); - swi_instr = (!get_user (instr, pc) && (instr & 0x0f000000) == 0x0f000000); for (;;) { unsigned long signr; @@ -503,7 +561,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) } /* Are we from a system call? */ - if (swi_instr) { + if (syscall) { switch (regs->ARM_r0) { case -ERESTARTNOHAND: regs->ARM_r0 = -EINTR; @@ -527,7 +585,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) return 1; } - if (swi_instr && + if (syscall && (regs->ARM_r0 == -ERESTARTNOHAND || regs->ARM_r0 == -ERESTARTSYS || regs->ARM_r0 == -ERESTARTNOINTR)) { diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index d50b90f8d..9da64aad0 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -223,13 +223,7 @@ out: */ asmlinkage int sys_fork(struct pt_regs *regs) { - int ret; - - lock_kernel(); - ret = do_fork(SIGCHLD, regs->ARM_sp, regs); - unlock_kernel(); - - return ret; + return do_fork(SIGCHLD, regs->ARM_sp, regs); } /* Clone a task - this clones the calling program thread. @@ -237,14 +231,14 @@ asmlinkage int sys_fork(struct pt_regs *regs) */ asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs) { - int ret; - - lock_kernel(); if (!newsp) newsp = regs->ARM_sp; - ret = do_fork(clone_flags, newsp, regs); - unlock_kernel(); - return ret; + return do_fork(clone_flags, newsp, regs); +} + +asmlinkage int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs); } /* sys_execve() executes a new program. diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index b6448e942..c874a1ba8 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -129,27 +129,12 @@ void do_settimeofday(struct timeval *tv) time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; - sti (); + sti(); } -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick. - */ -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - if (reset_timer ()) - do_timer(regs); - - update_rtc (); -} - -static struct irqaction irqtimer = { timer_interrupt, 0, 0, "timer", NULL, NULL}; - __initfunc(void time_init(void)) { - xtime.tv_sec = setup_timer(); xtime.tv_usec = 0; - setup_arm_irq(IRQ_TIMER, &irqtimer); + setup_timer(); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5d04f325b..9267fec09 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -24,7 +24,6 @@ #include <asm/atomic.h> #include <asm/pgtable.h> -extern void die_if_kernel(char *str, struct pt_regs *regs, int err, int ret); extern void c_backtrace (unsigned long fp, int pmode); extern int ptrace_cancel_bpt (struct task_struct *); @@ -45,16 +44,17 @@ static inline void console_verbose(void) int kstack_depth_to_print = 200; -static int verify_stack_pointer (unsigned long stackptr, int size) +/* + * Stack pointers should always be within the kernels view of + * physical memory. If it is not there, then we can't dump + * out any information relating to the stack. + */ +static int verify_stack(unsigned long sp) { -#ifdef CONFIG_CPU_26 - if (stackptr < 0x02048000 || stackptr + size > 0x03000000) - return -EFAULT; -#else - if (stackptr < PAGE_OFFSET || stackptr + size > (unsigned long)high_memory) + if (sp < PAGE_OFFSET || sp > (unsigned long)high_memory) return -EFAULT; -#endif - return 0; + + return 0; } /* @@ -90,22 +90,26 @@ void dump_mem(unsigned long bottom, unsigned long top) static void dump_instr(unsigned long pc, int user) { - unsigned long module_start, module_end; int pmin = -2, pmax = 3, ok = 0; extern char start_kernel, _etext; if (!user) { + unsigned long module_start, module_end; + unsigned long kernel_start, kernel_end; + module_start = VMALLOC_START; module_end = module_start + MODULE_RANGE; - if ((pc >= (unsigned long) &start_kernel) && - (pc <= (unsigned long) &_etext)) { - if (pc + pmin < (unsigned long) &start_kernel) - pmin = ((unsigned long) &start_kernel) - pc; - if (pc + pmax > (unsigned long) &_etext) - pmax = ((unsigned long) &_etext) - pc; + kernel_start = (unsigned long)&start_kernel; + kernel_end = (unsigned long)&_etext; + + if (pc >= kernel_start && pc < kernel_end) { + if (pc + pmin < kernel_start) + pmin = kernel_start - pc; + if (pc + pmax > kernel_end) + pmax = kernel_end - pc; ok = 1; - } else if (pc >= module_start && pc <= module_end) { + } else if (pc >= module_start && pc < module_end) { if (pc + pmin < module_start) pmin = module_start - pc; if (pc + pmax > module_end) @@ -125,119 +129,138 @@ static void dump_instr(unsigned long pc, int user) printk ("pc not in code space\n"); } -static void dump_state(char *str, struct pt_regs *regs, int err) +spinlock_t die_lock; + +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) { + struct task_struct *tsk = current; + + spin_lock_irq(&die_lock); + console_verbose(); printk("Internal error: %s: %x\n", str, err); printk("CPU: %d\n", smp_processor_id()); show_regs(regs); printk("Process %s (pid: %d, stackpage=%08lx)\n", - current->comm, current->pid, 4096+(unsigned long)current); + current->comm, current->pid, 4096+(unsigned long)tsk); + + if (!user_mode(regs)) { + unsigned long sp = (unsigned long)(regs + 1); + unsigned long fp; + int dump_info = 1; + + printk("Stack: "); + if (verify_stack(sp)) { + printk("invalid kernel stack pointer %08lx", sp); + dump_info = 0; + } else if (sp < 4096+(unsigned long)tsk) + printk("kernel stack pointer underflow"); + printk("\n"); + + if (dump_info) + dump_mem(sp - 16, 8192+(unsigned long)tsk); + + dump_info = 1; + + printk("Backtrace: "); + fp = regs->ARM_fp; + if (!fp) { + printk("no frame pointer"); + dump_info = 0; + } else if (verify_stack(fp)) { + printk("invalid frame pointer %08lx", fp); + dump_info = 0; + } else if (fp < 4096+(unsigned long)tsk) + printk("frame pointer underflow"); + printk("\n"); + + if (dump_info) + c_backtrace(fp, processor_mode(regs)); + + dump_instr(instruction_pointer(regs), 0); + } + + spin_unlock_irq(&die_lock); } -/* - * This function is protected against kernel-mode re-entrancy. If it - * is re-entered it will hang the system since we can't guarantee in - * this case that any of the functions that it calls are safe any more. - * Even the panic function could be a problem, but we'll give it a go. - */ -void die_if_kernel(char *str, struct pt_regs *regs, int err, int ret) +static void die_if_kernel(const char *str, struct pt_regs *regs, int err) { - static int died = 0; - unsigned long cstack, sstack, frameptr; - if (user_mode(regs)) return; - switch (died) { - case 2: - while (1); - case 1: - died ++; - panic ("die_if_kernel re-entered. Major kernel corruption. Please reboot me!"); - break; - case 0: - died ++; - break; - } - - dump_state(str, regs, err); - - cstack = (unsigned long)(regs + 1); - sstack = 4096+(unsigned long)current; - - printk("Stack: "); - if (verify_stack_pointer(cstack, 4)) - printk("invalid kernel stack pointer %08lx", cstack); - else if(cstack > sstack + 4096) - printk("(sp overflow)"); - else if(cstack < sstack) - printk("(sp underflow)"); - printk("\n"); - - dump_mem(cstack - 16, sstack + 4096); - - frameptr = regs->ARM_fp; - if (frameptr) { - if (verify_stack_pointer (frameptr, 4)) - printk ("Backtrace: invalid frame pointer\n"); - else { - printk("Backtrace: \n"); - c_backtrace (frameptr, processor_mode(regs)); - } - } - - dump_instr(instruction_pointer(regs), 0); - died = 0; - if (ret != -1) - do_exit (ret); - else { - cli (); - while (1); - } + die(str, regs, err); } -void bad_user_access_alignment (const void *ptr) +void bad_user_access_alignment(const void *ptr) { - void *pc; - __asm__("mov %0, lr\n": "=r" (pc)); - printk (KERN_ERR "bad_user_access_alignment called: ptr = %p, pc = %p\n", ptr, pc); + printk(KERN_ERR "bad user access alignment: ptr = %p, pc = %p\n", ptr, + __builtin_return_address(0)); current->tss.error_code = 0; current->tss.trap_no = 11; - force_sig (SIGBUS, current); -/* die_if_kernel("Oops - bad user access alignment", regs, mode, SIGBUS);*/ + force_sig(SIGBUS, current); +/* die_if_kernel("Oops - bad user access alignment", regs, mode);*/ } -asmlinkage void do_undefinstr (int address, struct pt_regs *regs, int mode) +asmlinkage void do_undefinstr(int address, struct pt_regs *regs, int mode) { +#ifdef CONFIG_DEBUG_USER + printk(KERN_INFO "%s (%d): undefined instruction: pc=%08lx\n", + current->comm, current->pid, instruction_pointer(regs)); +#endif current->tss.error_code = 0; current->tss.trap_no = 6; - force_sig (SIGILL, current); - die_if_kernel("Oops - undefined instruction", regs, mode, SIGILL); + force_sig(SIGILL, current); + die_if_kernel("Oops - undefined instruction", regs, mode); } -asmlinkage void do_excpt (int address, struct pt_regs *regs, int mode) +asmlinkage void do_excpt(int address, struct pt_regs *regs, int mode) { +#ifdef CONFIG_DEBUG_USER + printk(KERN_INFO "%s (%d): address exception: pc=%08lx\n", + current->comm, current->pid, instruction_pointer(regs)); +#endif current->tss.error_code = 0; current->tss.trap_no = 11; - force_sig (SIGBUS, current); - die_if_kernel("Oops - address exception", regs, mode, SIGBUS); + force_sig(SIGBUS, current); + die_if_kernel("Oops - address exception", regs, mode); } asmlinkage void do_unexp_fiq (struct pt_regs *regs) { #ifndef CONFIG_IGNORE_FIQ - printk ("Hmm. Unexpected FIQ received, but trying to continue\n"); - printk ("You may have a hardware problem...\n"); + printk("Hmm. Unexpected FIQ received, but trying to continue\n"); + printk("You may have a hardware problem...\n"); #endif } +/* + * bad_mode handles the impossible case in the vectors. + * If you see one of these, then it's extremely serious, + * and could mean you have buggy hardware. It never + * returns, and never tries to sync. We hope that we + * can dump out some state information... + */ asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode) { - printk (KERN_CRIT "Bad mode in %s handler detected: mode %s\n", - handler[reason], - processor_modes[proc_mode]); - die_if_kernel ("Oops", regs, 0, -1); + console_verbose(); + + printk(KERN_CRIT "Bad mode in %s handler detected: mode %s\n", + handler[reason], processor_modes[proc_mode]); + + /* + * Dump out the vectors and stub routines + */ + printk(KERN_CRIT "Vectors:\n"); + dump_mem(0, 0x40); + printk(KERN_CRIT "Stubs:\n"); + dump_mem(0x200, 0x4b8); + + die("Oops", regs, 0); + cli(); + while(1); } /* @@ -249,54 +272,85 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode) */ asmlinkage void math_state_restore (void) { - current->used_math = 1; + current->used_math = 1; } -asmlinkage void arm_syscall (int no, struct pt_regs *regs) +asmlinkage int arm_syscall (int no, struct pt_regs *regs) { switch (no) { case 0: /* branch through 0 */ force_sig(SIGSEGV, current); -// if (user_mode(regs)) { -// dump_state("branch through zero", regs, 0); -// if (regs->ARM_fp) -// c_backtrace (regs->ARM_fp, processor_mode(regs)); -// } - die_if_kernel ("branch through zero", regs, 0, SIGSEGV); + die_if_kernel("branch through zero", regs, 0); break; case 1: /* SWI_BREAK_POINT */ regs->ARM_pc -= 4; /* Decrement PC by one instruction */ - ptrace_cancel_bpt (current); - force_sig (SIGTRAP, current); + ptrace_cancel_bpt(current); + force_sig(SIGTRAP, current); + return regs->ARM_r0; + + case 2: /* sys_cacheflush */ +#ifdef CONFIG_CPU_32 + /* r0 = start, r1 = length, r2 = flags */ + processor.u.armv3v4._flush_cache_area(regs->ARM_r0, + regs->ARM_r1, + 1); +#endif break; default: - printk ("[%d] %s: arm syscall %d\n", current->pid, current->comm, no); - force_sig (SIGILL, current); + /* Calls 9f00xx..9f07ff are defined to return -ENOSYS + if not implemented, rather than raising SIGILL. This + way the calling program can gracefully determine whether + a feature is supported. */ + if (no <= 0x7ff) + return -ENOSYS; +#ifdef CONFIG_DEBUG_USER + /* experiance shows that these seem to indicate that + * something catastrophic has happened + */ + printk("[%d] %s: arm syscall %d\n", current->pid, current->comm, no); if (user_mode(regs)) { - show_regs (regs); - c_backtrace (regs->ARM_fp, processor_mode(regs)); + show_regs(regs); + c_backtrace(regs->ARM_fp, processor_mode(regs)); } - die_if_kernel ("Oops", regs, no, SIGILL); +#endif + force_sig(SIGILL, current); + die_if_kernel("Oops", regs, no); break; } + return 0; } asmlinkage void deferred(int n, struct pt_regs *regs) { - dump_state("old system call", regs, n); - force_sig (SIGILL, current); + /* You might think just testing `handler' would be enough, but PER_LINUX + * points it to no_lcall7 to catch undercover SVr4 binaries. Gutted. + */ + if (current->personality != PER_LINUX && current->exec_domain->handler) { + /* Hand it off to iBCS. The extra parameter and consequent type + * forcing is necessary because of the weird ARM calling convention. + */ + void (*handler)(int nr, struct pt_regs *regs) = (void *)current->exec_domain->handler; + (*handler)(n, regs); + return; + } + +#ifdef CONFIG_DEBUG_USER + printk(KERN_ERR "[%d] %s: old system call.\n", current->pid, + current->comm); +#endif + force_sig(SIGILL, current); } asmlinkage void arm_malalignedptr(const char *str, void *pc, volatile void *ptr) { - printk ("Mal-aligned pointer in %s: %p (PC=%p)\n", str, ptr, pc); + printk("Mal-aligned pointer in %s: %p (PC=%p)\n", str, ptr, pc); } -asmlinkage void arm_invalidptr (const char *function, int size) +asmlinkage void arm_invalidptr(const char *function, int size) { - printk ("Invalid pointer size in %s (PC=%p) size %d\n", + printk("Invalid pointer size in %s (pc=%p) size %d\n", function, __builtin_return_address(0), size); } diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 684db2a47..0b241d333 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,14 +6,14 @@ L_TARGET := lib.a L_OBJS := backtrace.o bitops.o checksum.o delay.o io.o memcpy.o \ - system.o string.o uaccess.o + semaphore.o string.o system.o uaccess.o ifeq ($(PROCESSOR),armo) L_OBJS += uaccess-armo.o endif ifdef CONFIG_ARCH_ACORN - L_OBJS += loaders.o ll_char_wr.o io-acorn.o + L_OBJS += loaders.o io-acorn.o ifdef CONFIG_ARCH_A5K L_OBJS += floppydma.o endif @@ -26,12 +26,8 @@ ifeq ($(MACHINE),ebsa110) L_OBJS += io-ebsa110.o endif -ifeq ($(MACHINE),vnc) - L_OBJS += io-ebsa285.o -endif - -ifeq ($(MACHINE),ebsa285) - L_OBJS += io-ebsa285.o +ifeq ($(MACHINE),footbridge) + L_OBJS += io-footbridge.o endif include $(TOPDIR)/Rules.make @@ -45,10 +41,4 @@ getconsdata.o: getconsdata.c checksum.o: constants.h %.o: %.S -ifneq ($(CONFIG_BINUTILS_NEW),y) - $(CC) $(CFLAGS) -D__ASSEMBLY__ -E $< | tr ';$$' '\n#' > ..tmp.$<.s - $(CC) $(CFLAGS:-pipe=) -c -o $@ ..tmp.$<.s - $(RM) ..tmp.$<.s -else $(CC) $(CFLAGS) -D__ASSEMBLY__ -c -o $@ $< -endif diff --git a/arch/arm/lib/checksum.S b/arch/arm/lib/checksum.S index bd5c78d34..daf49fc94 100644 --- a/arch/arm/lib/checksum.S +++ b/arch/arm/lib/checksum.S @@ -520,13 +520,13 @@ Ldst_aligned: tst r0, #3 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) ldr r4, [r0], #4 tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 mov r4, r4, lsr #8 strb r4, [r1], #1 mov r4, r4, lsr #8 - b Lexit + b Lexit_r4 Ltoo_small: teq r2, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) @@ -538,10 +538,12 @@ Ltoo_small: teq r2, #0 adds r3, r3, ip strb ip, [r1], #1 strb r8, [r1], #1 -Lexit: tst r2, #1 -Ltoo_small1: ldrneb ip, [r0], #1 - strneb ip, [r1], #1 - adcnes r3, r3, ip + tst r2, #1 +Ltoo_small1: ldrneb r4, [r0], #1 +Lexit_r4: tst r2, #1 + strneb r4, [r1], #1 + andne r4, r4, #255 + adcnes r3, r3, r4 adcs r0, r3, #0 LOADREGS(ea,fp,{r4 - r8, fp, sp, pc}) @@ -598,13 +600,13 @@ Lsrc_not_aligned: adceq r0, r3, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 mov r4, r4, lsr #8 strb r4, [r1], #1 mov r4, r4, lsr #8 - b Lexit + b Lexit_r4 Lsrc2_aligned: mov r4, r4, lsr #16 adds r3, r3, #0 @@ -650,13 +652,13 @@ Lsrc2_aligned: mov r4, r4, lsr #16 adceq r0, r3, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 mov r4, r4, lsr #8 strb r4, [r1], #1 ldrb r4, [r0], #1 - b Lexit + b Lexit_r4 Lsrc3_aligned: mov r4, r4, lsr #24 adds r3, r3, #0 @@ -702,14 +704,14 @@ Lsrc3_aligned: mov r4, r4, lsr #24 adceq r0, r3, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 ldr r4, [r0], #4 strb r4, [r1], #1 adcs r3, r3, r4, lsl #24 mov r4, r4, lsr #8 - b Lexit + b Lexit_r4 ENTRY(__csum_ipv6_magic) stmfd sp!, {lr} diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S index 08fdccb27..778d3e574 100644 --- a/arch/arm/lib/floppydma.S +++ b/arch/arm/lib/floppydma.S @@ -26,32 +26,3 @@ ENTRY(floppy_fiqout_start) strb r12, [r11, #-4] subs pc, lr, #4 SYMBOL_NAME(floppy_fiqout_end): - -@ Params: -@ r0 = length -@ r1 = address -@ r2 = floppy port -@ Puts these into R9_fiq, R10_fiq, R11_fiq -ENTRY(floppy_fiqsetup) - mov ip, sp - stmfd sp!, {fp, ip, lr, pc} - sub fp, ip, #4 - MODE(r3,ip,I_BIT|F_BIT|DEFAULT_FIQ) @ disable FIQs, IRQs, FIQ mode - mov r0, r0 - mov r9, r0 - mov r10, r1 - mov r11, r2 - RESTOREMODE(r3) @ back to normal - mov r0, r0 - LOADREGS(ea,fp,{fp, sp, pc}) - -ENTRY(floppy_fiqresidual) - mov ip, sp - stmfd sp!, {fp, ip, lr, pc} - sub fp, ip, #4 - MODE(r3,ip,I_BIT|F_BIT|DEFAULT_FIQ) @ disable FIQs, IRQs, FIQ mode - mov r0, r0 - mov r0, r9 - RESTOREMODE(r3) - mov r0, r0 - LOADREGS(ea,fp,{fp, sp, pc}) diff --git a/arch/arm/lib/getconsdata.c b/arch/arm/lib/getconsdata.c index ba145eaff..27f4ca2ef 100644 --- a/arch/arm/lib/getconsdata.c +++ b/arch/arm/lib/getconsdata.c @@ -67,6 +67,23 @@ unsigned long PAGE_OLD = _PAGE_OLD; unsigned long PAGE_CLEAN = _PAGE_CLEAN; #endif +#ifdef PTE_TYPE_SMALL +unsigned long HPTE_TYPE_SMALL = PTE_TYPE_SMALL; +unsigned long HPTE_AP_READ = PTE_AP_READ; +unsigned long HPTE_AP_WRITE = PTE_AP_WRITE; +#endif + +#ifdef L_PTE_PRESENT +unsigned long LPTE_PRESENT = L_PTE_PRESENT; +unsigned long LPTE_YOUNG = L_PTE_YOUNG; +unsigned long LPTE_BUFFERABLE = L_PTE_BUFFERABLE; +unsigned long LPTE_CACHEABLE = L_PTE_CACHEABLE; +unsigned long LPTE_USER = L_PTE_USER; +unsigned long LPTE_WRITE = L_PTE_WRITE; +unsigned long LPTE_EXEC = L_PTE_EXEC; +unsigned long LPTE_DIRTY = L_PTE_DIRTY; +#endif + unsigned long KSWI_BASE = 0x900000; unsigned long KSWI_SYS_BASE = 0x9f0000; unsigned long SYS_ERROR0 = 0x9f0000; diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S index 6baa4cd50..bf2dd6333 100644 --- a/arch/arm/lib/io-acorn.S +++ b/arch/arm/lib/io-acorn.S @@ -11,50 +11,514 @@ .text .align -#define OUT(reg) \ - mov r8, reg, lsl $16 ;\ - orr r8, r8, r8, lsr $16 ;\ - str r8, [r3, r0, lsl $2] ;\ - mov r8, reg, lsr $16 ;\ - orr r8, r8, r8, lsl $16 ;\ - str r8, [r3, r0, lsl $2] - -#define IN(reg) \ - ldr reg, [r0] ;\ - and reg, reg, ip ;\ - ldr lr, [r0] ;\ - orr reg, reg, lr, lsl $16 - - .equ pcio_base_high, PCIO_BASE & 0xff000000 - .equ pcio_base_low, PCIO_BASE & 0x00ff0000 - .equ io_base_high, IO_BASE & 0xff000000 - .equ io_base_low, IO_BASE & 0x00ff0000 - - .equ addr_io_diff_hi, pcio_base_high - io_base_high - .equ addr_io_diff_lo, pcio_base_low - io_base_low - - .macro addr reg, off - tst \off, #0x80000000 - .if addr_io_diff_hi - movne \reg, #IO_BASE - moveq \reg, #pcio_base_high - .if pcio_base_low - addeq \reg, \reg, #pcio_base_low - .endif - .else - mov \reg, #IO_BASE - addeq \reg, \reg, #addr_io_diff_lo - .endif + .equ diff_pcio_base, PCIO_BASE - IO_BASE + + .macro outw2 rd + mov r8, \rd, lsl #16 + orr r8, r8, r8, lsr #16 + str r8, [r3, r0, lsl #2] + mov r8, \rd, lsr #16 + orr r8, r8, r8, lsl #16 + str r8, [r3, r0, lsl #2] + .endm + + .macro inw2 rd, mask, temp + ldr \rd, [r0] + and \rd, \rd, \mask + ldr \temp, [r0] + orr \rd, \rd, \temp, lsl #16 .endm -@ Purpose: read a block of data from a hardware register to memory. -@ Proto : insw(int from_port, void *to, int len_in_words); -@ Proto : inswb(int from_port, void *to, int len_in_bytes); -@ Notes : increment to + .macro addr rd + tst \rd, #0x80000000 + mov \rd, \rd, lsl #2 + add \rd, \rd, #IO_BASE + addeq \rd, \rd, #diff_pcio_base + .endm + +.iosw_bad_align_msg: + .ascii "insw: bad buffer alignment (%p), called from %08lX\n\0" +.iosl_warning: + .ascii "<4>insl/outsl not implemented, called from %08lX\0" + .align + +/* + * These make no sense on Acorn machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) + adr r0, .iosl_warning + mov r1, lr + b SYMBOL_NAME(printk) + +.iosw_bad_alignment: + adr r0, .iosw_bad_align_msg + mov r2, lr + b SYMBOL_NAME(panic) + + +/* Purpose: read a block of data from a hardware register to memory. + * Proto : void insw(int from_port, void *to, int len_in_words); + * Notes : increment to, 'to' must be 16-bit aligned + */ + +.insw_align: tst r1, #1 + bne .iosw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + bne .insw_aligned ENTRY(insw) + teq r2, #0 + RETINSTR(moveq,pc,lr) + addr r0 + tst r1, #3 + bne .insw_align + +.insw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_insw_8 + +.insw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + subs r2, r2, #8 + bpl .insw_8_lp + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_insw_8: tst r2, #4 + beq .no_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.no_insw_4: tst r2, #2 + beq .no_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.no_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + LOADREGS(fd, sp!, {r4, r5, r6, pc}) + +@ Purpose: write a block of data from memory to a hardware register. +@ Proto : outsw(int to_reg, void *from, int len_in_words); +@ Notes : increments from + +.outsw_align: tst r1, #1 + bne .iosw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + bne .outsw_aligned + +ENTRY(outsw) + teq r2, #0 + RETINSTR(moveq,pc,lr) + addr r0 + tst r1, #3 + bne .outsw_align + +.outsw_aligned: stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_outsw_8 +.outsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .outsw_8_lp + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_outsw_8: tst r2, #4 + beq .no_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_4: tst r2, #2 + beq .no_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + LOADREGS(fd, sp!, {r4, r5, r6, pc}) + +.insb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1], #1 + subs r2, r2, ip + bne .insb_aligned + +ENTRY(insb) + teq r2, #0 + moveq pc, lr + addr r0 + ands ip, r1, #3 + bne .insb_align + +.insb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .insb_no_16 + +.insb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + ldrb r4, [r0] + ldrb r5, [r0] + orr r4, r4, r5, lsl #8 + ldrb r5, [r0] + orr r4, r4, r5, lsl #16 + ldrb r5, [r0] + orr r4, r4, r5, lsl #24 + ldrb r5, [r0] + ldrb r6, [r0] + orr r5, r5, r6, lsl #8 + ldrb r6, [r0] + orr r5, r5, r6, lsl #16 + ldrb r6, [r0] + orr r5, r5, r6, lsl #24 + ldrb r6, [r0] + ldrb ip, [r0] + orr r6, r6, ip, lsl #8 + ldrb ip, [r0] + orr r6, r6, ip, lsl #16 + ldrb ip, [r0] + orr r6, r6, ip, lsl #24 + stmia r1!, {r3 - r6} + subs r2, r2, #16 + bpl .insb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.insb_no_16: tst r2, #8 + beq .insb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + ldrb r4, [r0] + ldrb r5, [r0] + orr r4, r4, r5, lsl #8 + ldrb r5, [r0] + orr r4, r4, r5, lsl #16 + ldrb r5, [r0] + orr r4, r4, r5, lsl #24 + stmia r1!, {r3, r4} + +.insb_no_8: tst r2, #4 + bne .insb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + str r3, [r1], #4 + +.insb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1] + LOADREGS(fd, sp!, {r4 - r6, pc}) + + + +.outsb_align: rsb ip, ip, #4 + cmp ip, r2 + mov ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1], #1 + strgtb r3, [r0] + subs r2, r2, ip + bne .outsb_aligned + +ENTRY(outsb) + teq r2, #0 + moveq pc, lr + addr r0 + ands ip, r1, #3 + bne .outsb_align + +.outsb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .outsb_no_16 + +.outsb_16_lp: ldmia r1!, {r3 - r6} + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + subs r2, r2, #16 + bpl .outsb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.outsb_no_16: tst r2, #8 + beq .outsb_no_8 + + ldmia r1, {r3, r4} + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + +.outsb_no_8: tst r2, #4 + bne .outsb_no_4 + + ldr r3, [r1], #4 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + +.outsb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1] + strgtb r3, [r0] + LOADREGS(fd, sp!, {r4 - r6, pc}) + + + + +@ Purpose: write a memc register +@ Proto : void memc_write(int register, int value); +@ Returns: nothing + +#if defined(CONFIG_CPU_26) +ENTRY(memc_write) + cmp r0, #7 + RETINSTR(movgt,pc,lr) + mov r0, r0, lsl #17 + mov r1, r1, lsl #15 + mov r1, r1, lsr #17 + orr r0, r0, r1, lsl #2 + add r0, r0, #0x03600000 + strb r0, [r0] + RETINSTR(mov,pc,lr) +#define CPSR2SPSR(rt) +#else +#define CPSR2SPSR(rt) \ + mrs rt, cpsr; \ + msr spsr, rt +#endif + +@ Purpose: call an expansion card loader to read bytes. +@ Proto : char read_loader(int offset, char *card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_read) + stmfd sp!, {r4 - r12, lr} + mov r11, r1 + mov r1, r0 + CPSR2SPSR(r0) + mov lr, pc + mov pc, r2 + LOADREGS(fd, sp!, {r4 - r12, pc}) + +@ Purpose: call an expansion card loader to reset the card +@ Proto : void read_loader(int card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_reset) + stmfd sp!, {r4 - r12, lr} + mov r11, r0 + CPSR2SPSR(r0) + mov lr, pc + add pc, r1, #8 + LOADREGS(fd, sp!, {r4 - r12, pc}) + + +#if 0 mov r2, r2, lsl#1 -ENTRY(inswb) mov ip, sp stmfd sp!, {r4 - r10, fp, ip, lr, pc} sub fp, ip, #4 @@ -122,14 +586,9 @@ Linsw_notaligned: bgt Linsw_notaligned LOADREGS(ea, fp, {r4 - r10, fp, sp, pc}) -@ Purpose: write a block of data from memory to a hardware register. -@ Proto : outsw(int to_reg, void *from, int len_in_words); -@ Proto : outswb(int to_reg, void *from, int len_in_bytes); -@ Notes : increments from ENTRY(outsw) - mov r2, r2, LSL#1 -ENTRY(outswb) + mov r2, r2, lsl#1 mov ip, sp stmfd sp!, {r4 - r8, fp, ip, lr, pc} sub fp, ip, #4 @@ -166,56 +625,5 @@ ENTRY(outswb) bgt 3b LOADREGS(ea, fp, {r4 - r8, fp, sp, pc}) -/* - * These make no sense on Acorn machines atm. - */ -ENTRY(insl) -ENTRY(outsl) - RETINSTR(mov,pc,lr) - -@ Purpose: write a memc register -@ Proto : void memc_write(int register, int value); -@ Returns: nothing - -#if defined(CONFIG_CPU_26) -ENTRY(memc_write) - cmp r0, #7 - RETINSTR(movgt,pc,lr) - mov r0, r0, lsl #17 - mov r1, r1, lsl #15 - mov r1, r1, lsr #17 - orr r0, r0, r1, lsl #2 - add r0, r0, #0x03600000 - strb r0, [r0] - RETINSTR(mov,pc,lr) -#define CPSR2SPSR(rt) -#else -#define CPSR2SPSR(rt) \ - mrs rt, cpsr; \ - msr spsr, rt #endif -@ Purpose: call an expansion card loader to read bytes. -@ Proto : char read_loader(int offset, char *card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_read) - stmfd sp!, {r4 - r12, lr} - mov r11, r1 - mov r1, r0 - CPSR2SPSR(r0) - mov lr, pc - mov pc, r2 - LOADREGS(fd, sp!, {r4 - r12, pc}) - -@ Purpose: call an expansion card loader to reset the card -@ Proto : void read_loader(int card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_reset) - stmfd sp!, {r4 - r12, lr} - mov r11, r0 - CPSR2SPSR(r0) - mov lr, pc - add pc, r1, #8 - LOADREGS(fd, sp!, {r4 - r12, pc}) diff --git a/arch/arm/lib/io-ebsa110.S b/arch/arm/lib/io-ebsa110.S index e0b8229a4..b29276ff7 100644 --- a/arch/arm/lib/io-ebsa110.S +++ b/arch/arm/lib/io-ebsa110.S @@ -22,6 +22,22 @@ ldr lr, [r0] ;\ orr reg, reg, lr, lsl $16 +/* + * These make no sense on these machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) +ENTRY(insb) +ENTRY(outsb) + adr r0, io_long_warning + mov r1, lr + b SYMBOL_NAME(printk) + +io_long_warning: + .ascii "<4>ins?/outs? not implemented on this architecture\0" + .align + @ Purpose: read a block of data from a hardware register to memory. @ Proto : insw(int from_port, void *to, int len_in_words); @ Proto : inswb(int from_port, void *to, int len_in_bytes); diff --git a/arch/arm/lib/io-ebsa285.S b/arch/arm/lib/io-footbridge.S index a86983d43..0734c6042 100644 --- a/arch/arm/lib/io-ebsa285.S +++ b/arch/arm/lib/io-footbridge.S @@ -1,8 +1,16 @@ #include <linux/linkage.h> +#include <asm/hardware.h> + + .equ pcio_high, PCIO_BASE & 0xff000000 + .equ pcio_low, PCIO_BASE & 0x00ffffff + + .macro ioaddr, rd,rn + add \rd, \rn, #pcio_high + add \rd, \rd, #pcio_low + .endm ENTRY(insl) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 ands ip, r1, #3 bne 2f @@ -14,49 +22,48 @@ ENTRY(insl) 2: cmp ip, #2 ldr ip, [r0] - blt 3f - bgt 4f + blt 4f + bgt 6f strh ip, [r1], #2 mov ip, ip, lsr #16 -1: subs r2, r2, #1 +3: subs r2, r2, #1 ldrne r3, [r0] orrne ip, ip, r3, lsl #16 strne ip, [r1], #4 movne ip, r3, lsr #16 - bne 1b + bne 3b strh ip, [r1], #2 mov pc, lr -3: strb ip, [r1], #1 +4: strb ip, [r1], #1 mov ip, ip, lsr #8 strh ip, [r1], #2 mov ip, ip, lsr #16 -1: subs r2, r2, #1 +5: subs r2, r2, #1 ldrne r3, [r0] orrne ip, ip, r3, lsl #8 strne ip, [r1], #4 movne ip, r3, lsr #24 - bne 1b + bne 5b strb ip, [r1], #1 mov pc, lr -4: strb ip, [r1], #1 +6: strb ip, [r1], #1 mov ip, ip, lsr #8 -1: subs r2, r2, #1 +7: subs r2, r2, #1 ldrne r3, [r0] orrne ip, ip, r3, lsl #24 strne ip, [r1], #4 movne ip, r3, lsr #8 - bne 1b + bne 7b strb ip, [r1], #1 mov ip, ip, lsr #8 strh ip, [r1], #2 mov pc, lr ENTRY(outsl) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 ands ip, r1, #3 bne 2f @@ -70,31 +77,31 @@ ENTRY(outsl) cmp ip, #2 ldr ip, [r1], #4 mov ip, ip, lsr #16 - blt 3f - bgt 4f + blt 4f + bgt 5f -1: ldr r3, [r1], #4 +3: ldr r3, [r1], #4 orr ip, ip, r3, lsl #16 str ip, [r0] mov ip, r3, lsr #16 subs r2, r2, #1 - bne 1b + bne 3b mov pc, lr -3: ldr r3, [r1], #4 +4: ldr r3, [r1], #4 orr ip, ip, r3, lsl #8 str ip, [r0] mov ip, r3, lsr #24 subs r2, r2, #1 - bne 3b + bne 4b mov pc, lr -4: ldr r3, [r1], #4 +5: ldr r3, [r1], #4 orr ip, ip, r3, lsl #24 str ip, [r0] mov ip, r3, lsr #8 subs r2, r2, #1 - bne 4b + bne 5b mov pc, lr /* Nobody could say these are optimal, but not to worry. */ @@ -102,8 +109,7 @@ ENTRY(outsl) ENTRY(outswb) mov r2, r2, lsr #1 ENTRY(outsw) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 1: subs r2, r2, #1 ldrgeh r3, [r1], #2 strgeh r3, [r0] @@ -114,8 +120,7 @@ ENTRY(inswb) mov r2, r2, lsr #1 ENTRY(insw) stmfd sp!, {r4, r5, lr} - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 @ + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 subs ip, r2, #8 blo too_little @@ -176,8 +181,7 @@ too_little: subs r2, r2, #1 ENTRY(insb) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 1: teq r2, #0 ldrneb r3, [r0] strneb r3, [r1], #1 @@ -187,8 +191,7 @@ ENTRY(insb) ENTRY(outsb) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 1: teq r2, #0 ldrneb r3, [r1], #1 strneb r3, [r0] diff --git a/arch/arm/lib/io.c b/arch/arm/lib/io.c index d01877934..c94a2ba07 100644 --- a/arch/arm/lib/io.c +++ b/arch/arm/lib/io.c @@ -18,7 +18,7 @@ void _memcpy_fromio(void * to, unsigned long from, unsigned long count) * Copy data from "real" memory space to IO memory space. * This needs to be optimized. */ -void _memcpy_toio(unsigned long to, void * from, unsigned long count) +void _memcpy_toio(unsigned long to, const void * from, unsigned long count) { while (count) { count--; diff --git a/arch/arm/lib/semaphore.S b/arch/arm/lib/semaphore.S new file mode 100644 index 000000000..778fafc1c --- /dev/null +++ b/arch/arm/lib/semaphore.S @@ -0,0 +1,34 @@ +/* + * linux/arch/arm/lib/semaphore.S + * + * Idea from i386 code, Copyright Linus Torvalds. + * Converted for ARM by Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * The semaphore operations have a special calling sequence + * that allows us to keep the distruption of the main code + * path to a minimum. These routines save and restore the + * registers that will be touched by __down etc. + */ +ENTRY(__down_failed) + stmfd sp!, {r0 - r3, ip, lr} + bl SYMBOL_NAME(__down) + LOADREGS(fd, sp!, {r0 - r3, ip, pc}) + +ENTRY(__down_interruptible_failed) + stmfd sp!, {r1 - r3, ip, lr} + bl SYMBOL_NAME(__down_interruptible) + LOADREGS(fd, sp!, {r1 - r3, ip, pc}) + +ENTRY(__down_trylock_failed) + stmfd sp!, {r1 - r3, ip, lr} + bl SYMBOL_NAME(__down_trylock) + LOADREGS(fd, sp!, {r1 - r3, ip, pc}) + +ENTRY(__up_wakeup) + stmfd sp!, {r0 - r3, ip, lr} + bl SYMBOL_NAME(__up) + LOADREGS(fd, sp!, {r0 - r3, ip, pc}) diff --git a/arch/arm/mm/fault-common.c b/arch/arm/mm/fault-common.c index 810dea699..1251525da 100644 --- a/arch/arm/mm/fault-common.c +++ b/arch/arm/mm/fault-common.c @@ -26,25 +26,14 @@ void __bad_pmd_kernel(pmd_t *pmd) set_pmd(pmd, mk_kernel_pmd(BAD_PAGETABLE)); } -static void -kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs, - struct task_struct *tsk, struct mm_struct *mm) +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. + */ +void show_pte(struct mm_struct *mm, unsigned long addr) { - char *reason; - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ pgd_t *pgd; - if (addr < PAGE_SIZE) - reason = "NULL pointer dereference"; - else - reason = "paging request"; - - printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n", - reason, addr); - printk(KERN_ALERT "memmap = %08lX, pgd = %p\n", tsk->tss.memmap, mm->pgd); pgd = pgd_offset(mm, addr); printk(KERN_ALERT "*pgd = %08lx", pgd_val(*pgd)); @@ -77,6 +66,27 @@ kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs, } while(0); printk("\n"); +} + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ +static void +kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs, + struct task_struct *tsk, struct mm_struct *mm) +{ + char *reason; + + if (addr < PAGE_SIZE) + reason = "NULL pointer dereference"; + else + reason = "paging request"; + + printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n", + reason, addr); + printk(KERN_ALERT "memmap = %08lX, pgd = %p\n", tsk->tss.memmap, mm->pgd); + show_pte(mm, addr); die("Oops", regs, mode); do_exit(SIGKILL); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 70d7c77b9..48e34214e 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -115,19 +115,19 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag { void * addr; struct vm_struct * area; - unsigned long offset; + unsigned long offset, last_addr; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; /* * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - size = PAGE_ALIGN(size + offset); - - /* - * Don't allow mappings that wrap.. - */ - if (!size || size > phys_addr + size) - return NULL; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - phys_addr; /* * Ok, go for it.. diff --git a/arch/arm/mm/proc-arm2,3.S b/arch/arm/mm/proc-arm2,3.S index 263d79708..7e4871fe2 100644 --- a/arch/arm/mm/proc-arm2,3.S +++ b/arch/arm/mm/proc-arm2,3.S @@ -202,15 +202,11 @@ _arm2_3_check_bugs: LC0: .word SYMBOL_NAME(page_nr) /* * Function: arm2_switch_to (struct task_struct *prev, struct task_struct *next) - * * Params : prev Old task structure * : next New task structure for process to run - * * Returns : prev - * * Purpose : Perform a task switch, saving the old processes state, and restoring * the new. - * * Notes : We don't fiddle with the FP registers here - we postpone this until * the new task actually uses FP. This way, we don't swap FP for tasks * that do not require it. @@ -316,15 +312,11 @@ _arm2_proc_init: _arm2_proc_fin: movs pc, lr /* * Function: arm3_switch_to (struct task_struct *prev, struct task_struct *next) - * * Params : prev Old task structure * : next New task structure for process to run - * * Returns : prev - * * Purpose : Perform a task switch, saving the old processes state, and restoring * the new. - * * Notes : We don't fiddle with the FP registers here - we postpone this until * the new task actually uses FP. This way, we don't swap FP for tasks * that do not require it. diff --git a/arch/arm/mm/proc-arm6,7.S b/arch/arm/mm/proc-arm6,7.S index b817ae2b4..d1f31e35d 100644 --- a/arch/arm/mm/proc-arm6,7.S +++ b/arch/arm/mm/proc-arm6,7.S @@ -74,14 +74,14 @@ _arm6_7_switch_to: str sp, [r0, #TSS_SAVE] @ Save sp_SVC ldr sp, [r1, #TSS_SAVE] @ Get saved sp_SVC ldr r2, [r1, #TSK_ADDR_LIMIT] + ldr r3, [r1, #TSS_MEMMAP] @ Page table pointer teq r2, #0 moveq r2, #DOM_KERNELDOMAIN movne r2, #DOM_USERDOMAIN mcr p15, 0, r2, c3, c0 @ Set domain reg - ldr r2, [r1, #TSS_MEMMAP] @ Page table pointer mov r1, #0 mcr p15, 0, r1, c7, c0, 0 @ flush cache - mcr p15, 0, r2, c2, c0, 0 @ update page table ptr + mcr p15, 0, r3, c2, c0, 0 @ update page table ptr mcr p15, 0, r1, c5, c0, 0 @ flush TLBs ldmfd sp!, {ip} msr spsr, ip @ Save tasks CPSR into SPSR for this return diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index ff55c8ffa..be9fad45e 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -29,11 +29,11 @@ _sa110_flush_cache_all: @ preserves r0 mov r2, #1 _sa110_flush_cache_all_r2: ldr r3, =Lclean_switch + ldr ip, =FLUSH_BASE ldr r1, [r3] ands r1, r1, #1 eor r1, r1, #1 str r1, [r3] - ldr ip, =FLUSH_BASE addne ip, ip, #32768 add r1, ip, #16384 @ only necessary for 16k 1: ldr r3, [ip], #32 @@ -226,12 +226,12 @@ _sa110_switch_to: ldr r2, [r0, #TSS_MEMMAP] @ Get old page tables str sp, [r0, #TSS_SAVE] @ Save sp_SVC ldr sp, [r1, #TSS_SAVE] @ Get saved sp_SVC - ldr r4, [r1, #TSK_ADDR_LIMIT] - teq r4, #0 - moveq r4, #DOM_KERNELDOMAIN - movne r4, #DOM_USERDOMAIN - mcr p15, 0, r4, c3, c0 @ Set segment + ldr r5, [r1, #TSK_ADDR_LIMIT] ldr r4, [r1, #TSS_MEMMAP] @ Page table pointer + teq r5, #0 + moveq r5, #DOM_KERNELDOMAIN + movne r5, #DOM_USERDOMAIN + mcr p15, 0, r5, c3, c0 @ Set segment /* * Flushing the cache is nightmarishly slow, so we take any excuse * to get out of it. If the old page table is the same as the new, @@ -288,7 +288,8 @@ _sa110_data_abort: */ .align 5 _sa110_set_pmd: str r1, [r0] - mcr p15, 0, r0, c7, c10, 1 @ clean D entry (drain is done by TLB fns) + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB (TLB bypasses WB) mov pc, lr /* @@ -318,6 +319,7 @@ _sa110_set_pte: str r1, [r0], #-1024 @ linux version str r2, [r0] @ hardware version mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry (drain is done by TLB fns) + mcr p15, 0, r0, c7, c10, 4 @ drain WB (TLB bypasses WB) mov pc, lr /* diff --git a/arch/arm/nwfpe/ARM-gcc.h b/arch/arm/nwfpe/ARM-gcc.h new file mode 100644 index 000000000..d726aa452 --- /dev/null +++ b/arch/arm/nwfpe/ARM-gcc.h @@ -0,0 +1,128 @@ + +/* +------------------------------------------------------------------------------- +One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. +------------------------------------------------------------------------------- +*/ +#define LITTLEENDIAN + +/* +------------------------------------------------------------------------------- +The macro `BITS64' can be defined to indicate that 64-bit integer types are +supported by the compiler. +------------------------------------------------------------------------------- +*/ +#define BITS64 + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines the most convenient type that holds +integers of at least as many bits as specified. For example, `uint8' should +be the most convenient type that can hold unsigned integers of as many as +8 bits. The `flag' type must be able to hold either a 0 or 1. For most +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +to the same as `int'. +------------------------------------------------------------------------------- +*/ +typedef char flag; +typedef unsigned char uint8; +typedef signed char int8; +typedef int uint16; +typedef int int16; +typedef unsigned int uint32; +typedef signed int int32; +#ifdef BITS64 +typedef unsigned long long int bits64; +typedef signed long long int sbits64; +#endif + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines a type that holds integers +of _exactly_ the number of bits specified. For instance, for most +implementation of C, `bits16' and `sbits16' should be `typedef'ed to +`unsigned short int' and `signed short int' (or `short int'), respectively. +------------------------------------------------------------------------------- +*/ +typedef unsigned char bits8; +typedef signed char sbits8; +typedef unsigned short int bits16; +typedef signed short int sbits16; +typedef unsigned int bits32; +typedef signed int sbits32; +#ifdef BITS64 +typedef unsigned long long int uint64; +typedef signed long long int int64; +#endif + +#ifdef BITS64 +/* +------------------------------------------------------------------------------- +The `LIT64' macro takes as its argument a textual integer literal and if +necessary ``marks'' the literal as having a 64-bit integer type. For +example, the Gnu C Compiler (`gcc') requires that 64-bit literals be +appended with the letters `LL' standing for `long long', which is `gcc's +name for the 64-bit integer type. Some compilers may allow `LIT64' to be +defined as the identity macro: `#define LIT64( a ) a'. +------------------------------------------------------------------------------- +*/ +#define LIT64( a ) a##LL +#endif + +/* +------------------------------------------------------------------------------- +The macro `INLINE' can be used before functions that should be inlined. If +a compiler does not support explicit inlining, this macro should be defined +to be `static'. +------------------------------------------------------------------------------- +*/ +#define INLINE extern __inline__ + + +/* For use as a GCC soft-float library we need some special function names. */ + +#ifdef __LIBFLOAT__ + +/* Some 32-bit ops can be mapped straight across by just changing the name. */ +#define float32_add __addsf3 +#define float32_sub __subsf3 +#define float32_mul __mulsf3 +#define float32_div __divsf3 +#define int32_to_float32 __floatsisf +#define float32_to_int32_round_to_zero __fixsfsi +#define float32_to_uint32_round_to_zero __fixunssfsi + +/* These ones go through the glue code. To avoid namespace pollution + we rename the internal functions too. */ +#define float32_eq ___float32_eq +#define float32_le ___float32_le +#define float32_lt ___float32_lt + +/* All the 64-bit ops have to go through the glue, so we pull the same + trick. */ +#define float64_add ___float64_add +#define float64_sub ___float64_sub +#define float64_mul ___float64_mul +#define float64_div ___float64_div +#define int32_to_float64 ___int32_to_float64 +#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero +#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero +#define float64_to_float32 ___float64_to_float32 +#define float32_to_float64 ___float32_to_float64 +#define float64_eq ___float64_eq +#define float64_le ___float64_le +#define float64_lt ___float64_lt + +#if 0 +#define float64_add __adddf3 +#define float64_sub __subdf3 +#define float64_mul __muldf3 +#define float64_div __divdf3 +#define int32_to_float64 __floatsidf +#define float64_to_int32_round_to_zero __fixdfsi +#define float64_to_uint32_round_to_zero __fixunsdfsi +#define float64_to_float32 __truncdfsf2 +#define float32_to_float64 __extendsfdf2 +#endif + +#endif diff --git a/arch/arm/nwfpe/ChangeLog b/arch/arm/nwfpe/ChangeLog new file mode 100644 index 000000000..e160d36c3 --- /dev/null +++ b/arch/arm/nwfpe/ChangeLog @@ -0,0 +1,20 @@ +1998-11-23 Scott Bambrough <scottb@corelcomputer.com> + + * README.FPE - fix typo in description of lfm/sfm instructions + * NOTES - Added file to describe known bugs/problems + * fpmodule.c - Changed version number to 0.94 + +1998-11-20 Scott Bambrough <scottb@corelcomputer.com> + + * README.FPE - fix description of URD, NRM instructions + * TODO - remove URD, NRM instructions from TODO list + * single_cpdo.c - implement URD, NRM + * double_cpdo.c - implement URD, NRM + * extended_cpdo.c - implement URD, NRM + +1998-11-19 Scott Bambrough <scottb@corelcomputer.com> + + * ChangeLog - Added this file to track changes made. + * fpa11.c - added code to initialize register types to typeNone + * fpa11_cpdt.c - fixed bug in storeExtended (typeExtended changed to + typeDouble in switch statement) diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile new file mode 100644 index 000000000..5db79c6d4 --- /dev/null +++ b/arch/arm/nwfpe/Makefile @@ -0,0 +1,31 @@ +# +# linux/arch/arm/nwfpe/Makefile +# +# Copyright (C) 1998, 1999 Philip Blundell +# + +NWFPE_OBJS := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \ + fpmodule.o fpopcode.o softfloat.o \ + single_cpdo.o double_cpdo.o extended_cpdo.o + +ifeq ($(CONFIG_CPU_26),y) +NWFPE_OBJS += entry26.o +else +NWFPE_OBJS += entry.o +endif + +L_TARGET := math-emu.a + +ifeq ($(CONFIG_NWFPE),y) +L_OBJS = $(NWFPE_OBJS) +else + ifeq ($(CONFIG_NWFPE),m) + M_OBJS = nwfpe.o + MI_OBJS = $(NWFPE_OBJS) + endif +endif + +include $(TOPDIR)/Rules.make + +nwfpe.o: $(MI_OBJS) $(MIX_OBJS) + $(LD) $(LD_RFLAG) -r -o $@ $(MI_OBJS) $(MIX_OBJS) diff --git a/arch/arm/nwfpe/config.h b/arch/arm/nwfpe/config.h new file mode 100644 index 000000000..35f9d6336 --- /dev/null +++ b/arch/arm/nwfpe/config.h @@ -0,0 +1,31 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#if 1 +#define C_SYMBOL_NAME(foo) foo +#else +#define C_SYMBOL_NAME(foo) _##foo +#endif + +#endif diff --git a/arch/arm/nwfpe/double_cpdo.c b/arch/arm/nwfpe/double_cpdo.c new file mode 100644 index 000000000..e746c7a29 --- /dev/null +++ b/arch/arm/nwfpe/double_cpdo.c @@ -0,0 +1,293 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" + +extern FPA11 *fpa11; + +float64 getDoubleConstant(unsigned int); + +float64 float64_exp(float64 Fm); +float64 float64_ln(float64 Fm); +float64 float64_sin(float64 rFm); +float64 float64_cos(float64 rFm); +float64 float64_arcsin(float64 rFm); +float64 float64_arctan(float64 rFm); +float64 float64_log(float64 rFm); +float64 float64_tan(float64 rFm); +float64 float64_arccos(float64 rFm); +float64 float64_pow(float64 rFn,float64 rFm); +float64 float64_pol(float64 rFn,float64 rFm); + +unsigned int DoubleCPDO(const unsigned int opcode) +{ + float64 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + //fp_printk("DoubleCPDO(0x%08x)\n",opcode); + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getDoubleConstant(Fm); + } + else + { + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + rFm = float32_to_float64(fpa11->fpreg[Fm].fValue.fSingle); + break; + + case typeDouble: + rFm = fpa11->fpreg[Fm].fValue.fDouble; + break; + + case typeExtended: + // !! patb + //fp_printk("not implemented! why not?\n"); + //!! ScottB + // should never get here, if extended involved + // then other operand should be promoted then + // ExtendedCPDO called. + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + rFn = float32_to_float64(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeDouble: + rFn = fpa11->fpreg[Fn].fValue.fDouble; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */ + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = rFm; + break; + + case MNF_CODE: + { + unsigned int *p = (unsigned int*)&rFm; + p[1] ^= 0x80000000; + fpa11->fpreg[Fd].fValue.fDouble = rFm; + } + break; + + case ABS_CODE: + { + unsigned int *p = (unsigned int*)&rFm; + p[1] &= 0x7fffffff; + fpa11->fpreg[Fd].fValue.fDouble = rFm; + } + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fValue.fDouble = + int32_to_float64(float64_to_int32(rFm)); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fpreg[Fd].fType = typeDouble; + return nRc; +} + +#if 0 +float64 float64_exp(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_ln(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_sin(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_cos(float64 rFm) +{ + return rFm; + //series +} + +#if 0 +float64 float64_arcsin(float64 rFm) +{ +//series +} + +float64 float64_arctan(float64 rFm) +{ + //series +} +#endif + +float64 float64_log(float64 rFm) +{ + return float64_div(float64_ln(rFm),getDoubleConstant(7)); +} + +float64 float64_tan(float64 rFm) +{ + return float64_div(float64_sin(rFm),float64_cos(rFm)); +} + +float64 float64_arccos(float64 rFm) +{ +return rFm; + //return float64_sub(halfPi,float64_arcsin(rFm)); +} + +float64 float64_pow(float64 rFn,float64 rFm) +{ + return float64_exp(float64_mul(rFm,float64_ln(rFn))); +} + +float64 float64_pol(float64 rFn,float64 rFm) +{ + return float64_arctan(float64_div(rFn,rFm)); +} +#endif diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S new file mode 100644 index 000000000..6f0077fbe --- /dev/null +++ b/arch/arm/nwfpe/entry.S @@ -0,0 +1,126 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* This is the kernel's entry point into the floating point emulator. +It is called from the kernel with code similar to this: + + adrsvc al, r9, ret_from_exception @ r9 = normal FP return + adrsvc al, lr, fpundefinstr @ lr = undefined instr return + + get_current_task r10 + mov r8, #1 + strb r8, [r10, #TSK_USED_MATH] @ set current->used_math + add r10, r10, #TSS_FPESAVE @ r10 = workspace + ldr r4, .LC2 + ldr pc, [r4] @ Call FP emulator entry point + +The kernel expects the emulator to return via one of two possible +points of return it passes to the emulator. The emulator, if +successful in its emulation, jumps to ret_from_exception (passed in +r9) and the kernel takes care of returning control from the trap to +the user code. If the emulator is unable to emulate the instruction, +it returns via _fpundefinstr (passed via lr) and the kernel halts the +user program with a core dump. + +On entry to the emulator r10 points to an area of private FP workspace +reserved in the thread structure for this process. This is where the +emulator saves its registers across calls. The first word of this area +is used as a flag to detect the first time a process uses floating point, +so that the emulator startup cost can be avoided for tasks that don't +want it. + +This routine does three things: + +1) It saves SP into a variable called userRegisters. The kernel has +created a struct pt_regs on the stack and saved the user registers +into it. See /usr/include/asm/proc/ptrace.h for details. The +emulator code uses userRegisters as the base of an array of words from +which the contents of the registers can be extracted. + +2) It calls EmulateAll to emulate a floating point instruction. +EmulateAll returns 1 if the emulation was successful, or 0 if not. + +3) If an instruction has been emulated successfully, it looks ahead at +the next instruction. If it is a floating point instruction, it +executes the instruction, without returning to user space. In this +way it repeatedly looks ahead and executes floating point instructions +until it encounters a non floating point instruction, at which time it +returns via _fpreturn. + +This is done to reduce the effect of the trap overhead on each +floating point instructions. GCC attempts to group floating point +instructions to allow the emulator to spread the cost of the trap over +several floating point instructions. */ + + .globl nwfpe_enter +nwfpe_enter: + /* ?? Could put userRegisters and fpa11 into fixed regs during + emulation. This would reduce load/store overhead at the expense + of stealing two regs from the register allocator. Not sure if + it's worth it. */ + ldr r4, =userRegisters + str sp, [r4] @ save pointer to user regs + ldr r4, =fpa11 + str r10, [r4] @ store pointer to our state + mov r4, sp @ use r4 for local pointer + mov r10, lr @ save the failure-return addresses + + ldr r5, [r4, #60] @ get contents of PC; + ldr r0, [r5, #-4] @ get actual instruction into r0 +emulate: + bl EmulateAll @ emulate the instruction + cmp r0, #0 @ was emulation successful + moveq pc, r10 @ no, return failure + +next: +__x1: ldrt r6, [r5], #4 @ get the next instruction and + @ increment PC + + and r2, r6, #0x0F000000 @ test for FP insns + teq r2, #0x0C000000 + teqne r2, #0x0D000000 + teqne r2, #0x0E000000 + movne pc, r9 @ return ok if not a fp insn + + str r5, [r4, #60] @ update PC copy in regs + + mov r0, r6 @ save a copy + ldr r1, [r4, #64] @ fetch the condition codes + bl checkCondition @ check the condition + cmp r0, #0 @ r0 = 0 ==> condition failed + + @ if condition code failed to match, next insn + beq next @ get the next instruction; + + mov r0, r6 @ prepare for EmulateAll() + b emulate @ if r0 != 0, goto EmulateAll + + @ We need to be prepared for the instruction at __x1 to fault. + @ Emit the appropriate exception gunk to fix things up. + .section .fixup,"ax" + .align +__f1: mov pc, r9 + .previous + .section __ex_table,"a" + .align 3 + .long __x1, __f1 + .previous diff --git a/arch/arm/nwfpe/entry26.S b/arch/arm/nwfpe/entry26.S new file mode 100644 index 000000000..6b1ec3354 --- /dev/null +++ b/arch/arm/nwfpe/entry26.S @@ -0,0 +1,112 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "../lib/constants.h" + +/* This is the kernel's entry point into the floating point emulator. +It is called from the kernel with code similar to this: + + mov fp, #0 + teqp pc, #I_BIT | MODE_SVC + ldr r4, .LC2 + ldr pc, [r4] @ Call FP module USR entry point + +The kernel expects the emulator to return via one of two possible +points of return it passes to the emulator. The emulator, if +successful in its emulation, jumps to ret_from_exception and the +kernel takes care of returning control from the trap to the user code. +If the emulator is unable to emulate the instruction, it returns to +fpundefinstr and the kernel halts the user program with a core dump. + +This routine does four things: + +1) It saves SP into a variable called userRegisters. The kernel has +created a struct pt_regs on the stack and saved the user registers +into it. See /usr/include/asm/proc/ptrace.h for details. The +emulator code uses userRegisters as the base of an array of words from +which the contents of the registers can be extracted. + +2) It locates the FP emulator work area within the TSS structure and +points `fpa11' to it. + +3) It calls EmulateAll to emulate a floating point instruction. +EmulateAll returns 1 if the emulation was successful, or 0 if not. + +4) If an instruction has been emulated successfully, it looks ahead at +the next instruction. If it is a floating point instruction, it +executes the instruction, without returning to user space. In this +way it repeatedly looks ahead and executes floating point instructions +until it encounters a non floating point instruction, at which time it +returns via _fpreturn. + +This is done to reduce the effect of the trap overhead on each +floating point instructions. GCC attempts to group floating point +instructions to allow the emulator to spread the cost of the trap over +several floating point instructions. */ + + .globl nwfpe_enter +nwfpe_enter: + ldr r4, =userRegisters + str sp, [r4] @ save pointer to user regs + + mov r10, sp, lsr #13 @ find workspace + mov r10, r10, lsl #13 + add r10, r10, #TSS_FPESAVE + + ldr r4, =fpa11 + str r10, [r4] @ store pointer to our state + mov r4, sp @ use r4 for local pointer + + ldr r5, [r4, #60] @ get contents of PC + bic r5, r5, #0xfc000003 + ldr r0, [r5, #-4] @ get actual instruction into r0 + bl EmulateAll @ emulate the instruction +1: cmp r0, #0 @ was emulation successful + beq fpundefinstr @ no, return failure + +next: + ldrt r6, [r5], #4 @ get the next instruction and + @ increment PC + + and r2, r6, #0x0F000000 @ test for FP insns + teq r2, #0x0C000000 + teqne r2, #0x0D000000 + teqne r2, #0x0E000000 + bne ret_from_exception @ return ok if not a fp insn + + ldr r9, [r4, #60] @ get new condition codes + and r9, r9, #0xfc000003 + orr r7, r5, r9 + str r7, [r4, #60] @ update PC copy in regs + + mov r0, r6 @ save a copy + mov r1, r9 @ fetch the condition codes + bl checkCondition @ check the condition + cmp r0, #0 @ r0 = 0 ==> condition failed + + @ if condition code failed to match, next insn + beq next @ get the next instruction; + + mov r0, r6 @ prepare for EmulateAll() + adr lr, 1b + orr lr, lr, #3 + b EmulateAll @ if r0 != 0, goto EmulateAll diff --git a/arch/arm/nwfpe/extended_cpdo.c b/arch/arm/nwfpe/extended_cpdo.c new file mode 100644 index 000000000..1c5c66180 --- /dev/null +++ b/arch/arm/nwfpe/extended_cpdo.c @@ -0,0 +1,276 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" + +floatx80 getExtendedConstant(unsigned int); + +floatx80 floatx80_exp(floatx80 Fm); +floatx80 floatx80_ln(floatx80 Fm); +floatx80 floatx80_sin(floatx80 rFm); +floatx80 floatx80_cos(floatx80 rFm); +floatx80 floatx80_arcsin(floatx80 rFm); +floatx80 floatx80_arctan(floatx80 rFm); +floatx80 floatx80_log(floatx80 rFm); +floatx80 floatx80_tan(floatx80 rFm); +floatx80 floatx80_arccos(floatx80 rFm); +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm); +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm); + +unsigned int ExtendedCPDO(const unsigned int opcode) +{ + floatx80 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + //fp_printk("ExtendedCPDO(0x%08x)\n",opcode); + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getExtendedConstant(Fm); + } + else + { + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fValue.fSingle); + break; + + case typeDouble: + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fValue.fDouble); + break; + + case typeExtended: + rFm = fpa11->fpreg[Fm].fValue.fExtended; + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeDouble: + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble); + break; + + case typeExtended: + rFn = fpa11->fpreg[Fn].fValue.fExtended; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = rFm; + break; + + case MNF_CODE: + rFm.high ^= 0x8000; + fpa11->fpreg[Fd].fValue.fExtended = rFm; + break; + + case ABS_CODE: + rFm.high &= 0x7fff; + fpa11->fpreg[Fd].fValue.fExtended = rFm; + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fValue.fExtended = + int32_to_floatx80(floatx80_to_int32(rFm)); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fpreg[Fd].fType = typeExtended; + return nRc; +} + +#if 0 +floatx80 floatx80_exp(floatx80 Fm) +{ +//series +} + +floatx80 floatx80_ln(floatx80 Fm) +{ +//series +} + +floatx80 floatx80_sin(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_cos(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_arcsin(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_arctan(floatx80 rFm) +{ + //series +} + +floatx80 floatx80_log(floatx80 rFm) +{ + return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7)); +} + +floatx80 floatx80_tan(floatx80 rFm) +{ + return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm)); +} + +floatx80 floatx80_arccos(floatx80 rFm) +{ + //return floatx80_sub(halfPi,floatx80_arcsin(rFm)); +} + +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm) +{ + return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn))); +} + +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm) +{ + return floatx80_arctan(floatx80_div(rFn,rFm)); +} +#endif diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c new file mode 100644 index 000000000..506821ca8 --- /dev/null +++ b/arch/arm/nwfpe/fpa11.c @@ -0,0 +1,206 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "fpa11.h" +#include "milieu.h" +#include "fpopcode.h" + +#include "fpmodule.h" +#include "fpmodule.inl" + +/* forward declarations */ +unsigned int EmulateCPDO(const unsigned int); +unsigned int EmulateCPDT(const unsigned int); +unsigned int EmulateCPRT(const unsigned int); + +/* Emulator registers */ +FPA11 *fpa11; + +/* Reset the FPA11 chip. Called to initialize and reset the emulator. */ +void resetFPA11(void) +{ + int i; + /* initialize the registers */ + for (i=0;i<=7;i++) + { + fpa11->fpreg[i].fType = typeNone; + } + + /* FPSR: set system id to FP_EMULATOR, clear all other bits */ + fpa11->fpsr = FP_EMULATOR; + + /* FPCR: set SB, AB and DA bits, clear all others */ +#if MAINTAIN_FPCR + fpa11->fpcr = MASK_RESET; +#endif +} + +void SetRoundingMode(const unsigned int opcode) +{ +#if MAINTAIN_FPCR + fpa11->fpcr &= ~MASK_ROUNDING_MODE; +#endif + switch (opcode & MASK_ROUNDING_MODE) + { + default: + case ROUND_TO_NEAREST: + float_rounding_mode = float_round_nearest_even; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_NEAREST; +#endif + break; + + case ROUND_TO_PLUS_INFINITY: + float_rounding_mode = float_round_up; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_PLUS_INFINITY; +#endif + break; + + case ROUND_TO_MINUS_INFINITY: + float_rounding_mode = float_round_down; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_MINUS_INFINITY; +#endif + break; + + case ROUND_TO_ZERO: + float_rounding_mode = float_round_to_zero; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_TO_ZERO; +#endif + break; + } +} + +void SetRoundingPrecision(const unsigned int opcode) +{ +#if MAINTAIN_FPCR + fpa11->fpcr &= ~MASK_ROUNDING_PRECISION; +#endif + switch (opcode & MASK_ROUNDING_PRECISION) + { + case ROUND_SINGLE: + floatx80_rounding_precision = 32; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_SINGLE; +#endif + break; + + case ROUND_DOUBLE: + floatx80_rounding_precision = 64; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_DOUBLE; +#endif + break; + + case ROUND_EXTENDED: + floatx80_rounding_precision = 80; +#if MAINTAIN_FPCR + fpa11->fpcr |= ROUND_EXTENDED; +#endif + break; + + default: floatx80_rounding_precision = 80; + } +} + +/* Emulate the instruction in the opcode. */ +unsigned int EmulateAll(unsigned int opcode) +{ + unsigned int nRc = 0; + + if (fpa11->initflag == 0) /* good place for __builtin_expect */ + { + resetFPA11(); + SetRoundingMode(ROUND_TO_NEAREST); + SetRoundingPrecision(ROUND_EXTENDED); + fpa11->initflag = 1; + } + + if (TEST_OPCODE(opcode,MASK_CPRT)) + { + /* Emulate conversion opcodes. */ + /* Emulate register transfer opcodes. */ + /* Emulate comparison opcodes. */ + nRc = EmulateCPRT(opcode); + } + else if (TEST_OPCODE(opcode,MASK_CPDO)) + { + /* Emulate monadic arithmetic opcodes. */ + /* Emulate dyadic arithmetic opcodes. */ + nRc = EmulateCPDO(opcode); + } + else if (TEST_OPCODE(opcode,MASK_CPDT)) + { + /* Emulate load/store opcodes. */ + /* Emulate load/store multiple opcodes. */ + nRc = EmulateCPDT(opcode); + } + else + { + /* Invalid instruction detected. Return FALSE. */ + nRc = 0; + } + + return(nRc); +} + +#if 0 +unsigned int EmulateAll1(unsigned int opcode) +{ + switch ((opcode >> 24) & 0xf) + { + case 0xc: + case 0xd: + if ((opcode >> 20) & 0x1) + { + switch ((opcode >> 8) & 0xf) + { + case 0x1: return PerformLDF(opcode); break; + case 0x2: return PerformLFM(opcode); break; + default: return 0; + } + } + else + { + switch ((opcode >> 8) & 0xf) + { + case 0x1: return PerformSTF(opcode); break; + case 0x2: return PerformSFM(opcode); break; + default: return 0; + } + } + break; + + case 0xe: + if (opcode & 0x10) + return EmulateCPDO(opcode); + else + return EmulateCPRT(opcode); + break; + + default: return 0; + } +} +#endif + diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h new file mode 100644 index 000000000..4a47a29f4 --- /dev/null +++ b/arch/arm/nwfpe/fpa11.h @@ -0,0 +1,61 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPA11_H__ +#define __FPA11_H__ + +/* includes */ +#include "fpsr.h" /* FP control and status register definitions */ +#include "softfloat.h" + +#define typeNone 0x00 +#define typeSingle 0x01 +#define typeDouble 0x02 +#define typeExtended 0x03 + +typedef struct tagFPREG { + unsigned int fType; + union { + float32 fSingle; + float64 fDouble; + floatx80 fExtended; + } fValue; +} FPREG; + +/* FPA11 device model */ +typedef struct tagFPA11 { + int initflag; /* this is special. The kernel guarantees + to set it to 0 when a thread is launched, + so we can use it to detect whether this + instance of the emulator needs to be + initialised. */ + FPREG fpreg[8]; /* 8 floating point registers */ + FPSR fpsr; /* floating point status register */ + FPCR fpcr; /* floating point control register */ +} FPA11; + +extern void resetFPA11(void); +extern void SetRoundingMode(const unsigned int); +extern void SetRoundingPrecision(const unsigned int); + +extern FPA11 *fpa11; + +#endif diff --git a/arch/arm/nwfpe/fpa11.inl b/arch/arm/nwfpe/fpa11.inl new file mode 100644 index 000000000..321ab7c1c --- /dev/null +++ b/arch/arm/nwfpe/fpa11.inl @@ -0,0 +1,47 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "fpa11.h" + +/* Read and write floating point status register */ +extern __inline__ unsigned int readFPSR(void) +{ + return(fpa11->fpsr); +} + +extern __inline__ void writeFPSR(FPSR reg) +{ + /* the sysid byte in the status register is readonly */ + fpa11->fpsr = (fpa11->fpsr & MASK_SYSID) | (reg & ~MASK_SYSID); +} + +/* Read and write floating point control register */ +extern __inline__ FPCR readFPCR(void) +{ + /* clear SB, AB and DA bits before returning FPCR */ + return(fpa11->fpcr & ~MASK_RFC); +} + +extern __inline__ void writeFPCR(FPCR reg) +{ + fpa11->fpcr &= ~MASK_WFC; /* clear SB, AB and DA bits */ + fpa11->fpcr |= (reg & MASK_WFC); /* write SB, AB and DA bits */ +} diff --git a/arch/arm/nwfpe/fpa11_cpdo.c b/arch/arm/nwfpe/fpa11_cpdo.c new file mode 100644 index 000000000..c337c553a --- /dev/null +++ b/arch/arm/nwfpe/fpa11_cpdo.c @@ -0,0 +1,117 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "fpa11.h" +#include "fpopcode.h" + +unsigned int SingleCPDO(const unsigned int opcode); +unsigned int DoubleCPDO(const unsigned int opcode); +unsigned int ExtendedCPDO(const unsigned int opcode); + +unsigned int EmulateCPDO(const unsigned int opcode) +{ + unsigned int Fd, nType, nDest, nRc = 1; + + //fp_printk("EmulateCPDO(0x%08x)\n",opcode); + + /* Get the destination size. If not valid let Linux perform + an invalid instruction trap. */ + nDest = getDestinationSize(opcode); + if (typeNone == nDest) return 0; + + SetRoundingMode(opcode); + + /* Compare the size of the operands in Fn and Fm. + Choose the largest size and perform operations in that size, + in order to make use of all the precision of the operands. + If Fm is a constant, we just grab a constant of a size + matching the size of the operand in Fn. */ + if (MONADIC_INSTRUCTION(opcode)) + nType = nDest; + else + nType = fpa11->fpreg[getFn(opcode)].fType; + + if (!CONSTANT_FM(opcode)) + { + register unsigned int Fm = getFm(opcode); + if (nType < fpa11->fpreg[Fm].fType) + { + nType = fpa11->fpreg[Fm].fType; + } + } + + switch (nType) + { + case typeSingle : nRc = SingleCPDO(opcode); break; + case typeDouble : nRc = DoubleCPDO(opcode); break; + case typeExtended : nRc = ExtendedCPDO(opcode); break; + default : nRc = 0; + } + + /* If the operation succeeded, check to see if the result in the + destination register is the correct size. If not force it + to be. */ + Fd = getFd(opcode); + nType = fpa11->fpreg[Fd].fType; + if ((0 != nRc) && (nDest != nType)) + { + switch (nDest) + { + case typeSingle: + { + if (typeDouble == nType) + fpa11->fpreg[Fd].fValue.fSingle = + float64_to_float32(fpa11->fpreg[Fd].fValue.fDouble); + else + fpa11->fpreg[Fd].fValue.fSingle = + floatx80_to_float32(fpa11->fpreg[Fd].fValue.fExtended); + } + break; + + case typeDouble: + { + if (typeSingle == nType) + fpa11->fpreg[Fd].fValue.fDouble = + float32_to_float64(fpa11->fpreg[Fd].fValue.fSingle); + else + fpa11->fpreg[Fd].fValue.fDouble = + floatx80_to_float64(fpa11->fpreg[Fd].fValue.fExtended); + } + break; + + case typeExtended: + { + if (typeSingle == nType) + fpa11->fpreg[Fd].fValue.fExtended = + float32_to_floatx80(fpa11->fpreg[Fd].fValue.fSingle); + else + fpa11->fpreg[Fd].fValue.fExtended = + float64_to_floatx80(fpa11->fpreg[Fd].fValue.fDouble); + } + break; + } + + fpa11->fpreg[Fd].fType = nDest; + } + + return nRc; +} diff --git a/arch/arm/nwfpe/fpa11_cpdt.c b/arch/arm/nwfpe/fpa11_cpdt.c new file mode 100644 index 000000000..9617a79a3 --- /dev/null +++ b/arch/arm/nwfpe/fpa11_cpdt.c @@ -0,0 +1,330 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" +#include "fpmodule.h" +#include "fpmodule.inl" + +#include <asm/uaccess.h> + +extern __inline__ +void loadSingle(const unsigned int Fn,const unsigned int *pMem) +{ + fpa11->fpreg[Fn].fType = typeSingle; + get_user(fpa11->fpreg[Fn].fValue.fSingle, pMem); +} + +extern __inline__ +void loadDouble(const unsigned int Fn,const unsigned int *pMem) +{ + unsigned int *p; + p = (unsigned int*)&fpa11->fpreg[Fn].fValue.fDouble; + fpa11->fpreg[Fn].fType = typeDouble; + get_user(p[0], &pMem[1]); + get_user(p[1], &pMem[0]); /* sign & exponent */ +} + +extern __inline__ +void loadExtended(const unsigned int Fn,const unsigned int *pMem) +{ + unsigned int *p; + p = (unsigned int*)&fpa11->fpreg[Fn].fValue.fExtended; + fpa11->fpreg[Fn].fType = typeExtended; + get_user(p[0], &pMem[0]); /* sign & exponent */ + get_user(p[1], &pMem[2]); /* ls bits */ + get_user(p[2], &pMem[1]); /* ms bits */ +} + +extern __inline__ +void loadMultiple(const unsigned int Fn,const unsigned int *pMem) +{ + register unsigned int *p; + unsigned long x; + + p = (unsigned int*)&(fpa11->fpreg[Fn].fValue); + get_user(x, &pMem[0]); + fpa11->fpreg[Fn].fType = (x >> 14) & 0x00000003; + + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + case typeDouble: + { + get_user(p[0], &pMem[2]); /* Single */ + get_user(p[1], &pMem[1]); /* double msw */ + p[2] = 0; /* empty */ + } + break; + + case typeExtended: + { + get_user(p[1], &pMem[2]); + get_user(p[2], &pMem[1]); /* msw */ + p[0] = (x & 0x80003fff); + } + break; + } +} + +extern __inline__ +void storeSingle(const unsigned int Fn,unsigned int *pMem) +{ + float32 val; + register unsigned int *p = (unsigned int*)&val; + + switch (fpa11->fpreg[Fn].fType) + { + case typeDouble: + val = float64_to_float32(fpa11->fpreg[Fn].fValue.fDouble); + break; + + case typeExtended: + val = floatx80_to_float32(fpa11->fpreg[Fn].fValue.fExtended); + break; + + default: val = fpa11->fpreg[Fn].fValue.fSingle; + } + + put_user(p[0], pMem); +} + +extern __inline__ +void storeDouble(const unsigned int Fn,unsigned int *pMem) +{ + float64 val; + register unsigned int *p = (unsigned int*)&val; + + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + val = float32_to_float64(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeExtended: + val = floatx80_to_float64(fpa11->fpreg[Fn].fValue.fExtended); + break; + + default: val = fpa11->fpreg[Fn].fValue.fDouble; + } + put_user(p[1], &pMem[0]); /* msw */ + put_user(p[0], &pMem[1]); /* lsw */ +} + +extern __inline__ +void storeExtended(const unsigned int Fn,unsigned int *pMem) +{ + floatx80 val; + register unsigned int *p = (unsigned int*)&val; + + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + val = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeDouble: + val = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble); + break; + + default: val = fpa11->fpreg[Fn].fValue.fExtended; + } + + put_user(p[0], &pMem[0]); /* sign & exp */ + put_user(p[1], &pMem[2]); + put_user(p[2], &pMem[1]); /* msw */ +} + +extern __inline__ +void storeMultiple(const unsigned int Fn,unsigned int *pMem) +{ + register unsigned int nType, *p; + + p = (unsigned int*)&(fpa11->fpreg[Fn].fValue); + nType = fpa11->fpreg[Fn].fType; + + switch (nType) + { + case typeSingle: + case typeDouble: + { + put_user(p[0], &pMem[2]); /* single */ + put_user(p[1], &pMem[1]); /* double msw */ + put_user(nType << 14, &pMem[0]); + } + break; + + case typeExtended: + { + put_user(p[2], &pMem[1]); /* msw */ + put_user(p[1], &pMem[2]); + put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]); + } + break; + } +} + +unsigned int PerformLDF(const unsigned int opcode) +{ + unsigned int *pBase, *pAddress, *pFinal, nRc = 1; + + //fp_printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode)); + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) pBase += 2; + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + switch (opcode & MASK_TRANSFER_LENGTH) + { + case TRANSFER_SINGLE : loadSingle(getFd(opcode),pAddress); break; + case TRANSFER_DOUBLE : loadDouble(getFd(opcode),pAddress); break; + case TRANSFER_EXTENDED: loadExtended(getFd(opcode),pAddress); break; + default: nRc = 0; + } + + if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal); + return nRc; +} + +unsigned int PerformSTF(const unsigned int opcode) +{ + unsigned int *pBase, *pAddress, *pFinal, nRc = 1; + + //fp_printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode)); + SetRoundingMode(ROUND_TO_NEAREST); + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) pBase += 2; + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + switch (opcode & MASK_TRANSFER_LENGTH) + { + case TRANSFER_SINGLE : storeSingle(getFd(opcode),pAddress); break; + case TRANSFER_DOUBLE : storeDouble(getFd(opcode),pAddress); break; + case TRANSFER_EXTENDED: storeExtended(getFd(opcode),pAddress); break; + default: nRc = 0; + } + + if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal); + return nRc; +} + +unsigned int PerformLFM(const unsigned int opcode) +{ + unsigned int i, Fd, *pBase, *pAddress, *pFinal; + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) pBase += 2; + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + Fd = getFd(opcode); + for (i=getRegisterCount(opcode);i>0;i--) + { + loadMultiple(Fd,pAddress); + pAddress += 3; Fd++; + if (Fd == 8) Fd = 0; + } + + if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal); + return 1; +} + +unsigned int PerformSFM(const unsigned int opcode) +{ + unsigned int i, Fd, *pBase, *pAddress, *pFinal; + + pBase = (unsigned int*)readRegister(getRn(opcode)); + if (REG_PC == getRn(opcode)) pBase += 2; + + pFinal = pBase; + if (BIT_UP_SET(opcode)) + pFinal += getOffset(opcode); + else + pFinal -= getOffset(opcode); + + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase; + + Fd = getFd(opcode); + for (i=getRegisterCount(opcode);i>0;i--) + { + storeMultiple(Fd,pAddress); + pAddress += 3; Fd++; + if (Fd == 8) Fd = 0; + } + + if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal); + return 1; +} + +#if 1 +unsigned int EmulateCPDT(const unsigned int opcode) +{ + unsigned int nRc = 0; + + //fp_printk("EmulateCPDT(0x%08x)\n",opcode); + + if (LDF_OP(opcode)) + { + nRc = PerformLDF(opcode); + } + else if (LFM_OP(opcode)) + { + nRc = PerformLFM(opcode); + } + else if (STF_OP(opcode)) + { + nRc = PerformSTF(opcode); + } + else if (SFM_OP(opcode)) + { + nRc = PerformSFM(opcode); + } + else + { + nRc = 0; + } + + return nRc; +} +#endif diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c new file mode 100644 index 000000000..bfe13ba1f --- /dev/null +++ b/arch/arm/nwfpe/fpa11_cprt.c @@ -0,0 +1,313 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell, 1999 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "milieu.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" +#include "fpa11.inl" +#include "fpmodule.h" +#include "fpmodule.inl" + +extern flag floatx80_is_nan(floatx80); +extern flag float64_is_nan( float64); +extern flag float32_is_nan( float32); + +void SetRoundingMode(const unsigned int opcode); + +unsigned int PerformFLT(const unsigned int opcode); +unsigned int PerformFIX(const unsigned int opcode); + +static unsigned int +PerformComparison(const unsigned int opcode); + +unsigned int EmulateCPRT(const unsigned int opcode) +{ + unsigned int nRc = 1; + + //fp_printk("EmulateCPRT(0x%08x)\n",opcode); + + if (opcode & 0x800000) + { + /* This is some variant of a comparison (PerformComparison will + sort out which one). Since most of the other CPRT + instructions are oddball cases of some sort or other it makes + sense to pull this out into a fast path. */ + return PerformComparison(opcode); + } + + /* Hint to GCC that we'd like a jump table rather than a load of CMPs */ + switch ((opcode & 0x700000) >> 20) + { + case FLT_CODE >> 20: nRc = PerformFLT(opcode); break; + case FIX_CODE >> 20: nRc = PerformFIX(opcode); break; + + case WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break; + case RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break; + +#if 0 + /* ?? Not at all sure about the mode checks here. Linux never + calls the emulator from a non-USR fault but we always run in SVC + mode. Is there even any point trying to emulate the way FPA11 + behaves in this respect? + + No - and I quote: 'The FPCR may only be present in some + implementations: it is there to control the hardware in an + implementation-specific manner, ... The user mode of the + ARM is not permitted to use this register, and the WFC and + RFC instructions will trap if tried from user mode.' + Therefore, we do not provide the RFC and WFC instructions. + (rmk, 3/05/1999) + */ + case WFC_CODE >> 20: + { + int mode = 0; + __asm__ volatile ("mrs %0, cpsr; and %0, %0, #0x1f;" : : "g" (mode)); + nRc = (0x13 == mode) ? 1 : 0; /* in SVC processor mode? */ + if (nRc) writeFPCR(readRegister(getRd(opcode))); + } + break; + + case RFC_CODE >> 20: + { + int mode = 0; + __asm__ volatile ("mrs %0, cpsr; and %0, %0, #0x1f;" : : "g" (mode)); + nRc = (0x13 == mode) ? 1 : 0; /* in SVC processor mode? */ + if (nRc) writeRegister(getRd(opcode),readFPCR()); break; + } + break; +#endif + + default: nRc = 0; + } + + return nRc; +} + +unsigned int PerformFLT(const unsigned int opcode) +{ + unsigned int nRc = 1; + SetRoundingMode(opcode); + SetRoundingPrecision(opcode); + + switch (opcode & MASK_ROUNDING_PRECISION) + { + case ROUND_SINGLE: + { + fpa11->fpreg[getFn(opcode)].fType = typeSingle; + fpa11->fpreg[getFn(opcode)].fValue.fSingle = + int32_to_float32(readRegister(getRd(opcode))); + } + break; + + case ROUND_DOUBLE: + { + fpa11->fpreg[getFn(opcode)].fType = typeDouble; + fpa11->fpreg[getFn(opcode)].fValue.fDouble = + int32_to_float64(readRegister(getRd(opcode))); + } + break; + + case ROUND_EXTENDED: + { + fpa11->fpreg[getFn(opcode)].fType = typeExtended; + fpa11->fpreg[getFn(opcode)].fValue.fExtended = + int32_to_floatx80(readRegister(getRd(opcode))); + } + break; + + default: nRc = 0; + } + + return nRc; +} + +unsigned int PerformFIX(const unsigned int opcode) +{ + unsigned int nRc = 1; + unsigned int Fn = getFm(opcode); + + SetRoundingMode(opcode); + + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + { + writeRegister(getRd(opcode), + float32_to_int32(fpa11->fpreg[Fn].fValue.fSingle)); + } + break; + + case typeDouble: + { + writeRegister(getRd(opcode), + float64_to_int32(fpa11->fpreg[Fn].fValue.fDouble)); + } + break; + + case typeExtended: + { + writeRegister(getRd(opcode), + floatx80_to_int32(fpa11->fpreg[Fn].fValue.fExtended)); + } + break; + + default: nRc = 0; + } + + return nRc; +} + + +static unsigned int __inline__ +PerformComparisonOperation(floatx80 Fn, floatx80 Fm) +{ + unsigned int flags = 0; + + /* test for less than condition */ + if (floatx80_lt(Fn,Fm)) + { + flags |= CC_NEGATIVE; + } + + /* test for equal condition */ + if (floatx80_eq(Fn,Fm)) + { + flags |= CC_ZERO; + } + + /* test for greater than or equal condition */ + if (floatx80_lt(Fm,Fn)) + { + flags |= CC_CARRY; + } + + writeConditionCodes(flags); + return 1; +} + +/* This instruction sets the flags N, Z, C, V in the FPSR. */ + +static unsigned int PerformComparison(const unsigned int opcode) +{ + unsigned int Fn, Fm; + floatx80 rFn, rFm; + int e_flag = opcode & 0x400000; /* 1 if CxFE */ + int n_flag = opcode & 0x200000; /* 1 if CNxx */ + unsigned int flags = 0; + + //fp_printk("PerformComparison(0x%08x)\n",opcode); + + Fn = getFn(opcode); + Fm = getFm(opcode); + + /* Check for unordered condition and convert all operands to 80-bit + format. + ?? Might be some mileage in avoiding this conversion if possible. + Eg, if both operands are 32-bit, detect this and do a 32-bit + comparison (cheaper than an 80-bit one). */ + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + //fp_printk("single.\n"); + if (float32_is_nan(fpa11->fpreg[Fn].fValue.fSingle)) + goto unordered; + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeDouble: + //fp_printk("double.\n"); + if (float64_is_nan(fpa11->fpreg[Fn].fValue.fDouble)) + goto unordered; + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble); + break; + + case typeExtended: + //fp_printk("extended.\n"); + if (floatx80_is_nan(fpa11->fpreg[Fn].fValue.fExtended)) + goto unordered; + rFn = fpa11->fpreg[Fn].fValue.fExtended; + break; + + default: return 0; + } + + if (CONSTANT_FM(opcode)) + { + //fp_printk("Fm is a constant: #%d.\n",Fm); + rFm = getExtendedConstant(Fm); + if (floatx80_is_nan(rFm)) + goto unordered; + } + else + { + //fp_printk("Fm = r%d which contains a ",Fm); + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + //fp_printk("single.\n"); + if (float32_is_nan(fpa11->fpreg[Fm].fValue.fSingle)) + goto unordered; + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fValue.fSingle); + break; + + case typeDouble: + //fp_printk("double.\n"); + if (float64_is_nan(fpa11->fpreg[Fm].fValue.fDouble)) + goto unordered; + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fValue.fDouble); + break; + + case typeExtended: + //fp_printk("extended.\n"); + if (floatx80_is_nan(fpa11->fpreg[Fm].fValue.fExtended)) + goto unordered; + rFm = fpa11->fpreg[Fm].fValue.fExtended; + break; + + default: return 0; + } + } + + if (n_flag) + { + rFm.high ^= 0x8000; + } + + return PerformComparisonOperation(rFn,rFm); + + unordered: + /* ?? The FPA data sheet is pretty vague about this, in particular + about whether the non-E comparisons can ever raise exceptions. + This implementation is based on a combination of what it says in + the data sheet, observation of how the Acorn emulator actually + behaves (and how programs expect it to) and guesswork. */ + flags |= CC_OVERFLOW; + + if (BIT_AC & readFPSR()) flags |= CC_CARRY; + + if (e_flag) float_raise(float_flag_invalid); + + writeConditionCodes(flags); + return 1; +} diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c new file mode 100644 index 000000000..fb05fc6fb --- /dev/null +++ b/arch/arm/nwfpe/fpmodule.c @@ -0,0 +1,167 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell, 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" + +#ifdef MODULE +#include <linux/module.h> +#include <linux/version.h> +#else +#define MOD_INC_USE_COUNT +#define MOD_DEC_USE_COUNT +#endif + +/* XXX */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/spinlock.h> +#include <asm/atomic.h> +#include <asm/pgtable.h> +/* XXX */ + +#include "softfloat.h" +#include "fpopcode.h" +#include "fpmodule.h" +#include "fpa11.h" +#include "fpa11.inl" + +/* external data */ +extern FPA11 *fpa11; + +/* kernel symbols required for signal handling */ +typedef struct task_struct* PTASK; + +#ifdef MODULE +int fp_printk(const char *,...); +void fp_send_sig(unsigned long sig, PTASK p, int priv); +#if LINUX_VERSION_CODE > 0x20115 +MODULE_AUTHOR("Scott Bambrough <scottb@corelcomputer.com>"); +MODULE_DESCRIPTION("NWFPE floating point emulator"); +#endif + +#else +#define fp_printk printk +#define fp_send_sig send_sig +#define kern_fp_enter fp_enter +#endif + +/* kernel function prototypes required */ +void C_SYMBOL_NAME(fp_setup)(void); + +/* external declarations for saved kernel symbols */ +extern unsigned int C_SYMBOL_NAME(kern_fp_enter); + +/* forward declarations */ +extern void nwfpe_enter(void); + +/* Original value of fp_enter from kernel before patched by fpe_init. */ +static unsigned int orig_fp_enter; + +/* Address of user registers on the kernel stack. */ +unsigned int *userRegisters; + +void __init C_SYMBOL_NAME(fpe_version)(void) +{ + static const char szTitle[] = "<4>NetWinder Floating Point Emulator "; + static const char szVersion[] = "V0.94.1 "; + static const char szCopyright[] = "(c) 1998 Corel Computer Corp.\n"; + C_SYMBOL_NAME(fp_printk)(szTitle); + C_SYMBOL_NAME(fp_printk)(szVersion); + C_SYMBOL_NAME(fp_printk)(szCopyright); +} + +int __init fpe_init(void) +{ + /* Display title, version and copyright information. */ + C_SYMBOL_NAME(fpe_version)(); + + /* Save pointer to the old FP handler and then patch ourselves in */ + orig_fp_enter = C_SYMBOL_NAME(kern_fp_enter); + C_SYMBOL_NAME(kern_fp_enter) = (unsigned int)C_SYMBOL_NAME(nwfpe_enter); + + return 0; +} + +#ifdef MODULE +int init_module(void) +{ + return(fpe_init()); +} + +void cleanup_module(void) +{ + /* Restore the values we saved earlier. */ + C_SYMBOL_NAME(kern_fp_enter) = orig_fp_enter; +} +#endif + +#define _ARM_pc 60 +#define _ARM_cpsr 64 + +/* +ScottB: November 4, 1998 + +Moved this function out of softfloat-specialize into fpmodule.c. +This effectively isolates all the changes required for integrating with the +Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying +fpmodule.c to integrate with the NetBSD kernel (I hope!). + +[1/1/99: Not quite true any more unfortunately. There is Linux-specific +code to access data in user space in some other source files at the +moment. --philb] + +float_exception_flags is a global variable in SoftFloat. + +This function is called by the SoftFloat routines to raise a floating +point exception. We check the trap enable byte in the FPSR, and raise +a SIGFPE exception if necessary. If not the relevant bits in the +cumulative exceptions flag byte are set and we return. +*/ + +void float_raise(signed char flags) +{ +#if 0 + printk(KERN_DEBUG "NWFPE: exception %08x at %08x from %08x\n", flags, + __builtin_return_address(0), userRegisters[15]); +#endif + + float_exception_flags |= flags; + if (readFPSR() & (flags << 16)) + { + /* raise exception */ + C_SYMBOL_NAME(fp_send_sig)(SIGFPE,C_SYMBOL_NAME(current),1); + } + else + { + /* set the cumulative exceptions flags */ + writeFPSR(flags); + } +} diff --git a/arch/arm/nwfpe/fpmodule.h b/arch/arm/nwfpe/fpmodule.h new file mode 100644 index 000000000..39c762935 --- /dev/null +++ b/arch/arm/nwfpe/fpmodule.h @@ -0,0 +1,53 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPMODULE_H__ +#define __FPMODULE_H__ + +#include <linux/config.h> + +#ifdef CONFIG_CPU_32 +#define REG_ORIG_R0 17 +#define REG_CPSR 16 +#else +#define REG_ORIG_R0 16 +#define REG_CPSR 15 +#endif + +#define REG_PC 15 +#define REG_LR 14 +#define REG_SP 13 +#define REG_IP 12 +#define REG_FP 11 +#define REG_R10 10 +#define REG_R9 9 +#define REG_R9 9 +#define REG_R8 8 +#define REG_R7 7 +#define REG_R6 6 +#define REG_R5 5 +#define REG_R4 4 +#define REG_R3 3 +#define REG_R2 2 +#define REG_R1 1 +#define REG_R0 0 + +#endif diff --git a/arch/arm/nwfpe/fpmodule.inl b/arch/arm/nwfpe/fpmodule.inl new file mode 100644 index 000000000..c76b7fd55 --- /dev/null +++ b/arch/arm/nwfpe/fpmodule.inl @@ -0,0 +1,88 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Address of user registers on the kernel stack. */ +extern unsigned int *userRegisters; + +extern __inline__ +unsigned int readRegister(const unsigned int nReg) +{ + /* Note: The CPU thinks it has dealt with the current instruction. As + a result the program counter has been advanced to the next + instruction, and points 4 bytes beyond the actual instruction + that caused the invalid instruction trap to occur. We adjust + for this in this routine. LDF/STF instructions with Rn = PC + depend on the PC being correct, as they use PC+8 in their + address calculations. */ + unsigned int val = userRegisters[nReg]; + + if (REG_PC == nReg) + val -= 4; + + return val; +} + +extern __inline__ +void writeRegister(const unsigned int nReg, const unsigned int val) +{ + userRegisters[nReg] = val; +} + +extern __inline__ +unsigned int readCPSR(void) +{ + return (readRegister(REG_CPSR)); +} + +extern __inline__ +void writeCPSR(const unsigned int val) +{ + writeRegister(REG_CPSR, val); +} + +extern __inline__ +unsigned int readConditionCodes(void) +{ +#ifdef __FPEM_TEST__ + return (0); +#else + return (readCPSR() & CC_MASK); +#endif +} + +extern __inline__ +void writeConditionCodes(const unsigned int val) +{ + unsigned int rval; + + /* + * Operate directly on userRegisters since + * the CPSR may be the PC register itself. + */ + rval = userRegisters[REG_CPSR] & ~CC_MASK; + userRegisters[REG_CPSR] = rval | (val & CC_MASK); +} + +extern __inline__ +unsigned int readMemoryInt(unsigned int *pMem) +{ + return *pMem; +} diff --git a/arch/arm/nwfpe/fpopcode.c b/arch/arm/nwfpe/fpopcode.c new file mode 100644 index 000000000..aa91e1e95 --- /dev/null +++ b/arch/arm/nwfpe/fpopcode.c @@ -0,0 +1,164 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpsr.h" +#include "fpa11.h" +#include "fpmodule.h" +#include "fpmodule.inl" + +static floatx80 floatx80Constant[] = { + { 0x0000, 0x0000000000000000ULL}, /* extended 0.0 */ + { 0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */ + { 0x4000, 0x8000000000000000ULL}, /* extended 2.0 */ + { 0x4000, 0xc000000000000000ULL}, /* extended 3.0 */ + { 0x4001, 0x8000000000000000ULL}, /* extended 4.0 */ + { 0x4001, 0xa000000000000000ULL}, /* extended 5.0 */ + { 0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */ + { 0x4002, 0xa000000000000000ULL} /* extended 10.0 */ +}; + +static float64 float64Constant[] = { + 0x0000000000000000ULL, /* double 0.0 */ + 0x3ff0000000000000ULL, /* double 1.0 */ + 0x4000000000000000ULL, /* double 2.0 */ + 0x4008000000000000ULL, /* double 3.0 */ + 0x4010000000000000ULL, /* double 4.0 */ + 0x4014000000000000ULL, /* double 5.0 */ + 0x3fe0000000000000ULL, /* double 0.5 */ + 0x4024000000000000ULL /* double 10.0 */ +}; + +static float32 float32Constant[] = { + 0x00000000, /* single 0.0 */ + 0x3f800000, /* single 1.0 */ + 0x40000000, /* single 2.0 */ + 0x40400000, /* single 3.0 */ + 0x40800000, /* single 4.0 */ + 0x40a00000, /* single 5.0 */ + 0x3f000000, /* single 0.5 */ + 0x41200000 /* single 10.0 */ +}; + +floatx80 getExtendedConstant(const unsigned int nIndex) +{ + return floatx80Constant[nIndex]; +} + +float64 getDoubleConstant(const unsigned int nIndex) +{ + return float64Constant[nIndex]; +} + +float32 getSingleConstant(const unsigned int nIndex) +{ + return float32Constant[nIndex]; +} + +unsigned int getTransferLength(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_TRANSFER_LENGTH) + { + case 0x00000000: nRc = 1; break; /* single precision */ + case 0x00008000: nRc = 2; break; /* double precision */ + case 0x00400000: nRc = 3; break; /* extended precision */ + default: nRc = 0; + } + + return(nRc); +} + +unsigned int getRegisterCount(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_REGISTER_COUNT) + { + case 0x00000000: nRc = 4; break; + case 0x00008000: nRc = 1; break; + case 0x00400000: nRc = 2; break; + case 0x00408000: nRc = 3; break; + default: nRc = 0; + } + + return(nRc); +} + +unsigned int getRoundingPrecision(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_ROUNDING_PRECISION) + { + case 0x00000000: nRc = 1; break; + case 0x00000080: nRc = 2; break; + case 0x00080000: nRc = 3; break; + default: nRc = 0; + } + + return(nRc); +} + +unsigned int getDestinationSize(const unsigned int opcode) +{ + unsigned int nRc; + + switch (opcode & MASK_DESTINATION_SIZE) + { + case 0x00000000: nRc = typeSingle; break; + case 0x00000080: nRc = typeDouble; break; + case 0x00080000: nRc = typeExtended; break; + default: nRc = typeNone; + } + + return(nRc); +} + +/* contition code lookup table + index into the table is test code: EQ, NE, ... LT, GT, AL, NV + bit position in short is condition code: NZCV */ +unsigned short aCC[16] = { + 0xF0F0, // EQ == Z set + 0x0F0F, // NE + 0xCCCC, // CS == C set + 0x3333, // CC + 0xFF00, // MI == N set + 0x00FF, // PL + 0xAAAA, // VS == V set + 0x5555, // VC + 0x0C0C, // HI == C set && Z clear + 0xF3F3, // LS == C clear || Z set + 0xAA55, // GE == (N==V) + 0x55AA, // LT == (N!=V) + 0x0A05, // GT == (!Z && (N==V)) + 0xF5FA, // LE == (Z || (N!=V)) + 0xFFFF, // AL always + 0 // NV +}; + +unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes) +{ + return (aCC[opcode>>28] >> (ccodes>>28)) & 1; +} diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h new file mode 100644 index 000000000..d6d7aa11a --- /dev/null +++ b/arch/arm/nwfpe/fpopcode.h @@ -0,0 +1,376 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPOPCODE_H__ +#define __FPOPCODE_H__ + +/* +ARM Floating Point Instruction Classes +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +|c o n d|1 1 0 P|U|u|W|L| Rn |v| Fd |0|0|0|1| o f f s e t | CPDT +|c o n d|1 1 0 P|U|w|W|L| Rn |x| Fd |0|0|0|1| o f f s e t | CPDT +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +|c o n d|1 1 1 0|a|b|c|d|e| Fn |j| Fd |0|0|0|1|f|g|h|0|i| Fm | CPDO +|c o n d|1 1 1 0|a|b|c|L|e| Fn | Rd |0|0|0|1|f|g|h|1|i| Fm | CPRT +|c o n d|1 1 1 0|a|b|c|1|e| Fn |1|1|1|1|0|0|0|1|f|g|h|1|i| Fm | comparisons +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + +CPDT data transfer instructions + LDF, STF, LFM, SFM + +CPDO dyadic arithmetic instructions + ADF, MUF, SUF, RSF, DVF, RDF, + POW, RPW, RMF, FML, FDV, FRD, POL + +CPDO monadic arithmetic instructions + MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP, + SIN, COS, TAN, ASN, ACS, ATN, URD, NRM + +CPRT joint arithmetic/data transfer instructions + FIX (arithmetic followed by load/store) + FLT (load/store followed by arithmetic) + CMF, CNF CMFE, CNFE (comparisons) + WFS, RFS (write/read floating point status register) + WFC, RFC (write/read floating point control register) + +cond condition codes +P pre/post index bit: 0 = postindex, 1 = preindex +U up/down bit: 0 = stack grows down, 1 = stack grows up +W write back bit: 1 = update base register (Rn) +L load/store bit: 0 = store, 1 = load +Rn base register +Rd destination/source register +Fd floating point destination register +Fn floating point source register +Fm floating point source register or floating point constant + +uv transfer length (TABLE 1) +wx register count (TABLE 2) +abcd arithmetic opcode (TABLES 3 & 4) +ef destination size (rounding precision) (TABLE 5) +gh rounding mode (TABLE 6) +j dyadic/monadic bit: 0 = dyadic, 1 = monadic +i constant bit: 1 = constant (TABLE 6) +*/ + +/* +TABLE 1 ++-------------------------+---+---+---------+---------+ +| Precision | u | v | FPSR.EP | length | ++-------------------------+---+---+---------+---------+ +| Single | 0 ü 0 | x | 1 words | +| Double | 1 ü 1 | x | 2 words | +| Extended | 1 ü 1 | x | 3 words | +| Packed decimal | 1 ü 1 | 0 | 3 words | +| Expanded packed decimal | 1 ü 1 | 1 | 4 words | ++-------------------------+---+---+---------+---------+ +Note: x = don't care +*/ + +/* +TABLE 2 ++---+---+---------------------------------+ +| w | x | Number of registers to transfer | ++---+---+---------------------------------+ +| 0 ü 1 | 1 | +| 1 ü 0 | 2 | +| 1 ü 1 | 3 | +| 0 ü 0 | 4 | ++---+---+---------------------------------+ +*/ + +/* +TABLE 3: Dyadic Floating Point Opcodes ++---+---+---+---+----------+-----------------------+-----------------------+ +| a | b | c | d | Mnemonic | Description | Operation | ++---+---+---+---+----------+-----------------------+-----------------------+ +| 0 | 0 | 0 | 0 | ADF | Add | Fd := Fn + Fm | +| 0 | 0 | 0 | 1 | MUF | Multiply | Fd := Fn * Fm | +| 0 | 0 | 1 | 0 | SUF | Subtract | Fd := Fn - Fm | +| 0 | 0 | 1 | 1 | RSF | Reverse subtract | Fd := Fm - Fn | +| 0 | 1 | 0 | 0 | DVF | Divide | Fd := Fn / Fm | +| 0 | 1 | 0 | 1 | RDF | Reverse divide | Fd := Fm / Fn | +| 0 | 1 | 1 | 0 | POW | Power | Fd := Fn ^ Fm | +| 0 | 1 | 1 | 1 | RPW | Reverse power | Fd := Fm ^ Fn | +| 1 | 0 | 0 | 0 | RMF | Remainder | Fd := IEEE rem(Fn/Fm) | +| 1 | 0 | 0 | 1 | FML | Fast Multiply | Fd := Fn * Fm | +| 1 | 0 | 1 | 0 | FDV | Fast Divide | Fd := Fn / Fm | +| 1 | 0 | 1 | 1 | FRD | Fast reverse divide | Fd := Fm / Fn | +| 1 | 1 | 0 | 0 | POL | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm) | +| 1 | 1 | 0 | 1 | | undefined instruction | trap | +| 1 | 1 | 1 | 0 | | undefined instruction | trap | +| 1 | 1 | 1 | 1 | | undefined instruction | trap | ++---+---+---+---+----------+-----------------------+-----------------------+ +Note: POW, RPW, POL are deprecated, and are available for backwards + compatibility only. +*/ + +/* +TABLE 4: Monadic Floating Point Opcodes ++---+---+---+---+----------+-----------------------+-----------------------+ +| a | b | c | d | Mnemonic | Description | Operation | ++---+---+---+---+----------+-----------------------+-----------------------+ +| 0 | 0 | 0 | 0 | MVF | Move | Fd := Fm | +| 0 | 0 | 0 | 1 | MNF | Move negated | Fd := - Fm | +| 0 | 0 | 1 | 0 | ABS | Absolute value | Fd := abs(Fm) | +| 0 | 0 | 1 | 1 | RND | Round to integer | Fd := int(Fm) | +| 0 | 1 | 0 | 0 | SQT | Square root | Fd := sqrt(Fm) | +| 0 | 1 | 0 | 1 | LOG | Log base 10 | Fd := log10(Fm) | +| 0 | 1 | 1 | 0 | LGN | Log base e | Fd := ln(Fm) | +| 0 | 1 | 1 | 1 | EXP | Exponent | Fd := e ^ Fm | +| 1 | 0 | 0 | 0 | SIN | Sine | Fd := sin(Fm) | +| 1 | 0 | 0 | 1 | COS | Cosine | Fd := cos(Fm) | +| 1 | 0 | 1 | 0 | TAN | Tangent | Fd := tan(Fm) | +| 1 | 0 | 1 | 1 | ASN | Arc Sine | Fd := arcsin(Fm) | +| 1 | 1 | 0 | 0 | ACS | Arc Cosine | Fd := arccos(Fm) | +| 1 | 1 | 0 | 1 | ATN | Arc Tangent | Fd := arctan(Fm) | +| 1 | 1 | 1 | 0 | URD | Unnormalized round | Fd := int(Fm) | +| 1 | 1 | 1 | 1 | NRM | Normalize | Fd := norm(Fm) | ++---+---+---+---+----------+-----------------------+-----------------------+ +Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are + available for backwards compatibility only. +*/ + +/* +TABLE 5 ++-------------------------+---+---+ +| Rounding Precision | e | f | ++-------------------------+---+---+ +| IEEE Single precision | 0 ü 0 | +| IEEE Double precision | 0 ü 1 | +| IEEE Extended precision | 1 ü 0 | +| undefined (trap) | 1 ü 1 | ++-------------------------+---+---+ +*/ + +/* +TABLE 5 ++---------------------------------+---+---+ +| Rounding Mode | g | h | ++---------------------------------+---+---+ +| Round to nearest (default) | 0 ü 0 | +| Round toward plus infinity | 0 ü 1 | +| Round toward negative infinity | 1 ü 0 | +| Round toward zero | 1 ü 1 | ++---------------------------------+---+---+ +*/ + +/* +=== +=== Definitions for load and store instructions +=== +*/ + +/* bit masks */ +#define BIT_PREINDEX 0x01000000 +#define BIT_UP 0x00800000 +#define BIT_WRITE_BACK 0x00200000 +#define BIT_LOAD 0x00100000 + +/* masks for load/store */ +#define MASK_CPDT 0x0c000000 /* data processing opcode */ +#define MASK_OFFSET 0x000000ff +#define MASK_TRANSFER_LENGTH 0x00408000 +#define MASK_REGISTER_COUNT MASK_TRANSFER_LENGTH +#define MASK_COPROCESSOR 0x00000f00 + +/* Tests for transfer length */ +#define TRANSFER_SINGLE 0x00000000 +#define TRANSFER_DOUBLE 0x00008000 +#define TRANSFER_EXTENDED 0x00400000 +#define TRANSFER_PACKED MASK_TRANSFER_LENGTH + +/* Get the coprocessor number from the opcode. */ +#define getCoprocessorNumber(opcode) ((opcode & MASK_COPROCESSOR) >> 8) + +/* Get the offset from the opcode. */ +#define getOffset(opcode) (opcode & MASK_OFFSET) + +/* Tests for specific data transfer load/store opcodes. */ +#define TEST_OPCODE(opcode,mask) (((opcode) & (mask)) == (mask)) + +#define LOAD_OP(opcode) TEST_OPCODE((opcode),MASK_CPDT | BIT_LOAD) +#define STORE_OP(opcode) ((opcode & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT) + +#define LDF_OP(opcode) (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1)) +#define LFM_OP(opcode) (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2)) +#define STF_OP(opcode) (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1)) +#define SFM_OP(opcode) (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2)) + +#define PREINDEXED(opcode) ((opcode & BIT_PREINDEX) != 0) +#define POSTINDEXED(opcode) ((opcode & BIT_PREINDEX) == 0) +#define BIT_UP_SET(opcode) ((opcode & BIT_UP) != 0) +#define BIT_UP_CLEAR(opcode) ((opcode & BIT_DOWN) == 0) +#define WRITE_BACK(opcode) ((opcode & BIT_WRITE_BACK) != 0) +#define LOAD(opcode) ((opcode & BIT_LOAD) != 0) +#define STORE(opcode) ((opcode & BIT_LOAD) == 0) + +/* +=== +=== Definitions for arithmetic instructions +=== +*/ +/* bit masks */ +#define BIT_MONADIC 0x00008000 +#define BIT_CONSTANT 0x00000008 + +#define CONSTANT_FM(opcode) ((opcode & BIT_CONSTANT) != 0) +#define MONADIC_INSTRUCTION(opcode) ((opcode & BIT_MONADIC) != 0) + +/* instruction identification masks */ +#define MASK_CPDO 0x0e000000 /* arithmetic opcode */ +#define MASK_ARITHMETIC_OPCODE 0x00f08000 +#define MASK_DESTINATION_SIZE 0x00080080 + +/* dyadic arithmetic opcodes. */ +#define ADF_CODE 0x00000000 +#define MUF_CODE 0x00100000 +#define SUF_CODE 0x00200000 +#define RSF_CODE 0x00300000 +#define DVF_CODE 0x00400000 +#define RDF_CODE 0x00500000 +#define POW_CODE 0x00600000 +#define RPW_CODE 0x00700000 +#define RMF_CODE 0x00800000 +#define FML_CODE 0x00900000 +#define FDV_CODE 0x00a00000 +#define FRD_CODE 0x00b00000 +#define POL_CODE 0x00c00000 +/* 0x00d00000 is an invalid dyadic arithmetic opcode */ +/* 0x00e00000 is an invalid dyadic arithmetic opcode */ +/* 0x00f00000 is an invalid dyadic arithmetic opcode */ + +/* monadic arithmetic opcodes. */ +#define MVF_CODE 0x00008000 +#define MNF_CODE 0x00108000 +#define ABS_CODE 0x00208000 +#define RND_CODE 0x00308000 +#define SQT_CODE 0x00408000 +#define LOG_CODE 0x00508000 +#define LGN_CODE 0x00608000 +#define EXP_CODE 0x00708000 +#define SIN_CODE 0x00808000 +#define COS_CODE 0x00908000 +#define TAN_CODE 0x00a08000 +#define ASN_CODE 0x00b08000 +#define ACS_CODE 0x00c08000 +#define ATN_CODE 0x00d08000 +#define URD_CODE 0x00e08000 +#define NRM_CODE 0x00f08000 + +/* +=== +=== Definitions for register transfer and comparison instructions +=== +*/ + +#define MASK_CPRT 0x0e000010 /* register transfer opcode */ +#define MASK_CPRT_CODE 0x00f00000 +#define FLT_CODE 0x00000000 +#define FIX_CODE 0x00100000 +#define WFS_CODE 0x00200000 +#define RFS_CODE 0x00300000 +#define WFC_CODE 0x00400000 +#define RFC_CODE 0x00500000 +#define CMF_CODE 0x00900000 +#define CNF_CODE 0x00b00000 +#define CMFE_CODE 0x00d00000 +#define CNFE_CODE 0x00f00000 + +/* +=== +=== Common definitions +=== +*/ + +/* register masks */ +#define MASK_Rd 0x0000f000 +#define MASK_Rn 0x000f0000 +#define MASK_Fd 0x00007000 +#define MASK_Fm 0x00000007 +#define MASK_Fn 0x00070000 + +/* condition code masks */ +#define CC_MASK 0xf0000000 +#define CC_NEGATIVE 0x80000000 +#define CC_ZERO 0x40000000 +#define CC_CARRY 0x20000000 +#define CC_OVERFLOW 0x10000000 +#define CC_EQ 0x00000000 +#define CC_NE 0x10000000 +#define CC_CS 0x20000000 +#define CC_HS CC_CS +#define CC_CC 0x30000000 +#define CC_LO CC_CC +#define CC_MI 0x40000000 +#define CC_PL 0x50000000 +#define CC_VS 0x60000000 +#define CC_VC 0x70000000 +#define CC_HI 0x80000000 +#define CC_LS 0x90000000 +#define CC_GE 0xa0000000 +#define CC_LT 0xb0000000 +#define CC_GT 0xc0000000 +#define CC_LE 0xd0000000 +#define CC_AL 0xe0000000 +#define CC_NV 0xf0000000 + +/* rounding masks/values */ +#define MASK_ROUNDING_MODE 0x00000060 +#define ROUND_TO_NEAREST 0x00000000 +#define ROUND_TO_PLUS_INFINITY 0x00000020 +#define ROUND_TO_MINUS_INFINITY 0x00000040 +#define ROUND_TO_ZERO 0x00000060 + +#define MASK_ROUNDING_PRECISION 0x00080080 +#define ROUND_SINGLE 0x00000000 +#define ROUND_DOUBLE 0x00000080 +#define ROUND_EXTENDED 0x00080000 + +/* Get the condition code from the opcode. */ +#define getCondition(opcode) (opcode >> 28) + +/* Get the source register from the opcode. */ +#define getRn(opcode) ((opcode & MASK_Rn) >> 16) + +/* Get the destination floating point register from the opcode. */ +#define getFd(opcode) ((opcode & MASK_Fd) >> 12) + +/* Get the first source floating point register from the opcode. */ +#define getFn(opcode) ((opcode & MASK_Fn) >> 16) + +/* Get the second source floating point register from the opcode. */ +#define getFm(opcode) (opcode & MASK_Fm) + +/* Get the destination register from the opcode. */ +#define getRd(opcode) ((opcode & MASK_Rd) >> 12) + +/* Get the rounding mode from the opcode. */ +#define getRoundingMode(opcode) ((opcode & MASK_ROUNDING_MODE) >> 5) + +float32 getSingleConstant(const unsigned int nIndex); +float64 getDoubleConstant(const unsigned int nIndex); +floatx80 getExtendedConstant(const unsigned int nIndex); + +unsigned int getRegisterCount(const unsigned int opcode); +unsigned int getDestinationSize(const unsigned int opcode); + +#endif diff --git a/arch/arm/nwfpe/fpsr.h b/arch/arm/nwfpe/fpsr.h new file mode 100644 index 000000000..f58994ac2 --- /dev/null +++ b/arch/arm/nwfpe/fpsr.h @@ -0,0 +1,108 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __FPSR_H__ +#define __FPSR_H__ + +/* +The FPSR is a 32 bit register consisting of 4 parts, each exactly +one byte. + + SYSTEM ID + EXCEPTION TRAP ENABLE BYTE + SYSTEM CONTROL BYTE + CUMULATIVE EXCEPTION FLAGS BYTE + +The FPCR is a 32 bit register consisting of bit flags. +*/ + +/* SYSTEM ID +------------ +Note: the system id byte is read only */ + +typedef unsigned int FPSR; /* type for floating point status register */ +typedef unsigned int FPCR; /* type for floating point control register */ + +#define MASK_SYSID 0xff000000 +#define BIT_HARDWARE 0x80000000 +#define FP_EMULATOR 0x01000000 /* System ID for emulator */ +#define FP_ACCELERATOR 0x81000000 /* System ID for FPA11 */ + +/* EXCEPTION TRAP ENABLE BYTE +----------------------------- */ + +#define MASK_TRAP_ENABLE 0x00ff0000 +#define MASK_TRAP_ENABLE_STRICT 0x001f0000 +#define BIT_IXE 0x00100000 /* inexact exception enable */ +#define BIT_UFE 0x00080000 /* underflow exception enable */ +#define BIT_OFE 0x00040000 /* overflow exception enable */ +#define BIT_DZE 0x00020000 /* divide by zero exception enable */ +#define BIT_IOE 0x00010000 /* invalid operation exception enable */ + +/* SYSTEM CONTROL BYTE +---------------------- */ + +#define MASK_SYSTEM_CONTROL 0x0000ff00 +#define MASK_TRAP_STRICT 0x00001f00 + +#define BIT_AC 0x00100000 /* use alternative C-flag definition + for compares */ +#define BIT_EP 0x00080000 /* use expanded packed decimal format */ +#define BIT_SO 0x00040000 /* select synchronous operation of FPA */ +#define BIT_NE 0x00020000 /* NaN exception bit */ +#define BIT_ND 0x00010000 /* no denormalized numbers bit */ + +/* CUMULATIVE EXCEPTION FLAGS BYTE +---------------------------------- */ + +#define MASK_EXCEPTION_FLAGS 0x000000ff +#define MASK_EXCEPTION_FLAGS_STRICT 0x0000001f + +#define BIT_IXC 0x00000010 /* inexact exception flag */ +#define BIT_UFC 0x00000008 /* underflow exception flag */ +#define BIT_OFC 0x00000004 /* overfloat exception flag */ +#define BIT_DZC 0x00000002 /* divide by zero exception flag */ +#define BIT_IOC 0x00000001 /* invalid operation exception flag */ + +/* Floating Point Control Register +----------------------------------*/ + +#define BIT_RU 0x80000000 /* rounded up bit */ +#define BIT_IE 0x10000000 /* inexact bit */ +#define BIT_MO 0x08000000 /* mantissa overflow bit */ +#define BIT_EO 0x04000000 /* exponent overflow bit */ +#define BIT_SB 0x00000800 /* store bounce */ +#define BIT_AB 0x00000400 /* arithmetic bounce */ +#define BIT_RE 0x00000200 /* rounding exception */ +#define BIT_DA 0x00000100 /* disable FPA */ + +#define MASK_OP 0x00f08010 /* AU operation code */ +#define MASK_PR 0x00080080 /* AU precision */ +#define MASK_S1 0x00070000 /* AU source register 1 */ +#define MASK_S2 0x00000007 /* AU source register 2 */ +#define MASK_DS 0x00007000 /* AU destination register */ +#define MASK_RM 0x00000060 /* AU rounding mode */ +#define MASK_ALU 0x9cfff2ff /* only ALU can write these bits */ +#define MASK_RESET 0x00000d00 /* bits set on reset, all others cleared */ +#define MASK_WFC MASK_RESET +#define MASK_RFC ~MASK_RESET + +#endif diff --git a/arch/arm/nwfpe/milieu.h b/arch/arm/nwfpe/milieu.h new file mode 100644 index 000000000..a3892ab2d --- /dev/null +++ b/arch/arm/nwfpe/milieu.h @@ -0,0 +1,48 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. +------------------------------------------------------------------------------- +*/ +#include "ARM-gcc.h" + +/* +------------------------------------------------------------------------------- +Symbolic Boolean literals. +------------------------------------------------------------------------------- +*/ +enum { + FALSE = 0, + TRUE = 1 +}; + diff --git a/arch/arm/nwfpe/single_cpdo.c b/arch/arm/nwfpe/single_cpdo.c new file mode 100644 index 000000000..f8405ee57 --- /dev/null +++ b/arch/arm/nwfpe/single_cpdo.c @@ -0,0 +1,259 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "milieu.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" + +float32 getSingleConstant(unsigned int); + +float32 float32_exp(float32 Fm); +float32 float32_ln(float32 Fm); +float32 float32_sin(float32 rFm); +float32 float32_cos(float32 rFm); +float32 float32_arcsin(float32 rFm); +float32 float32_arctan(float32 rFm); +float32 float32_log(float32 rFm); +float32 float32_tan(float32 rFm); +float32 float32_arccos(float32 rFm); +float32 float32_pow(float32 rFn,float32 rFm); +float32 float32_pol(float32 rFn,float32 rFm); + +unsigned int SingleCPDO(const unsigned int opcode) +{ + float32 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getSingleConstant(Fm); + } + else + { + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + rFm = fpa11->fpreg[Fm].fValue.fSingle; + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + rFn = fpa11->fpreg[Fn].fValue.fSingle; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = rFm; + break; + + case MNF_CODE: + rFm ^= 0x80000000; + fpa11->fpreg[Fd].fValue.fSingle = rFm; + break; + + case ABS_CODE: + rFm &= 0x7fffffff; + fpa11->fpreg[Fd].fValue.fSingle = rFm; + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fValue.fSingle = + int32_to_float32(float32_to_int32(rFm)); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fpreg[Fd].fType = typeSingle; + return nRc; +} + +#if 0 +float32 float32_exp(float32 Fm) +{ +//series +} + +float32 float32_ln(float32 Fm) +{ +//series +} + +float32 float32_sin(float32 rFm) +{ +//series +} + +float32 float32_cos(float32 rFm) +{ +//series +} + +float32 float32_arcsin(float32 rFm) +{ +//series +} + +float32 float32_arctan(float32 rFm) +{ + //series +} + +float32 float32_arccos(float32 rFm) +{ + //return float32_sub(halfPi,float32_arcsin(rFm)); +} + +float32 float32_log(float32 rFm) +{ + return float32_div(float32_ln(rFm),getSingleConstant(7)); +} + +float32 float32_tan(float32 rFm) +{ + return float32_div(float32_sin(rFm),float32_cos(rFm)); +} + +float32 float32_pow(float32 rFn,float32 rFm) +{ + return float32_exp(float32_mul(rFm,float32_ln(rFn))); +} + +float32 float32_pol(float32 rFn,float32 rFm) +{ + return float32_arctan(float32_div(rFn,rFm)); +} +#endif diff --git a/arch/arm/nwfpe/softfloat-macros b/arch/arm/nwfpe/softfloat-macros new file mode 100644 index 000000000..5469989f2 --- /dev/null +++ b/arch/arm/nwfpe/softfloat-macros @@ -0,0 +1,740 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Shifts `a' right by the number of bits given in `count'. If any nonzero +bits are shifted off, they are ``jammed'' into the least significant bit of +the result by setting the least significant bit to 1. The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 32, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. +------------------------------------------------------------------------------- +*/ +INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +{ + bits32 z; + if ( count == 0 ) { + z = a; + } + else if ( count < 32 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; +} + +/* +------------------------------------------------------------------------------- +Shifts `a' right by the number of bits given in `count'. If any nonzero +bits are shifted off, they are ``jammed'' into the least significant bit of +the result by setting the least significant bit to 1. The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 64, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. +------------------------------------------------------------------------------- +*/ +INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +{ + bits64 z; + + __asm__("@shift64RightJamming -- start"); + if ( count == 0 ) { + z = a; + } + else if ( count < 64 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + __asm__("@shift64RightJamming -- end"); + *zPtr = z; +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 +_plus_ the number of bits given in `count'. The shifted result is at most +64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The +bits shifted off form a second 64-bit result as follows: The _last_ bit +shifted off is the most-significant bit of the extra result, and the other +63 bits of the extra result are all zero if and only if _all_but_the_last_ +bits shifted off were all zero. This extra result is stored in the location +pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. + (This routine makes more sense if `a0' and `a1' are considered to form a +fixed-point value with binary point between `a0' and `a1'. This fixed-point +value is shifted right by the number of bits given in `count', and the +integer part of the result is returned at the location pointed to by +`z0Ptr'. The fractional part of the result may be slightly corrupted as +described above, and is returned at the location pointed to by `z1Ptr'.) +------------------------------------------------------------------------------- +*/ +INLINE void + shift64ExtraRightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1 != 0 ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' can be arbitrarily large; in particular, if `count' is greater +than 128, the result will be 0. The result is broken into two 64-bit pieces +which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift128Right( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. If any nonzero bits are shifted off, they +are ``jammed'' into the least significant bit of the result by setting the +least significant bit to 1. The value of `count' can be arbitrarily large; +in particular, if `count' is greater than 128, the result will be either 0 +or 1, depending on whether the concatenation of `a0' and `a1' is zero or +nonzero. The result is broken into two 64-bit pieces which are stored at +the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shift128RightJamming( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z0, z1; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 64 ) { + z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 128 ) { + z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right +by 64 _plus_ the number of bits given in `count'. The shifted result is +at most 128 nonzero bits; these are broken into two 64-bit pieces which are +stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted +off form a third 64-bit result as follows: The _last_ bit shifted off is +the most-significant bit of the extra result, and the other 63 bits of the +extra result are all zero if and only if _all_but_the_last_ bits shifted off +were all zero. This extra result is stored in the location pointed to by +`z2Ptr'. The value of `count' can be arbitrarily large. + (This routine makes more sense if `a0', `a1', and `a2' are considered +to form a fixed-point value with binary point between `a1' and `a2'. This +fixed-point value is shifted right by the number of bits given in `count', +and the integer part of the result is returned at the locations pointed to +by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly +corrupted as described above, and is returned at the location pointed to by +`z2Ptr'.) +------------------------------------------------------------------------------- +*/ +INLINE void + shift128ExtraRightJamming( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount = ( - count ) & 63; + + if ( count == 0 ) { + z2 = a2; + z1 = a1; + z0 = a0; + } + else { + if ( count < 64 ) { + z2 = a1<<negCount; + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + if ( count == 64 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 128 ) { + z2 = a0<<negCount; + z1 = a0>>( count & 63 ); + } + else { + z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' must be less than 64. The result is broken into two 64-bit +pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shortShift128Left( + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1<<count; + *z0Ptr = + ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); + +} + +/* +------------------------------------------------------------------------------- +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left +by the number of bits given in `count'. Any bits shifted off are lost. +The value of `count' must be less than 64. The result is broken into three +64-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + shortShift192Left( + bits64 a0, + bits64 a1, + bits64 a2, + int16 count, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 negCount; + + z2 = a2<<count; + z1 = a1<<count; + z0 = a0<<count; + if ( 0 < count ) { + negCount = ( ( - count ) & 63 ); + z1 |= a2>>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit +value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so +any carry out is lost. The result is broken into two 64-bit pieces which +are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + add128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits64 z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/* +------------------------------------------------------------------------------- +Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the +192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +modulo 2^192, so any carry out is lost. The result is broken into three +64-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + add192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the +128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +2^128, so any borrow out (carry out) is lost. The result is broken into two +64-bit pieces which are stored at the locations pointed to by `z0Ptr' and +`z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + sub128( + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' +from the 192-bit value formed by concatenating `a0', `a1', and `a2'. +Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The +result is broken into three 64-bit pieces which are stored at the locations +pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + sub192( + bits64 a0, + bits64 a1, + bits64 a2, + bits64 b0, + bits64 b1, + bits64 b2, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2; + int8 borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies `a' by `b' to obtain a 128-bit product. The product is broken +into two 64-bit pieces which are stored at the locations pointed to by +`z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) +{ + bits32 aHigh, aLow, bHigh, bLow; + bits64 z0, zMiddleA, zMiddleB, z1; + + aLow = a; + aHigh = a>>32; + bLow = b; + bHigh = b>>32; + z1 = ( (bits64) aLow ) * bLow; + zMiddleA = ( (bits64) aLow ) * bHigh; + zMiddleB = ( (bits64) aHigh ) * bLow; + z0 = ( (bits64) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + zMiddleA <<= 32; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to +obtain a 192-bit product. The product is broken into three 64-bit pieces +which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +`z2Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + mul128By64To192( + bits64 a0, + bits64 a1, + bits64 b, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr + ) +{ + bits64 z0, z1, z2, more1; + + mul64To128( a1, b, &z1, &z2 ); + mul64To128( a0, b, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the +128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit +product. The product is broken into four 64-bit pieces which are stored at +the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +------------------------------------------------------------------------------- +*/ +INLINE void + mul128To256( + bits64 a0, + bits64 a1, + bits64 b0, + bits64 b1, + bits64 *z0Ptr, + bits64 *z1Ptr, + bits64 *z2Ptr, + bits64 *z3Ptr + ) +{ + bits64 z0, z1, z2, z3; + bits64 more1, more2; + + mul64To128( a1, b1, &z2, &z3 ); + mul64To128( a1, b0, &z1, &more2 ); + add128( z1, more2, 0, z2, &z1, &z2 ); + mul64To128( a0, b0, &z0, &more1 ); + add128( z0, more1, 0, z1, &z0, &z1 ); + mul64To128( a0, b1, &more1, &more2 ); + add128( more1, more2, 0, z2, &more1, &z2 ); + add128( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the 64-bit integer quotient obtained by dividing +`b' into the 128-bit value formed by concatenating `a0' and `a1'. The +divisor `b' must be at least 2^63. If q is the exact quotient truncated +toward zero, the approximation returned lies between q and q + 2 inclusive. +If the exact quotient q is larger than 64 bits, the maximum positive 64-bit +unsigned integer is returned. +------------------------------------------------------------------------------- +*/ +static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +{ + bits64 b0, b1; + bits64 rem0, rem1, term0, term1; + bits64 z; + if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); + b0 = b>>32; + z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; + mul64To128( b, z, &term0, &term1 ); + sub128( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits64) rem0 ) < 0 ) { + z -= LIT64( 0x100000000 ); + b1 = b<<32; + add128( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<32 ) | ( rem1>>32 ); + z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the square root of the 32-bit significand given +by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +`aExp' (the least significant bit) is 1, the integer returned approximates +2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +case, the approximation returned lies strictly within +/-2 of the exact +value. +------------------------------------------------------------------------------- +*/ +static bits32 estimateSqrt32( int16 aExp, bits32 a ) +{ + static const bits16 sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const bits16 sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8 index; + bits32 z; + + index = ( a>>27 ) & 15; + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + } + return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit +of `a'. If `a' is zero, 32 is returned. +------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit +of `a'. If `a' is zero, 64 is returned. +------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros64( bits64 a ) +{ + int8 shiftCount; + + shiftCount = 0; + if ( a < ( (bits64) 1 )<<32 ) { + shiftCount += 32; + } + else { + a >>= 32; + } + shiftCount += countLeadingZeros32( a ); + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' +is equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is +not equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/arch/arm/nwfpe/softfloat-specialize b/arch/arm/nwfpe/softfloat-specialize new file mode 100644 index 000000000..f03e5c6d4 --- /dev/null +++ b/arch/arm/nwfpe/softfloat-specialize @@ -0,0 +1,471 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Underflow tininess-detection mode, statically initialized to default value. +(The declaration in `softfloat.h' must match the `int8' type here.) +------------------------------------------------------------------------------- +*/ +int8 float_detect_tininess = float_tininess_after_rounding; + +/* +------------------------------------------------------------------------------- +Raises the exceptions specified by `flags'. Floating-point traps can be +defined here if desired. It is currently not possible for such a trap to +substitute a result value. If traps are not implemented, this routine +should be simply `float_exception_flags |= flags;'. + +ScottB: November 4, 1998 +Moved this function out of softfloat-specialize into fpmodule.c. +This effectively isolates all the changes required for integrating with the +Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying +fpmodule.c to integrate with the NetBSD kernel (I hope!). +------------------------------------------------------------------------------- +void float_raise( int8 flags ) +{ + float_exception_flags |= flags; +} +*/ + +/* +------------------------------------------------------------------------------- +Internal canonical NaN format. +------------------------------------------------------------------------------- +*/ +typedef struct { + flag sign; + bits64 high, low; +} commonNaNT; + +/* +------------------------------------------------------------------------------- +The pattern for a default generated single-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float32_default_nan 0xFFFFFFFF + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float32_is_nan( float32 a ) +{ + + return ( 0xFF000000 < (bits32) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float32ToCommonNaN( float32 a ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>31; + z.low = 0; + z.high = ( (bits64) a )<<41; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the single- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two single-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float32 propagateFloat32NaN( float32 a, float32 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +/* +------------------------------------------------------------------------------- +The pattern for a default generated double-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float64_is_nan( float64 a ) +{ + + return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float64_is_signaling_nan( float64 a ) +{ + + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float64ToCommonNaN( float64 a ) +{ + commonNaNT z; + + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>63; + z.low = 0; + z.high = a<<12; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the double- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float64 commonNaNToFloat64( commonNaNT a ) +{ + + return + ( ( (bits64) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two double-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float64 propagateFloat64NaN( float64 a, float64 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= LIT64( 0x0008000000000000 ); + b |= LIT64( 0x0008000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated extended double-precision NaN. The +`high' and `low' values hold the most- and least-significant bits, +respectively. +------------------------------------------------------------------------------- +*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_nan( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +signaling NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_signaling_nan( floatx80 a ) +{ + //register int lr; + bits64 aLow; + + //__asm__("mov %0, lr" : : "g" (lr)); + //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr); + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( a.low == aLow ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT floatx80ToCommonNaN( floatx80 a ) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the extended +double-precision floating-point format. +------------------------------------------------------------------------------- +*/ +static floatx80 commonNaNToFloatx80( commonNaNT a ) +{ + floatx80 z; + + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two extended double-precision floating-point values `a' and `b', one +of which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated quadruple-precision NaN. The `high' and +`low' values hold the most- and least-significant bits, respectively. +------------------------------------------------------------------------------- +*/ +#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) +#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float128_is_nan( float128 a ) +{ + + return + ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a +signaling NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float128_is_signaling_nan( float128 a ) +{ + + return + ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) + && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float128ToCommonNaN( float128 a ) +{ + commonNaNT z; + + if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>63; + shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the quadruple- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float128 commonNaNToFloat128( commonNaNT a ) +{ + float128 z; + + shift128Right( a.high, a.low, 16, &z.high, &z.low ); + z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two quadruple-precision floating-point values `a' and `b', one of +which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float128 propagateFloat128NaN( float128 a, float128 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float128_is_nan( a ); + aIsSignalingNaN = float128_is_signaling_nan( a ); + bIsNaN = float128_is_nan( b ); + bIsSignalingNaN = float128_is_signaling_nan( b ); + a.high |= LIT64( 0x0000800000000000 ); + b.high |= LIT64( 0x0000800000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#endif + diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c new file mode 100644 index 000000000..a7fc76cc8 --- /dev/null +++ b/arch/arm/nwfpe/softfloat.c @@ -0,0 +1,4877 @@ +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include "milieu.h" +#include "softfloat.h" + +/* +------------------------------------------------------------------------------- +Floating-point rounding mode, extended double-precision rounding precision, +and exception flags. +------------------------------------------------------------------------------- +*/ +int8 float_rounding_mode = float_round_nearest_even; +int8 floatx80_rounding_precision = 80; +int8 float_exception_flags = 0; + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) +------------------------------------------------------------------------------- +*/ +#include "softfloat-macros" + +/* +------------------------------------------------------------------------------- +Functions and definitions to determine: (1) whether tininess for underflow +is detected before or after rounding by default, (2) what (if anything) +happens when exceptions are raised, (3) how signaling NaNs are distinguished +from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +are propagated from function inputs to output. These details are target- +specific. +------------------------------------------------------------------------------- +*/ +#include "softfloat-specialize" + +/* +------------------------------------------------------------------------------- +Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +and 7, and returns the properly rounded 32-bit integer corresponding to the +input. If `zSign' is nonzero, the input is negated before being converted +to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point +input is simply rounded to an integer, with the inexact exception raised if +the input cannot be represented exactly as an integer. If the fixed-point +input is too large, however, the invalid exception is raised and the largest +positive or negative integer is returned. +------------------------------------------------------------------------------- +*/ +static int32 roundAndPackInt32( flag zSign, bits64 absZ ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + int32 z; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_exception_flags |= float_flag_invalid; + return zSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return a & 0x007FFFFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( a>>23 ) & 0xFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat32Sign( float32 a ) +{ + + return a>>31; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal single-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( aSig ) - 8; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +single-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ +#if 0 + float32 f; + __asm__("@ packFloat32; + mov %0, %1, asl #31; + orr %0, %2, asl #23; + orr %0, %3" + : /* no outputs */ + : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig) + : "cc"); + return f; +#else + return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; +#endif +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. Ordinarily, the abstract +value is simply rounded and packed into the single-precision format, with +the inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal single- +precision floating-point number. + The input significand `zSig' has its binary point between bits 30 +and 29, which is 7 bits to the left of the usual location. This shifted +significand must be normalized or smaller. If `zSig' is not normalized, +`zExp' must be 0; in that case, the result returned is a subnormal number, +and it must not require rounding. In the usual case that `zSig' is +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +The handling of underflow and overflow follows the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + flag isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x7F; + if ( 0xFD <= (bits16) zExp ) { + if ( ( 0xFD < zExp ) + || ( ( zExp == 0xFD ) + && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise( float_flag_overflow | float_flag_inexact ); + return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); + shift32RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x7F; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat32' except that `zSig' does not have to be normalized in +any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. +------------------------------------------------------------------------------- +*/ +static float32 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( zSig ) - 1; + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); + +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat64Frac( float64 a ) +{ + + return a & LIT64( 0x000FFFFFFFFFFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat64Exp( float64 a ) +{ + + return ( a>>52 ) & 0x7FF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat64Sign( float64 a ) +{ + + return a>>63; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal double-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ) - 11; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +double-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + + return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. Ordinarily, the abstract +value is simply rounded and packed into the double-precision format, with +the inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal double- +precision floating-point number. + The input significand `zSig' has its binary point between bits 62 +and 61, which is 10 bits to the left of the usual location. This shifted +significand must be normalized or smaller. If `zSig' is not normalized, +`zExp' must be 0; in that case, the result returned is a subnormal number, +and it must not require rounding. In the usual case that `zSig' is +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +The handling of underflow and overflow follows the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 roundingMode; + flag roundNearestEven; + int16 roundIncrement, roundBits; + flag isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x200; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x3FF; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x3FF; + if ( 0x7FD <= (bits16) zExp ) { + if ( ( 0x7FD < zExp ) + || ( ( zExp == 0x7FD ) + && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) + ) { + //register int lr; + //__asm__("mov %0, lr" :: "g" (lr)); + //fp_printk("roundAndPackFloat64 called from 0x%08x\n",lr); + float_raise( float_flag_overflow | float_flag_inexact ); + return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); + shift64RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x3FF; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat64' except that `zSig' does not have to be normalized in +any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. +------------------------------------------------------------------------------- +*/ +static float64 + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( zSig ) - 1; + return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the extended double-precision floating-point +value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloatx80Frac( floatx80 a ) +{ + + return a.low; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the extended double-precision floating-point +value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int32 extractFloatx80Exp( floatx80 a ) +{ + + return a.high & 0x7FFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the extended double-precision floating-point value +`a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloatx80Sign( floatx80 a ) +{ + + return a.high>>15; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal extended double-precision floating-point value +represented by the denormalized significand `aSig'. The normalized exponent +and significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ); + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into an +extended double-precision floating-point value, returning the result. +------------------------------------------------------------------------------- +*/ +INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) +{ + floatx80 z; + + z.low = zSig; + z.high = ( ( (bits16) zSign )<<15 ) + zExp; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and extended significand formed by the concatenation of `zSig0' and `zSig1', +and returns the proper extended double-precision floating-point value +corresponding to the abstract input. Ordinarily, the abstract value is +rounded and packed into the extended double-precision format, with the +inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal extended +double-precision floating-point number. + If `roundingPrecision' is 32 or 64, the result is rounded to the same +number of bits as single or double precision, respectively. Otherwise, the +result is rounded to the full precision of the extended double-precision +format. + The input significand must be normalized or smaller. If the input +significand is not normalized, `zExp' must be 0; in that case, the result +returned is a subnormal number, and it must not require rounding. The +handling of underflow and overflow follows the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 + roundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + ) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + int64 roundIncrement, roundMask, roundBits; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } + else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } + else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; + } + if ( zExp <= 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( (sbits64) zSig0 < 0 ) zExp = 1; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + increment = ( (sbits64) zSig1 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + } + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise( float_flag_overflow | float_flag_inexact ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( zExp <= 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ! increment + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); + zExp = 0; + if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); + if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( roundNearestEven ) { + increment = ( (sbits64) zSig1 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + if ( increment ) { + ++zSig0; + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); + if ( (sbits64) zSig0 < 0 ) zExp = 1; + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + else { + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); + } + } + else { + if ( zSig0 == 0 ) zExp = 0; + } + + return packFloatx80( zSign, zExp, zSig0 ); +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent +`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +and returns the proper extended double-precision floating-point value +corresponding to the abstract input. This routine is just like +`roundAndPackFloatx80' except that the input significand does not have to be +normalized. +------------------------------------------------------------------------------- +*/ +static floatx80 + normalizeRoundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + ) +{ + int8 shiftCount; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return + roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the least-significant 64 fraction bits of the quadruple-precision +floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat128Frac1( float128 a ) +{ + + return a.low; + +} + +/* +------------------------------------------------------------------------------- +Returns the most-significant 48 fraction bits of the quadruple-precision +floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat128Frac0( float128 a ) +{ + + return a.high & LIT64( 0x0000FFFFFFFFFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the quadruple-precision floating-point value +`a'. +------------------------------------------------------------------------------- +*/ +INLINE int32 extractFloat128Exp( float128 a ) +{ + + return ( a.high>>48 ) & 0x7FFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the quadruple-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat128Sign( float128 a ) +{ + + return a.high>>63; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal quadruple-precision floating-point value +represented by the denormalized significand formed by the concatenation of +`aSig0' and `aSig1'. The normalized exponent is stored at the location +pointed to by `zExpPtr'. The most significant 49 bits of the normalized +significand are stored at the location pointed to by `zSig0Ptr', and the +least significant 64 bits of the normalized significand are stored at the +location pointed to by `zSig1Ptr'. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat128Subnormal( + bits64 aSig0, + bits64 aSig1, + int32 *zExpPtr, + bits64 *zSig0Ptr, + bits64 *zSig1Ptr + ) +{ + int8 shiftCount; + + if ( aSig0 == 0 ) { + shiftCount = countLeadingZeros64( aSig1 ) - 15; + if ( shiftCount < 0 ) { + *zSig0Ptr = aSig1>>( - shiftCount ); + *zSig1Ptr = aSig1<<( shiftCount & 63 ); + } + else { + *zSig0Ptr = aSig1<<shiftCount; + *zSig1Ptr = 0; + } + *zExpPtr = - shiftCount - 63; + } + else { + shiftCount = countLeadingZeros64( aSig0 ) - 15; + shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); + *zExpPtr = 1 - shiftCount; + } + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', the exponent `zExp', and the significand formed +by the concatenation of `zSig0' and `zSig1' into a quadruple-precision +floating-point value, returning the result. After being shifted into the +proper positions, the three fields `zSign', `zExp', and `zSig0' are simply +added together to form the most significant 32 bits of the result. This +means that any integer portion of `zSig0' will be added into the exponent. +Since a properly normalized significand will have an integer portion equal +to 1, the `zExp' input should be 1 less than the desired result exponent +whenever `zSig0' and `zSig1' concatenated form a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float128 + packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +{ + float128 z; + + z.low = zSig1; + z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and extended significand formed by the concatenation of `zSig0', `zSig1', +and `zSig2', and returns the proper quadruple-precision floating-point value +corresponding to the abstract input. Ordinarily, the abstract value is +simply rounded and packed into the quadruple-precision format, with the +inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal quadruple- +precision floating-point number. + The input significand must be normalized or smaller. If the input +significand is not normalized, `zExp' must be 0; in that case, the result +returned is a subnormal number, and it must not require rounding. In the +usual case that the input significand is normalized, `zExp' must be 1 less +than the ``true'' floating-point exponent. The handling of underflow and +overflow follows the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float128 + roundAndPackFloat128( + flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + increment = ( (sbits64) zSig2 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig2; + } + else { + increment = ( roundingMode == float_round_up ) && zSig2; + } + } + } + if ( 0x7FFD <= (bits32) zExp ) { + if ( ( 0x7FFD < zExp ) + || ( ( zExp == 0x7FFD ) + && eq128( + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ), + zSig0, + zSig1 + ) + && increment + ) + ) { + float_raise( float_flag_overflow | float_flag_inexact ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return + packFloat128( + zSign, + 0x7FFE, + LIT64( 0x0000FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ! increment + || lt128( + zSig0, + zSig1, + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); + zExp = 0; + if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); + if ( roundNearestEven ) { + increment = ( (sbits64) zSig2 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig2; + } + else { + increment = ( roundingMode == float_round_up ) && zSig2; + } + } + } + } + if ( zSig2 ) float_exception_flags |= float_flag_inexact; + if ( increment ) { + add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); + zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + } + else { + if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; + } + return packFloat128( zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand formed by the concatenation of `zSig0' and `zSig1', and +returns the proper quadruple-precision floating-point value corresponding to +the abstract input. This routine is just like `roundAndPackFloat128' except +that the input significand has fewer bits and does not have to be normalized +in any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. +------------------------------------------------------------------------------- +*/ +static float128 + normalizeRoundAndPackFloat128( + flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +{ + int8 shiftCount; + bits64 zSig2; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ) - 15; + if ( 0 <= shiftCount ) { + zSig2 = 0; + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + } + else { + shift128ExtraRightJamming( + zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); + } + zExp -= shiftCount; + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the single-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int32 a ) +{ + flag zSign; + + if ( a == 0 ) return 0; + if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the double-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 int32_to_float64( int32 a ) +{ + flag aSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return 0; + aSign = ( a < 0 ); + absA = aSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 21; + zSig = absA; + return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' +to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 int32_to_floatx80( int32 a ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 32; + zSig = absA; + return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the quadruple-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 int32_to_float128( int32 a ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig0; + + if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 17; + zSig0 = absA; + return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= 0x00800000; + shiftCount = 0xAF - aExp; + zSig = aSig; + zSig <<= 32; + if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); + return roundAndPackInt32( aSign, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( a == 0xCF000000 ) return 0x80000000; + float_raise( float_flag_invalid ); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + return 0x80000000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + float_exception_flags |= float_flag_inexact; + } + return aSign ? - z : z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float32_to_float64( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); + return packFloat64( aSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 float32_to_floatx80( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + aSig |= 0x00800000; + return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float32_to_float128( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the single-precision floating-point value `a' to an integer, and +returns the result as a single-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + float32 z; + + aExp = extractFloat32Exp( a ); + if ( 0x96 <= aExp ) { + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { + return propagateFloat32NaN( a, a ); + } + return a; + } + if ( aExp <= 0x7E ) { + if ( (bits32) ( a<<1 ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat32Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { + return packFloat32( aSign, 0x7F, 0 ); + } + break; + case float_round_down: + return aSign ? 0xBF800000 : 0; + case float_round_up: + return aSign ? 0x80000000 : 0x3F800000; + } + return packFloat32( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the single-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the single- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the single-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_add( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs( a, b, aSign ); + } + else { + return subFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sub( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs( a, b, aSign ); + } + else { + return addFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_mul( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig; + bits64 zSig64; + bits32 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); + zSig = zSig64; + if ( 0 <= (sbits32) ( zSig<<1 ) ) { + zSig <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the single-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_div( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = ( ( (bits64) aSig )<<32 ) / bSig; + if ( ( zSig & 0x3F ) == 0 ) { + zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the single-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_rem( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig; + bits32 q; + bits64 aSig64, bSig64, q64; + bits32 alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig |= 0x00800000; + bSig |= 0x00800000; + if ( expDiff < 32 ) { + aSig <<= 8; + bSig <<= 8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + if ( 0 < expDiff ) { + q = ( ( (bits64) aSig )<<32 ) / bSig; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + } + else { + if ( bSig <= aSig ) aSig -= bSig; + aSig64 = ( (bits64) aSig )<<40; + bSig64 = ( (bits64) bSig )<<40; + expDiff -= 64; + while ( 0 < expDiff ) { + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + aSig64 = - ( ( bSig * q64 )<<38 ); + expDiff -= 62; + } + expDiff += 64; + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + q = q64>>( 64 - expDiff ); + bSig <<= 6; + aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the single-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sqrt( float32 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig; + bits64 rem, term; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, 0 ); + if ( ! aSign ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0xFFFFFFFF; + } + else { + aSig >>= aExp & 1; + term = ( (bits64) zSig ) * zSig; + rem = ( ( (bits64) aSig )<<32 ) - term; + while ( (sbits64) rem < 0 ) { + --zSig; + rem += ( ( (bits64) zSig )<<1 ) | 1; + } + zSig |= ( rem != 0 ); + } + } + shift32RightJamming( zSig, 1, &zSig ); + return roundAndPackFloat32( 0, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq_signaling( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + //int16 aExp, bExp; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = 0x433 - aExp; + if ( shiftCount < 21 ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( 52 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement unsigned integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_uint32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = 0; //extractFloat64Sign( a ); + //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_uint32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = 0x433 - aExp; + if ( shiftCount < 21 ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( 52 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the single-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float64_to_float32( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + bits32 zSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 22, &aSig ); + zSig = aSig; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 float64_to_floatx80( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the quadruple-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float64_to_float128( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the double-precision floating-point value `a' to an integer, and +returns the result as a double-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { + return propagateFloat64NaN( a, a ); + } + return a; + } + if ( aExp <= 0x3FE ) { + if ( (bits64) ( a<<1 ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat64Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { + return packFloat64( aSign, 0x3FF, 0 ); + } + break; + case float_round_down: + return aSign ? LIT64( 0xBFF0000000000000 ) : 0; + case float_round_up: + return + aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); + } + return packFloat64( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the double-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 9; + bSig <<= 9; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= LIT64( 0x2000000000000000 ); + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits64) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the double- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 10; + bSig <<= 10; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign ^ 1, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + bSig |= LIT64( 0x4000000000000000 ); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + aSig |= LIT64( 0x4000000000000000 ); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the double-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_add( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign ); + } + else { + return subFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sub( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign ); + } + else { + return addFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_mul( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FF; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits64) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the double-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_div( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + bits64 rem0, rem1; + bits64 term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat64( zSign, 0x7FF, 0 ); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FD; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64( aSig, 0, bSig ); + if ( ( zSig & 0x1FF ) <= 2 ) { + mul64To128( bSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the double-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_rem( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits64 aSig, bSig; + bits64 q, alternateASig; + sbits64 sigMean; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits64) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits64) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the double-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sqrt( float64 a ) +{ + flag aSign; + int16 aExp, zExp; + bits64 aSig, zSig; + bits64 rem0, rem1, term0, term1; //, shiftedRem; + //float64 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, a ); + if ( ! aSign ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig |= LIT64( 0x0010000000000000 ); + zSig = estimateSqrt32( aExp, aSig>>21 ); + zSig <<= 31; + aSig <<= 9 - ( aExp & 1 ); + zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2; + if ( ( zSig & 0x3FF ) <= 5 ) { + if ( zSig < 2 ) { + zSig = LIT64( 0xFFFFFFFFFFFFFFFF ); + } + else { + aSig <<= 2; + mul64To128( zSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + shortShift128Left( 0, zSig, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + } + shift64RightJamming( zSig, 1, &zSig ); + return roundAndPackFloat64( 0, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq_signaling( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + //int16 aExp, bExp; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic---which means in particular that the conversion +is rounded according to the current rounding mode. If `a' is a NaN, the +largest positive integer is returned. Otherwise, if the conversion +overflows, the largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic, except that the conversion is always rounded +toward zero. If `a' is a NaN, the largest positive integer is returned. +Otherwise, if the conversion overflows, the largest integer with the same +sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32_round_to_zero( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = 0x403E - aExp; + if ( shiftCount < 32 ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( 63 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the single-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 floatx80_to_float32( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 33, &aSig ); + if ( aExp || aSig ) aExp -= 0x3F81; + return roundAndPackFloat32( aSign, aExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the double-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 floatx80_to_float64( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig, zSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shift64RightJamming( aSig, 1, &zSig ); + if ( aExp || aSig ) aExp -= 0x3C01; + return roundAndPackFloat64( aSign, aExp, zSig ); + +} + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the quadruple-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 floatx80_to_float128( floatx80 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); + } + shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp, zSig0, zSig1 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the extended double-precision floating-point value `a' to an integer, +and returns the result as an extended quadruple-precision floating-point +value. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + floatx80 z; + + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaN( a, a ); + } + return a; + } + if ( aExp <= 0x3FFE ) { + if ( ( aExp == 0 ) + && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + return a; + } + float_exception_flags |= float_flag_inexact; + aSign = extractFloatx80Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.low += lastBitMask>>1; + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z.low += roundBitsMask; + } + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the extended double- +precision floating-point values `a' and `b'. If `zSign' is true, the sum is +negated before being returned. `zSign' is ignored if the result is a NaN. +The addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b ); + } + return a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); + goto roundAndPack; + } + zExp = aExp; + goto shiftRight1; + } + + zSig0 = aSig + bSig; + + if ( (sbits64) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the extended +double-precision floating-point values `a' and `b'. If `zSign' is true, +the difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + int32 expDiff; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN( a, b ); + } + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + zSig1 = 0; + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) ++expDiff; + shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + bBigger: + sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) --expDiff; + shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + aBigger: + sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + return + normalizeRoundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the extended double-precision floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_add( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs( a, b, aSign ); + } + else { + return subFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_sub( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs( a, b, aSign ); + } + else { + return addFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_mul( floatx80 a, floatx80 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (sbits64) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the extended double-precision floating-point +value `a' by the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_div( floatx80 a, floatx80 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + bits64 rem0, rem1, rem2, term0, term1, term2; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + goto invalid; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (bits64) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return + roundAndPackFloatx80( + floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the extended double-precision floating-point value +`a' with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_rem( floatx80 a, floatx80 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig; + bits64 q, term0, term1, alternateASig0, alternateASig1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( (bits64) ( aSig0<<1 ) == 0 ) return a; + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; + } + q = ( bSig <= aSig0 ); + if ( q ) aSig0 -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + mul64To128( bSig, q, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++q; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + } + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the extended double-precision floating-point +value `a'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_sqrt( floatx80 a ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + bits64 shiftedRem0, shiftedRem1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; + zSig0 = estimateSqrt32( aExp, aSig0>>32 ); + zSig0 <<= 31; + aSig1 = 0; + shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4; + if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); + shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 ); + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + shortShift128Left( 0, zSig0, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 ); + zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); + if ( (bits64) ( zSig1<<1 ) <= 10 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( zSig0, zSig1, &term1, &term2 ); + shortShift128Left( term1, term2, 1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 ); + term3 |= 1; + add192( + rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + return + roundAndPackFloatx80( + floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than or equal to the corresponding value `b', and 0 otherwise. The +comparison is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is equal +to the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq_signaling( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs +do not cause an exception. Otherwise, the comparison is performed according +to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause +an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the 32-bit two's complement integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float128_to_int32( float128 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + aSig0 |= ( aSig1 != 0 ); + shiftCount = 0x4028 - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); + return roundAndPackInt32( aSign, aSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the 32-bit two's complement integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float128_to_int32_round_to_zero( float128 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1, savedASig; + int32 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + aSig0 |= ( aSig1 != 0 ); + shiftCount = 0x402F - aExp; + if ( shiftCount < 17 ) { + if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; + goto invalid; + } + else if ( 48 < shiftCount ) { + if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig0 |= LIT64( 0x0001000000000000 ); + savedASig = aSig0; + aSig0 >>= shiftCount; + z = aSig0; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig0<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the single-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float128_to_float32( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + bits32 zSig; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32( float128ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + aSig0 |= ( aSig1 != 0 ); + shift64RightJamming( aSig0, 18, &aSig0 ); + zSig = aSig0; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x3F81; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float128_to_float64( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat64( float128ToCommonNaN( a ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + aSig0 |= ( aSig1 != 0 ); + if ( aExp || aSig0 ) { + aSig0 |= LIT64( 0x4000000000000000 ); + aExp -= 0x3C01; + } + return roundAndPackFloat64( aSign, aExp, aSig0 ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the extended double-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 float128_to_floatx80( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloatx80( float128ToCommonNaN( a ) ); + } + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); + return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the quadruple-precision floating-point value `a' to an integer, and +returns the result as a quadruple-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float128 z; + + aExp = extractFloat128Exp( a ); + if ( 0x402F <= aExp ) { + if ( 0x406F <= aExp ) { + if ( ( aExp == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) + ) { + return propagateFloat128NaN( a, a ); + } + return a; + } + lastBitMask = 1; + lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( lastBitMask ) { + add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else { + if ( (sbits64) z.low < 0 ) { + ++z.high; + if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; + } + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat128Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + } + } + z.low &= ~ roundBitsMask; + } + else { + if ( aExp <= 0x3FFE ) { + if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat128Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) + && ( extractFloat128Frac0( a ) + | extractFloat128Frac1( a ) ) + ) { + return packFloat128( aSign, 0x3FFF, 0, 0 ); + } + break; + case float_round_down: + return + aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) + : packFloat128( 0, 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloat128( 1, 0, 0, 0 ) + : packFloat128( 0, 0x3FFF, 0, 0 ); + } + return packFloat128( aSign, 0, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x402F - aExp; + roundBitsMask = lastBitMask - 1; + z.low = 0; + z.high = a.high; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.high += lastBitMask>>1; + if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { + z.high &= ~ lastBitMask; + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat128Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + z.high |= ( a.low != 0 ); + z.high += roundBitsMask; + } + } + z.high &= ~ roundBitsMask; + } + if ( ( z.low != a.low ) || ( z.high != a.high ) ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the quadruple-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + int32 expDiff; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN( a, b ); + } + return a; + } + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); + zSig2 = 0; + zSig0 |= LIT64( 0x0002000000000000 ); + zExp = aExp; + goto shiftRight1; + } + aSig0 |= LIT64( 0x0001000000000000 ); + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + --zExp; + if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; + ++zExp; + shiftRight1: + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + roundAndPack: + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the quadruple- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + int32 expDiff; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN( a, b ); + } + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig0 < aSig0 ) goto aBigger; + if ( aSig0 < bSig0 ) goto bBigger; + if ( bSig1 < aSig1 ) goto aBigger; + if ( aSig1 < bSig1 ) goto bBigger; + return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + bSig0 |= LIT64( 0x4000000000000000 ); + bBigger: + sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); + aSig0 |= LIT64( 0x4000000000000000 ); + aBigger: + sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the quadruple-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_add( float128 a, float128 b ) +{ + flag aSign, bSign; + + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { + return addFloat128Sigs( a, b, aSign ); + } + else { + return subFloat128Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the quadruple-precision floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_sub( float128 a, float128 b ) +{ + flag aSign, bSign; + + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { + return subFloat128Sigs( a, b, aSign ); + } + else { + return addFloat128Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the quadruple-precision floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_mul( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b ); + } + if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + zExp = aExp + bExp - 0x4000; + aSig0 |= LIT64( 0x0001000000000000 ); + shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); + mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); + add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zSig2 |= ( zSig3 != 0 ); + if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + ++zExp; + } + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the quadruple-precision floating-point value +`a' by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_div( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + goto invalid; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign, 0, 0, 0 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero ); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = aExp - bExp + 0x3FFD; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { + shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); + mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); + sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); + if ( ( zSig1 & 0x3FFF ) <= 4 ) { + mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the quadruple-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_rem( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig0, bSig1; + bits64 q, term0, term1, term2, allZero, alternateASig0, alternateASig1; + bits64 sigMean1; + sbits64 sigMean0; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), + aSig1, + 15 - ( expDiff < 0 ), + &aSig0, + &aSig1 + ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + q = le128( bSig0, bSig1, aSig0, aSig1 ); + if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); + shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); + sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 61; + } + if ( -64 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + expDiff += 52; + if ( expDiff < 0 ) { + shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + } + do { + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (sbits64) aSig0 ); + add128( + aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (sbits64) aSig0 < 0 ); + if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); + return + normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the quadruple-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_sqrt( float128 a ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1, zSig2; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + bits64 shiftedRem0, shiftedRem1; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; + aSig0 |= LIT64( 0x0001000000000000 ); + zSig0 = estimateSqrt32( aExp, aSig0>>17 ); + zSig0 <<= 31; + shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4; + if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); + shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 ); + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig0; + shortShift128Left( 0, zSig0, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 ); + zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); + if ( ( zSig1 & 0x3FFF ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( zSig0, zSig1, &term1, &term2 ); + shortShift128Left( term1, term2, 1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 ); + term3 |= 1; + add192( + rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_eq( float128 a, float128 b ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_le( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_lt( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_eq_signaling( float128 a, float128 b ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_le_quiet( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_lt_quiet( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h new file mode 100644 index 000000000..26745a4cb --- /dev/null +++ b/arch/arm/nwfpe/softfloat.h @@ -0,0 +1,290 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#ifndef __SOFTFLOAT_H__ +#define __SOFTFLOAT_H__ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. The same applies to +the `FLOAT128' macro and the quadruple-precision format `float128'. +------------------------------------------------------------------------------- +*/ +#define FLOATX80 +/* #define FLOAT128 */ + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. +------------------------------------------------------------------------------- +*/ +typedef unsigned long int float32; +typedef unsigned long long float64; +#ifdef FLOATX80 +typedef struct { + unsigned short high; + unsigned long long low; +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { + unsigned long long high, low; +} float128; +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode. +------------------------------------------------------------------------------- +*/ +extern signed char float_detect_tininess; +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. +------------------------------------------------------------------------------- +*/ +extern signed char float_rounding_mode; +enum { + float_round_nearest_even = 0, + float_round_to_zero = 1, + float_round_down = 2, + float_round_up = 3 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. +------------------------------------------------------------------------------- +extern signed char float_exception_flags; +enum { + float_flag_inexact = 1, + float_flag_underflow = 2, + float_flag_overflow = 4, + float_flag_divbyzero = 8, + float_flag_invalid = 16 +}; + +ScottB: November 4, 1998 +Changed the enumeration to match the bit order in the FPA11. +*/ + +extern signed char float_exception_flags; +enum { + float_flag_invalid = 1, + float_flag_divbyzero = 2, + float_flag_overflow = 4, + float_flag_underflow = 8, + float_flag_inexact = 16 +}; + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. +------------------------------------------------------------------------------- +*/ +void float_raise( signed char ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( signed int ); +float64 int32_to_float64( signed int ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( signed int ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( signed int ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float32_to_int32( float32 ); +signed int float32_to_int32_round_to_zero( float32 ); +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. +------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +char float32_eq( float32, float32 ); +char float32_le( float32, float32 ); +char float32_lt( float32, float32 ); +char float32_eq_signaling( float32, float32 ); +char float32_le_quiet( float32, float32 ); +char float32_lt_quiet( float32, float32 ); +char float32_is_signaling_nan( float32 ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float64_to_int32( float64 ); +signed int float64_to_int32_round_to_zero( float64 ); +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +char float64_eq( float64, float64 ); +char float64_le( float64, float64 ); +char float64_lt( float64, float64 ); +char float64_eq_signaling( float64, float64 ); +char float64_le_quiet( float64, float64 ); +char float64_lt_quiet( float64, float64 ); +char float64_is_signaling_nan( float64 ); + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int floatx80_to_int32( floatx80 ); +signed int floatx80_to_int32_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern signed char floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +char floatx80_eq( floatx80, floatx80 ); +char floatx80_le( floatx80, floatx80 ); +char floatx80_lt( floatx80, floatx80 ); +char floatx80_eq_signaling( floatx80, floatx80 ); +char floatx80_le_quiet( floatx80, floatx80 ); +char floatx80_lt_quiet( floatx80, floatx80 ); +char floatx80_is_signaling_nan( floatx80 ); + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float128_to_int32( float128 ); +signed int float128_to_int32_round_to_zero( float128 ); +float32 float128_to_float32( float128 ); +float64 float128_to_float64( float128 ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision operations. +------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 ); +float128 float128_add( float128, float128 ); +float128 float128_sub( float128, float128 ); +float128 float128_mul( float128, float128 ); +float128 float128_div( float128, float128 ); +float128 float128_rem( float128, float128 ); +float128 float128_sqrt( float128 ); +char float128_eq( float128, float128 ); +char float128_le( float128, float128 ); +char float128_lt( float128, float128 ); +char float128_eq_signaling( float128, float128 ); +char float128_le_quiet( float128, float128 ); +char float128_lt_quiet( float128, float128 ); +char float128_is_signaling_nan( float128 ); + +#endif + +#endif diff --git a/arch/arm/vmlinux-armv.lds b/arch/arm/vmlinux-armv.lds index f0d4a86c5..681143172 100644 --- a/arch/arm/vmlinux-armv.lds +++ b/arch/arm/vmlinux-armv.lds @@ -7,50 +7,64 @@ OUTPUT_ARCH(arm) ENTRY(_start) SECTIONS { - _text = .; /* Text and read-only data */ - .text : { + _text = .; /* Text and read-only data */ + .text : { } /* Set text start address */ + + __init_begin = .; /* Init code and data */ + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(4096); + __init_end = .; + + __ebsa285_begin = .; + .text.ebsa285 : { *(.text.ebsa285) } + .data.ebsa285 : { *(.data.ebsa285) } + . = ALIGN(4096); + __ebsa285_end = .; + + __netwinder_begin = .; + .text.netwinder : { *(.text.netwinder) } + .data.netwinder : { *(.data.netwinder) } + . = ALIGN(4096); + __netwinder_end = .; + + .text.real : { /* Real text segment */ *(.text) *(.fixup) *(.gnu.warning) - } = 0x9090 + } + .text.lock : { *(.text.lock) } /* out-of-line lock text */ .rodata : { *(.rodata) } .kstrtab : { *(.kstrtab) } - . = ALIGN(16); /* Exception table */ + . = ALIGN(16); /* Exception table */ __start___ex_table = .; __ex_table : { *(__ex_table) } __stop___ex_table = .; - __start___ksymtab = .; /* Kernel symbol table */ + __start___ksymtab = .; /* Kernel symbol table */ __ksymtab : { *(__ksymtab) } __stop___ksymtab = .; - _etext = .; /* End of text section */ + _etext = .; /* End of text section */ . = ALIGN(8192); - .data : { /* Data */ + .data : { /* Data */ *(.init.task) *(.data) CONSTRUCTORS } - _edata = .; /* End of data section */ - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(4096); - __init_end = .; + _edata = .; /* End of data section */ - __bss_start = .; /* BSS */ + __bss_start = .; /* BSS */ .bss : { *(.bss) } _end = . ; - /* Stabs debugging sections. */ + /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } .stab.excl 0 : { *(.stab.excl) } diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 49be2fe7b..b5ede6508 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -95,10 +95,9 @@ CONFIG_BLK_DEV_CMD640=y # CONFIG_BLK_DEV_CMD640_ENHANCED is not set CONFIG_BLK_DEV_RZ1000=y CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_BLK_DEV_IDEDMA_PCI is not set # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_AEC6210 is not set -CONFIG_IDEDMA_AUTO=y # CONFIG_IDE_CHIPSETS is not set # @@ -355,7 +354,6 @@ CONFIG_EXT2_FS=y # # CONFIG_CODA_FS is not set CONFIG_NFS_FS=y -# CONFIG_NFSD_SUN is not set CONFIG_SUNRPC=y CONFIG_LOCKD=y # CONFIG_SMB_FS is not set diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index 9c6948c60..ae1980a42 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -148,7 +148,6 @@ static struct inode_operations proc_mca_inode_operations = { NULL, /* truncate */ NULL, /* permission */ NULL, /* smap */ - NULL, /* updatepage */ NULL /* revalidate */ }; #endif diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 5588f5ca8..9d3a8b4b5 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -1164,6 +1164,7 @@ static void smp_tune_scheduling (void) } unsigned int prof_multiplier[NR_CPUS]; +unsigned int prof_old_multiplier[NR_CPUS]; unsigned int prof_counter[NR_CPUS]; /* @@ -1187,6 +1188,7 @@ void __init smp_boot_cpus(void) for (i = 0; i < NR_CPUS; i++) { cpu_number_map[i] = -1; prof_counter[i] = 1; + prof_old_multiplier[i] = 1; prof_multiplier[i] = 1; } @@ -1733,6 +1735,10 @@ int smp_call_function (void (*func) (void *info), void *info, int retry, return 0; } +static unsigned int calibration_result; + +void setup_APIC_timer(unsigned int clocks); + /* * Local timer interrupt handler. It does both profiling and * process statistics/rescheduling. @@ -1745,6 +1751,7 @@ int smp_call_function (void (*func) (void *info), void *info, int retry, void smp_local_timer_interrupt(struct pt_regs * regs) { + int user = (user_mode(regs) != 0); int cpu = smp_processor_id(); /* @@ -1753,25 +1760,34 @@ void smp_local_timer_interrupt(struct pt_regs * regs) * updated with atomic operations). This is especially * useful with a profiling multiplier != 1 */ - if (!user_mode(regs)) + if (!user) x86_do_profile(regs->eip); if (!--prof_counter[cpu]) { - int user=0,system=0; + int system = 1 - user; struct task_struct * p = current; /* + * The multiplier may have changed since the last time we got + * to this point as a result of the user writing to + * /proc/profile. In this case we need to adjust the APIC + * timer accordingly. + * + * Interrupts are already masked off at this point. + */ + prof_counter[cpu] = prof_multiplier[cpu]; + if (prof_counter[cpu] != prof_old_multiplier[cpu]) { + setup_APIC_timer(calibration_result/prof_counter[cpu]); + prof_old_multiplier[cpu] = prof_counter[cpu]; + } + + /* * After doing the above, we need to make like * a normal interrupt - otherwise timer interrupts * ignore the global interrupt lock, which is the * WrongThing (tm) to do. */ - if (user_mode(regs)) - user=1; - else - system=1; - irq_enter(cpu, 0); update_one_process(p, 1, user, system, cpu); if (p->pid) { @@ -1791,7 +1807,6 @@ void smp_local_timer_interrupt(struct pt_regs * regs) kstat.per_cpu_system[cpu] += system; } - prof_counter[cpu]=prof_multiplier[cpu]; irq_exit(cpu, 0); } @@ -2064,8 +2079,6 @@ int __init calibrate_APIC_clock(void) return calibration_result; } -static unsigned int calibration_result; - void __init setup_APIC_clock(void) { unsigned long flags; @@ -2117,13 +2130,10 @@ void __init setup_APIC_clock(void) /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. - * - * usually you want to run this on all CPUs ;) */ int setup_profiling_timer(unsigned int multiplier) { - int cpu = smp_processor_id(); - unsigned long flags; + int i; /* * Sanity check. [at least 500 APIC cycles should be @@ -2133,11 +2143,14 @@ int setup_profiling_timer(unsigned int multiplier) if ( (!multiplier) || (calibration_result/multiplier < 500)) return -EINVAL; - save_flags(flags); - cli(); - setup_APIC_timer(calibration_result/multiplier); - prof_multiplier[cpu]=multiplier; - restore_flags(flags); + /* + * Set the new multiplier for each CPU. CPUs don't start using the + * new values until the next timer interrupt in which they do process + * accounting. At that time they also adjust their APIC timers + * accordingly. + */ + for (i = 0; i < NR_CPUS; ++i) + prof_multiplier[i] = multiplier; return 0; } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 22ebf7a10..bef9ea2fe 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -159,10 +159,10 @@ void show_mem(void) reserved++; else if (PageSwapCache(mem_map+i)) cached++; - else if (!atomic_read(&mem_map[i].count)) + else if (!page_count(mem_map+i)) free++; else - shared += atomic_read(&mem_map[i].count) - 1; + shared += page_count(mem_map+i) - 1; } printk("%d pages of RAM\n",total); printk("%d reserved pages\n",reserved); @@ -449,7 +449,7 @@ __initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem)) reservedpages++; continue; } - atomic_set(&mem_map[MAP_NR(tmp)].count, 1); + set_page_count(mem_map+MAP_NR(tmp), 1); #ifdef CONFIG_BLK_DEV_INITRD if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end)) @@ -475,7 +475,7 @@ void free_initmem(void) addr = (unsigned long)(&__init_begin); for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved); - atomic_set(&mem_map[MAP_NR(addr)].count, 1); + set_page_count(mem_map+MAP_NR(addr), 1); free_page(addr); } printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); @@ -494,9 +494,9 @@ void si_meminfo(struct sysinfo *val) if (PageReserved(mem_map+i)) continue; val->totalram++; - if (!atomic_read(&mem_map[i].count)) + if (!page_count(mem_map+i)) continue; - val->sharedram += atomic_read(&mem_map[i].count) - 1; + val->sharedram += page_count(mem_map+i) - 1; } val->totalram <<= PAGE_SHIFT; val->sharedram <<= PAGE_SHIFT; diff --git a/arch/mips/defconfig b/arch/mips/defconfig index a476aeb87..80285f390 100644 --- a/arch/mips/defconfig +++ b/arch/mips/defconfig @@ -376,10 +376,6 @@ CONFIG_NLS_KOI8_R=m # CONFIG_VGA_CONSOLE=y # CONFIG_FB is not set -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y # # Sound @@ -391,4 +387,5 @@ CONFIG_FONT_8x16=y # # CONFIG_CROSSCOMPILE is not set # CONFIG_MIPS_FPE_MODULE is not set +# CONFIG_REMOTE_DEBUG is not set # CONFIG_MAGIC_SYSRQ is not set diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 544c3e186..94282cc56 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.12 1999/02/25 21:06:44 tsbogend Exp $ +/* $Id: init.c,v 1.13 1999/05/01 22:40:40 ralf Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -132,7 +132,7 @@ static inline unsigned long setup_zero_pages(void) pg = MAP_NR(empty_zero_page); while(pg < MAP_NR(empty_zero_page) + (1 << order)) { set_bit(PG_reserved, &mem_map[pg].flags); - atomic_set(&mem_map[pg].count, 0); + set_page_count(mem_map + pg, 0); pg++; } @@ -254,10 +254,10 @@ void show_mem(void) reserved++; else if (PageSwapCache(mem_map+i)) cached++; - else if (!atomic_read(&mem_map[i].count)) + else if (!page_count(mem_map + i)) free++; else - shared += atomic_read(&mem_map[i].count) - 1; + shared += page_count(mem_map + i) - 1; } printk("%d pages of RAM\n", total); printk("%d reserved pages\n", reserved); @@ -324,7 +324,7 @@ __initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem)) continue; } num_physpages++; - atomic_set(&mem_map[MAP_NR(tmp)].count, 1); + set_page_count(mem_map + MAP_NR(tmp), 1); #ifdef CONFIG_BLK_DEV_INITRD if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end)) @@ -354,7 +354,7 @@ void free_initmem(void) addr = (unsigned long)(&__init_begin); for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved); - atomic_set(&mem_map[MAP_NR(addr)].count, 1); + set_page_count(mem_map + MAP_NR(addr), 1); free_page(addr); } printk("Freeing unused kernel memory: %dk freed\n", @@ -374,9 +374,9 @@ void si_meminfo(struct sysinfo *val) if (PageReserved(mem_map+i)) continue; val->totalram++; - if (!atomic_read(&mem_map[i].count)) + if (!page_count(mem_map + i)) continue; - val->sharedram += atomic_read(&mem_map[i].count) - 1; + val->sharedram += page_count(mem_map + i) - 1; } val->totalram <<= PAGE_SHIFT; val->sharedram <<= PAGE_SHIFT; diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index e69cc9ef0..1caf1143a 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -259,7 +259,6 @@ CONFIG_NFSD=m CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_SMB_FS=m -CONFIG_SMB_WIN95=y CONFIG_NCP_FS=m # CONFIG_NCPFS_PACKET_SIGNING is not set # CONFIG_NCPFS_IOCTL_LOCKING is not set diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c index e6023f43b..9f7b5b1f7 100644 --- a/arch/sparc/kernel/signal.c +++ b/arch/sparc/kernel/signal.c @@ -1,4 +1,4 @@ -/* $Id: signal.c,v 1.91 1999/01/26 11:00:44 jj Exp $ +/* $Id: signal.c,v 1.92 1999/06/14 05:23:53 davem Exp $ * linux/arch/sparc/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds @@ -1194,6 +1194,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs, default: lock_kernel(); sigaddset(¤t->signal, signr); + recalc_sigpending(current); current->flags |= PF_SIGNALED; do_exit(exit_code); /* NOT REACHED */ diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c index 050ba65db..ed610942e 100644 --- a/arch/sparc/kernel/sys_sunos.c +++ b/arch/sparc/kernel/sys_sunos.c @@ -1,4 +1,4 @@ -/* $Id: sys_sunos.c,v 1.98 1999/06/09 08:23:39 davem Exp $ +/* $Id: sys_sunos.c,v 1.99 1999/06/11 11:40:39 davem Exp $ * sys_sunos.c: SunOS specific syscall compatibility support. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -197,7 +197,7 @@ asmlinkage int sunos_brk(unsigned long brk) * fool it, but this should catch most mistakes. */ freepages = buffermem >> PAGE_SHIFT; - freepages += page_cache_size; + freepages += atomic_read(&page_cache_size); freepages >>= 1; freepages += nr_free_pages; freepages += nr_swap_pages; @@ -209,7 +209,7 @@ asmlinkage int sunos_brk(unsigned long brk) * Ok, we have probably got enough memory - let it rip. */ current->mm->brk = brk; - do_brk(oldbrk, newbrk-oldbrk) + do_brk(oldbrk, newbrk-oldbrk); retval = 0; out: up(¤t->mm->mmap_sem); diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 051c6560a..86eb05819 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -299,7 +299,6 @@ CONFIG_NFSD=m CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_SMB_FS=m -CONFIG_SMB_WIN95=y CONFIG_NCP_FS=m # CONFIG_NCPFS_PACKET_SIGNING is not set # CONFIG_NCPFS_IOCTL_LOCKING is not set diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 00340d6d5..476d53558 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -34,7 +34,7 @@ * and that it is in the task area before calling this: this routine does * no checking. */ -static pte_t *get_page(struct task_struct * tsk, +static pte_t *ptrace_get_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr, int write) { pgd_t * pgdir; @@ -121,7 +121,7 @@ static inline unsigned long get_long(struct task_struct * tsk, pte_t * pgtable; unsigned long page, retval; - if (!(pgtable = get_page (tsk, vma, addr, 0))) return 0; + if (!(pgtable = ptrace_get_page (tsk, vma, addr, 0))) return 0; page = pte_page(*pgtable); /* this is a hack for non-kernel-mapped video buffers and similar */ if (MAP_NR(page) >= max_mapnr) @@ -138,7 +138,7 @@ static inline void put_long(struct task_struct * tsk, struct vm_area_struct * vm pte_t *pgtable; unsigned long page; - if (!(pgtable = get_page (tsk, vma, addr, 1))) return; + if (!(pgtable = ptrace_get_page (tsk, vma, addr, 1))) return; page = pte_page(*pgtable); /* this is a hack for non-kernel-mapped video buffers and similar */ flush_cache_page(vma, addr); @@ -166,7 +166,7 @@ static inline unsigned int get_int(struct task_struct * tsk, unsigned long page; unsigned int retval; - if (!(pgtable = get_page (tsk, vma, addr, 0))) return 0; + if (!(pgtable = ptrace_get_page (tsk, vma, addr, 0))) return 0; page = pte_page(*pgtable); /* this is a hack for non-kernel-mapped video buffers and similar */ if (MAP_NR(page) >= max_mapnr) @@ -183,7 +183,7 @@ static inline void put_int(struct task_struct * tsk, struct vm_area_struct * vma pte_t *pgtable; unsigned long page; - if (!(pgtable = get_page (tsk, vma, addr, 1))) return; + if (!(pgtable = ptrace_get_page (tsk, vma, addr, 1))) return; page = pte_page(*pgtable); /* this is a hack for non-kernel-mapped video buffers and similar */ flush_cache_page(vma, addr); @@ -941,7 +941,7 @@ asmlinkage void do_ptrace(struct pt_regs *regs) pt_error_return(regs, EIO); goto flush_and_out; } - pgtable = get_page (child, vma, src, 0); + pgtable = ptrace_get_page (child, vma, src, 0); up(&child->mm->mmap_sem); if (src & ~PAGE_MASK) { curlen = PAGE_SIZE - (src & ~PAGE_MASK); @@ -988,7 +988,7 @@ asmlinkage void do_ptrace(struct pt_regs *regs) pt_error_return(regs, EIO); goto flush_and_out; } - pgtable = get_page (child, vma, dest, 1); + pgtable = ptrace_get_page (child, vma, dest, 1); up(&child->mm->mmap_sem); if (dest & ~PAGE_MASK) { curlen = PAGE_SIZE - (dest & ~PAGE_MASK); diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index e0ba8aa1e..247afc77c 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -1,4 +1,4 @@ -/* $Id: signal.c,v 1.40 1999/06/02 19:19:52 jj Exp $ +/* $Id: signal.c,v 1.41 1999/06/14 05:23:58 davem Exp $ * arch/sparc64/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds @@ -898,6 +898,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs, default: lock_kernel(); sigaddset(¤t->signal, signr); + recalc_sigpending(current); current->flags |= PF_SIGNALED; do_exit(exit_code); /* NOT REACHED */ diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index d425132fd..b1190d244 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -1,4 +1,4 @@ -/* $Id: signal32.c,v 1.47 1998/10/13 09:07:40 davem Exp $ +/* $Id: signal32.c,v 1.48 1999/06/14 05:24:01 davem Exp $ * arch/sparc64/kernel/signal32.c * * Copyright (C) 1991, 1992 Linus Torvalds @@ -1336,6 +1336,7 @@ asmlinkage int do_signal32(sigset_t *oldset, struct pt_regs * regs, default: lock_kernel(); sigaddset(¤t->signal, signr); + recalc_sigpending(current); current->flags |= PF_SIGNALED; do_exit(exit_code); /* NOT REACHED */ diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c index 99e010e78..d375dc26b 100644 --- a/arch/sparc64/kernel/sys_sunos32.c +++ b/arch/sparc64/kernel/sys_sunos32.c @@ -164,7 +164,7 @@ asmlinkage int sunos_brk(u32 baddr) * fool it, but this should catch most mistakes. */ freepages = buffermem >> PAGE_SHIFT; - freepages += page_cache_size; + freepages += atomic_read(&page_cache_size); freepages >>= 1; freepages += nr_free_pages; freepages += nr_swap_pages; |