diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1999-06-22 23:05:57 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1999-06-22 23:05:57 +0000 |
commit | 51d3b7814cdccef9188240fe0cbd8d97ff2c7470 (patch) | |
tree | 5cbb01d0323d4f63ade66bdf48ba4a91aaa6df16 /arch/arm | |
parent | 52273a23c9a84336b93a35e4847fc88fac7eb0e4 (diff) |
Merge with Linux 2.3.7.
WARNING: 2.3.7 is known to eat filesystems for breakfast and little
children for lunch, so if you try this on your machine make backups
first ...
Diffstat (limited to 'arch/arm')
84 files changed, 15109 insertions, 2493 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 967ee6766..1c198989d 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,21 +10,31 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1995, 1996 by Russell King +# Copyright (C) 1995-1999 by Russell King CFLAGS_PROC := ASFLAGS_PROC := -# All processors get `-mshort-load-bytes' for now, to work around alignment -# problems. This is more of a hack that just happens to work than a real fix -# but it will do for now. +# GCC 2.7 uses different options to later compilers; sort out which we have +CONFIG_GCC_NEW := $(shell if $(CC) --version 2>&1 | grep '^2\.7' > /dev/null; then echo n; else echo y; fi) + +# Hack to get around RiscPC with StrongARM optimistaion +# problem - force ARM710 optimisation for now. +ifeq ($(CONFIG_GCC_NEW),y) + ifeq ($(CONFIG_ARCH_RPC),y) + ifeq ($(CONFIG_CPU_SA110),y) + CONFIG_CPU_SA110 := n + CONFIG_CPU_ARM7 := y + endif + endif +endif ifeq ($(CONFIG_CPU_26),y) PROCESSOR = armo TEXTADDR = 0x02080000 ZTEXTADDR = 0x01800000 ZRELADDR = 0x02080000 - ifeq ($(CONFIG_BINUTILS_NEW),y) + ifeq ($(CONFIG_GCC_NEW),y) CFLAGS_PROC += -mapcs-26 -mshort-load-bytes ifeq ($(CONFIG_CPU_ARM2),y) CFLAGS_PROC += -mcpu=arm2 @@ -49,7 +59,7 @@ endif ifeq ($(CONFIG_CPU_32),y) PROCESSOR = armv TEXTADDR = 0xC0008000 - ifeq ($(CONFIG_BINUTILS_NEW),y) + ifeq ($(CONFIG_GCC_NEW),y) CFLAGS_PROC += -mapcs-32 -mshort-load-bytes ifeq ($(CONFIG_CPU_ARM6),y) CFLAGS_PROC += -mcpu=arm6 @@ -68,10 +78,11 @@ endif # Processor Architecture # CFLAGS_PROC - processor dependent CFLAGS -# PROCESSOR - processor type -# TEXTADDR - Uncompressed kernel link text address -# ZTEXTADDR - Compressed kernel link text address -# ZRELADDR - Compressed kernel relocating address (point at which uncompressed kernel is loaded). 
+# PROCESSOR - processor type +# TEXTADDR - Uncompressed kernel link text address +# ZTEXTADDR - Compressed kernel link text address +# ZRELADDR - Compressed kernel relocating address +# (point at which uncompressed kernel is loaded). # COMPRESSED_HEAD = head.o @@ -79,19 +90,16 @@ COMPRESSED_HEAD = head.o ifeq ($(CONFIG_ARCH_A5K),y) MACHINE = a5k ARCHDIR = arc -COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o endif ifeq ($(CONFIG_ARCH_ARC),y) MACHINE = arc ARCHDIR = arc -COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o endif ifeq ($(CONFIG_ARCH_RPC),y) MACHINE = rpc ARCHDIR = rpc -COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o ZTEXTADDR = 0x10008000 ZRELADDR = 0x10008000 endif @@ -103,13 +111,17 @@ ZTEXTADDR = 0x00008000 ZRELADDR = 0x00008000 endif -ifeq ($(CONFIG_ARCH_EBSA285),y) -MACHINE = ebsa285 +ifeq ($(CONFIG_FOOTBRIDGE),y) +MACHINE = footbridge ARCHDIR = ebsa285 ZTEXTADDR = 0x00008000 ZRELADDR = 0x00008000 endif +ifeq ($(CONFIG_ARCH_CO285),y) +TEXTADDR = 0x60008000 +endif + ifeq ($(CONFIG_ARCH_NEXUSPCI),y) MACHINE = nexuspci ARCHDIR = nexuspci @@ -119,31 +131,13 @@ COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr_scc.o COMPRESSED_HEAD = head-nexuspci.o endif -ifeq ($(CONFIG_ARCH_VNC),y) -TEXTADDR = 0xC000C000 -MACHINE = vnc -ARCHDIR = vnc -endif - -ifeq ($(CONFIG_ARCH_TBOX),y) -MACHINE = tbox -ARCHDIR = tbox -ZTEXTADDR = 0x80008000 -ZRELDIR = 0x80008000 -endif - PERL = perl -ifeq ($(CONFIG_BINUTILS_NEW),y) -LD = $(CROSS_COMPILE)ld -m elf32arm -else -LD = $(CROSS_COMPILE)ld -m elf_arm -endif +LD = $(CROSS_COMPILE)ld OBJCOPY = $(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S OBJDUMP = $(CROSS_COMPILE)objdump CPP = $(CC) -E ARCHCC := $(word 1,$(CC)) GCCLIB := `$(CC) $(CFLAGS_PROC) --print-libgcc-file-name` -#GCCARCH := -B/usr/bin/arm-linuxelf- HOSTCFLAGS := $(CFLAGS:-fomit-frame-pointer=) ifeq ($(CONFIG_FRAME_POINTER),y) CFLAGS := $(CFLAGS:-fomit-frame-pointer=) @@ -153,75 +147,40 @@ ASFLAGS := $(ASFLAGS_PROC) $(ASFLAGS) 
LINKFLAGS = -T $(TOPDIR)/arch/arm/vmlinux-$(PROCESSOR).lds -e stext -Ttext $(TEXTADDR) ZLINKFLAGS = -Ttext $(ZTEXTADDR) -SUBDIRS := $(SUBDIRS:drivers=arch/arm/drivers) arch/arm/lib arch/arm/kernel arch/arm/mm -HEAD := arch/arm/kernel/head-$(PROCESSOR).o arch/arm/kernel/init_task.o +# If we're intending to debug the kernel, make sure it has line number +# information. This gets stripped out when building (z)Image so it doesn't +# add anything to the footprint of the running kernel. +ifeq ($(CONFIG_DEBUG_INFO),y) +CFLAGS += -g +endif + +HEAD := arch/arm/kernel/head-$(PROCESSOR).o \ + arch/arm/kernel/init_task.o +SUBDIRS := arch/arm/special $(SUBDIRS) arch/arm/lib arch/arm/kernel \ + arch/arm/mm arch/arm/nwfpe CORE_FILES := arch/arm/kernel/kernel.o arch/arm/mm/mm.o $(CORE_FILES) LIBS := arch/arm/lib/lib.a $(LIBS) $(GCCLIB) - -BLOCK_DRIVERS := drivers/block/block.a -CDROM_DRIVERS := drivers/cdrom/cdrom.a -ifeq ($(CONFIG_FB),y) -CHAR_DRIVERS := arch/arm/drivers/char1/char1.a drivers/char/char.a arch/arm/drivers/char1/char1.a -else -ifeq ($(CONFIG_VGA_CONSOLE),y) -CHAR_DRIVERS := arch/arm/drivers/char1/char1.a drivers/char/char.a arch/arm/drivers/char1/char1.a -else -CHAR_DRIVERS := arch/arm/drivers/char/char.a -endif -endif -MISC_DRIVERS := drivers/misc/misc.a -NET_DRIVERS := drivers/net/net.a -PARIDE_DRIVERS := drivers/block/paride/paride.a -PCI_DRIVERS := drivers/pci/pci.a -SCSI_DRIVERS := drivers/scsi/scsi.a -SOUND_DRIVERS := drivers/sound/sound.a -VIDEO_DRIVERS := drivers/video/video.a -PNP_DRIVERS := drivers/pnp/pnp.a +DRIVERS += arch/arm/special/special.a ifeq ($(CONFIG_ARCH_ACORN),y) -BLOCK_DRIVERS += drivers/acorn/block/acorn-block.a -CHAR_DRIVERS += drivers/acorn/char/acorn-char.a -NET_DRIVERS += drivers/acorn/net/acorn-net.a drivers/net/net.a -SCSI_DRIVERS += drivers/acorn/scsi/acorn-scsi.a +SUBDIRS += drivers/acorn/block drivers/acorn/char drivers/acorn/net \ + drivers/acorn/scsi +DRIVERS += drivers/acorn/block/acorn-block.a \ + 
drivers/acorn/char/acorn-char.a \ + drivers/acorn/net/acorn-net.a \ + drivers/acorn/scsi/acorn-scsi.a endif -DRIVERS := $(BLOCK_DRIVERS) $(CHAR_DRIVERS) $(MISC_DRIVERS) $(NET_DRIVERS) - -ifeq ($(CONFIG_FB),y) -DRIVERS := $(DRIVERS) $(VIDEO_DRIVERS) -else -ifeq ($(CONFIG_VGA_CONSOLE),y) -DRIVERS := $(DRIVERS) $(VIDEO_DRIVERS) -endif -endif -ifeq ($(CONFIG_SCSI),y) -DRIVERS := $(DRIVERS) $(SCSI_DRIVERS) -endif -ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR),) -DRIVERS := $(DRIVERS) $(CDROM_DRIVERS) -endif -ifdef CONFIG_PCI -DRIVERS := $(DRIVERS) $(PCI_DRIVERS) -endif -ifeq ($(CONFIG_SOUND),y) -DRIVERS := $(DRIVERS) $(SOUND_DRIVERS) -endif -ifeq ($(CONFIG_PARIDE),y) -DRIVERS := $(DRIVERS) $(PARIDE_DRIVERS) -endif -ifdef CONFIG_PNP -DRIVERS := $(DRIVERS) $(PNP_DRIVERS) +ifeq ($(CONFIG_NWFPE),y) +DRIVERS += arch/arm/nwfpe/math-emu.a endif +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + symlinks:: $(RM) include/asm-arm/arch include/asm-arm/proc (cd include/asm-arm; ln -sf arch-$(ARCHDIR) arch; ln -sf proc-$(PROCESSOR) proc) -# Once we've finished integrating the sources, the @$(MAKE) will disappear -archmrproper: - rm -f include/asm-arm/arch include/asm-arm/proc - @$(MAKE) -C arch/$(ARCH)/drivers mrproper - arch/arm/kernel: dummy $(MAKE) linuxsubdirs SUBDIRS=arch/arm/kernel @@ -231,19 +190,20 @@ arch/arm/mm: dummy arch/arm/lib: dummy $(MAKE) linuxsubdirs SUBDIRS=arch/arm/lib -MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot - -zImage: vmlinux - @$(MAKEBOOT) zImage +zImage zinstall Image install: vmlinux + @$(MAKEBOOT) $@ -zinstall: vmlinux - @$(MAKEBOOT) zinstall +# Once we've finished integrating the sources, the @$(MAKE) will disappear +archmrproper: + rm -f include/asm-arm/arch include/asm-arm/proc + @$(MAKE) -C arch/$(ARCH)/special mrproper -Image: vmlinux - @$(MAKEBOOT) Image +archclean: + @$(MAKEBOOT) clean + $(RM) arch/arm/lib/constants.h -install: vmlinux - @$(MAKEBOOT) install +archdep: + @$(MAKEBOOT) dep # My testing targets (that short circuit a 
few dependencies) zImg:; @$(MAKEBOOT) zImage @@ -251,10 +211,19 @@ Img:; @$(MAKEBOOT) Image i:; @$(MAKEBOOT) install zi:; @$(MAKEBOOT) zinstall -archclean: - @$(MAKEBOOT) clean - $(RM) arch/arm/lib/constants.h +a5k_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/a5k arch/arm/defconfig + +ebsa110_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/ebsa110 arch/arm/defconfig + +footbridge_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/footbridge arch/arm/defconfig + +rpc_config: + $(RM) arch/arm/defconfig + cp arch/arm/def-configs/rpc arch/arm/defconfig -archdep: - @$(MAKEBOOT) dep -sed -e /^MACHINE..*=/s,= .*,= rpc,;/^PROCESSOR..*=/s,= .*,= armv, linux/arch/arm/Makefile.normal diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 0c6a04c5b..cf1481ab0 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -11,10 +11,15 @@ HEAD =$(COMPRESSED_HEAD) OBJS =$(HEAD) misc.o $(COMPRESSED_EXTRA) CFLAGS =-O2 -DSTDC_HEADERS $(CFLAGS_PROC) ARFLAGS =rc +FONTC =$(TOPDIR)/drivers/video/font_acorn_8x8.c + +ifeq ($(CONFIG_ARCH_ACORN),y) +OBJS += ll_char_wr.o font.o +endif all: vmlinux -vmlinux: piggy.o $(OBJS) +vmlinux: $(OBJS) piggy.o $(LD) $(ZLINKFLAGS) -o vmlinux $(OBJS) piggy.o $(HEAD): $(HEAD:.o=.S) @@ -29,5 +34,8 @@ piggy.o: $(SYSTEM) $(LD) -r -o piggy.o -b binary $$tmppiggy.gz -b elf32-arm -T $$tmppiggy.lnk; \ rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk; +font.o: $(FONTC) + $(CC) -Dstatic= -c -o $@ $(FONTC) + clean:; rm -f vmlinux core diff --git a/arch/arm/lib/ll_char_wr.S b/arch/arm/boot/compressed/ll_char_wr.S index 966d2846e..57865f2fd 100644 --- a/arch/arm/lib/ll_char_wr.S +++ b/arch/arm/boot/compressed/ll_char_wr.S @@ -12,7 +12,7 @@ @ Regs: [] = corruptible @ {} = used @ () = do not use - +#define __ASSEMBLY__ #include <linux/linkage.h> #include <asm/assembler.h> .text @@ -25,7 +25,7 @@ LC0: .word SYMBOL_NAME(bytes_per_char_h) .word SYMBOL_NAME(video_size_row) - 
.word SYMBOL_NAME(cmap_80) + .word SYMBOL_NAME(acorndata_8x8) .word SYMBOL_NAME(con_charconvtable) ENTRY(ll_write_char) diff --git a/arch/arm/config.in b/arch/arm/config.in index 467218db7..2fea6a661 100644 --- a/arch/arm/config.in +++ b/arch/arm/config.in @@ -14,18 +14,31 @@ choice 'ARM system type' \ A5000 CONFIG_ARCH_A5K \ RiscPC CONFIG_ARCH_RPC \ EBSA-110 CONFIG_ARCH_EBSA110 \ - EBSA-285 CONFIG_ARCH_EBSA285 \ - NexusPCI CONFIG_ARCH_NEXUSPCI \ - Corel-VNC CONFIG_ARCH_VNC \ - Tbox CONFIG_ARCH_TBOX" RiscPC + FootBridge-based CONFIG_FOOTBRIDGE" RiscPC + +if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then + bool 'FootBridge in HOST mode' CONFIG_HOST_FOOTBRIDGE + if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then + define_bool CONFIG_ADDIN_FOOTBRIDGE n + else + define_bool CONFIG_ADDIN_FOOTBRIDGE y + fi +fi + +if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then + bool ' Include support for Intel EBSA285' CONFIG_ARCH_EBSA285 + bool ' Include support for Chalice CATS boards' CONFIG_CATS + bool ' Include support for Corel NetWinder' CONFIG_ARCH_NETWINDER +fi -if [ "$CONFIG_ARCH_EBSA285" = "y" ]; then - bool ' Include support for CATS boards' CONFIG_CATS +if [ "$CONFIG_ADDIN_FOOTBRIDGE" = "y" ]; then + # If we get any other footbridge-based plug-in boards, then + # add your architecture options here + define_bool CONFIG_ARCH_CO285 y fi # Select various configuration options depending on the machine type # Easy check for Acorn-style architectures - if [ "$CONFIG_ARCH_ARC" = "y" -o \ "$CONFIG_ARCH_A5K" = "y" -o \ "$CONFIG_ARCH_RPC" = "y" ]; then @@ -34,23 +47,19 @@ else define_bool CONFIG_ARCH_ACORN n fi -if [ "$CONFIG_ARCH_TBOX" = "y" ]; then - define_bool CONFIG_BUS_I2C y -fi +#if [ "$CONFIG_ARCH_TBOX" = "y" ]; then +# define_bool CONFIG_BUS_I2C y +#fi # These machines always have PCI - if [ "$CONFIG_ARCH_NEXUSPCI" = "y" -o \ - "$CONFIG_ARCH_VNC" = "y" ]; then + "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then define_bool CONFIG_PCI y fi -if [ "$CONFIG_ARCH_EBSA285" = "y" ]; then - bool "PCI support" 
CONFIG_PCI -fi # These machines have ISA-DMA if [ "$CONFIG_CATS" = "y" -o \ - "$CONFIG_ARCH_VNC" = "y" ]; then + "$CONFIG_ARCH_NETWINDER" = "y" ]; then define_bool CONFIG_ISA_DMA y else define_bool CONFIG_ISA_DMA n @@ -59,7 +68,6 @@ fi # Figure out whether this system uses 26-bit or 32-bit CPUs. Nobody has # ever built a machine that can take both, and now that ARM3 is obsolete # nobody is likely to either. - if [ "$CONFIG_ARCH_ARC" = "y" -o \ "$CONFIG_ARCH_A5K" = "y" ]; then define_bool CONFIG_CPU_32 n @@ -71,7 +79,6 @@ fi # Now allow the user to choose a more precise CPU. This is only used to set # the flags we pass to GCC, not in any code. - choice 'Optimise for CPU' \ "ARM2 CONFIG_CPU_ARM2 \ ARM3 CONFIG_CPU_ARM3 \ @@ -80,22 +87,21 @@ choice 'Optimise for CPU' \ SA110 CONFIG_CPU_SA110" ARM6 if [ "$CONFIG_CPU_26" = "y" ]; then - # For 26-bit CPUs, the page size changes with the amount of physical RAM! # The default is 4MB but if the user has less they have to own up to it here. - choice 'Physical memory size' \ "4MB+ CONFIG_PAGESIZE_32 \ - 2MB CONFIG_PAGESIZE_16 \ - 1MB/512K CONFIG_PAGESIZE_8" 4MB+ + 2MB CONFIG_PAGESIZE_16" 4MB+ fi endmenu mainmenu_option next_comment comment 'Code maturity level options' bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -bool 'Use new compilation options (for GCC 2.8)' CONFIG_BINUTILS_NEW -bool 'Compile kernel with frame pointer (for useful debugging)' CONFIG_FRAME_POINTER +if [ "$CONFIG_CPU_32" = "y" -a "$CONFIG_ARCH_EBSA110" != "y" ]; then + bool 'Enable kernel-mode alignment trap handler (EXPERIMENTAL)' CONFIG_ALIGNMENT_TRAP +fi +bool 'Split text into discardable sections' CONFIG_TEXT_SECTIONS endmenu mainmenu_option next_comment @@ -113,13 +119,19 @@ bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL +tristate 'Math emulation' CONFIG_NWFPE tristate 'Kernel support for a.out binaries' 
CONFIG_BINFMT_AOUT tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC +if [ "$CONFIG_CPU_32" = "y" ]; then + tristate 'RISC OS personality' CONFIG_ARTHUR +fi tristate 'Parallel port support' CONFIG_PARPORT if [ "$CONFIG_PARPORT" != "n" ]; then - dep_tristate ' Archimedes hardware' CONFIG_PARPORT_ARC $CONFIG_PARPORT + if [ "$CONFIG_ARCH_ARC" = "y" ]; then + dep_tristate ' Archimedes hardware' CONFIG_PARPORT_ARC $CONFIG_PARPORT + fi dep_tristate ' PC-style hardware' CONFIG_PARPORT_PC $CONFIG_PARPORT # If exactly one hardware type is selected then parport will optimise away # support for loading any others. Defeat this if the user is keen. @@ -129,13 +141,29 @@ if [ "$CONFIG_PARPORT" != "n" ]; then fi fi fi -if [ "$CONFIG_ARCH_EBSA285" = "y" -o \ - "$CONFIG_ARCH_EBSA110" = "y" -o \ - "$CONFIG_ARCH_VNC" = "y" ]; then +if [ "$CONFIG_ARCH_EBSA110" = "y" -o \ + "$CONFIG_ARCH_NETWINDER" = "y" -o \ + "$CONFIG_CATS" = "y" ]; then string 'Initial kernel command string' CONFIG_CMDLINE fi +if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \ + "$CONFIG_ARCH_EBSA110" = "y" -o \ + "$CONFIG_ARCH_EBSA285" = "y" -o \ + "$CONFIG_ARCH_CO285" = "y" ]; then + bool 'Timer and CPU usage LEDs' CONFIG_LEDS + if [ "$CONFIG_LEDS" = "y" ]; then + if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \ + "$CONFIG_ARCH_EBSA285" = "y" -o \ + "$CONFIG_ARCH_CO285" = "y" ]; then + bool ' Timer LED' CONFIG_LEDS_TIMER + bool ' CPU usage LED' CONFIG_LEDS_CPU + fi + fi +fi endmenu +source drivers/i2o/Config.in + source drivers/pnp/Config.in source drivers/block/Config.in @@ -144,15 +172,19 @@ if [ "$CONFIG_ARCH_ACORN" = "y" ]; then source drivers/acorn/block/Config.in fi -if [ "$CONFIG_VGA_CONSOLE" = "n" -a "$CONFIG_FB" = "n" ]; then - source arch/arm/drivers/char/Config.in -else - source drivers/char/Config.in -fi +source drivers/char/Config.in if [ "$CONFIG_ARCH_ACORN" = "y" ]; then - source drivers/acorn/char/Config.in + if [ "$CONFIG_MOUSE" = "y" ]; then 
+ if [ "$CONFIG_ARCH_RPC" != "y" ]; then + define_bool CONFIG_KBDMOUSE y + else + define_bool CONFIG_RPCMOUSE y + fi + fi fi +source drivers/usb/Config.in + if [ "$CONFIG_VT" = "y" ]; then mainmenu_option next_comment comment 'Console drivers' @@ -166,9 +198,11 @@ fi if [ "$CONFIG_NET" = "y" ]; then source net/Config.in -fi -if [ "$CONFIG_NET" = "y" ]; then + source net/ax25/Config.in + + source net/irda/Config.in + mainmenu_option next_comment comment 'Network device support' @@ -179,6 +213,15 @@ if [ "$CONFIG_NET" = "y" ]; then endmenu fi +# mainmenu_option next_comment +# comment 'ISDN subsystem' +# +# tristate 'ISDN support' CONFIG_ISDN +# if [ "$CONFIG_ISDN" != "n" ]; then +# source drivers/isdn/Config.in +# fi +# endmenu + mainmenu_option next_comment comment 'SCSI support' @@ -200,21 +243,29 @@ if [ "$CONFIG_ARCH_ACORN" = "y" -o "$CONFIG_PCI" = "y" ]; then endmenu fi -# mainmenu_option next_comment -# comment 'ISDN subsystem' -# -# tristate 'ISDN support' CONFIG_ISDN -# if [ "$CONFIG_ISDN" != "n" ]; then -# source drivers/isdn/Config.in -# fi -# endmenu - source fs/Config.in mainmenu_option next_comment comment 'Kernel hacking' -bool 'Debug kernel errors' CONFIG_DEBUG_ERRORS +bool 'Compile kernel with frame pointer (for useful debugging)' CONFIG_FRAME_POINTER +bool 'Verbose kernel error messages' CONFIG_DEBUG_ERRORS +bool 'Verbose user fault messages' CONFIG_DEBUG_USER +bool 'Include debugging information in kernel binary' CONFIG_DEBUG_INFO #bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + if [ "$CONFIG_CPU_26" = "y" ]; then + bool 'Disable pgtable cache (EXPERIMENTAL)' CONFIG_NO_PGT_CACHE + fi + + # These options are only for real kernel hackers + # who want to get their hands dirty. 
+ bool 'Kernel low-level debugging functions' CONFIG_DEBUG_LL + if [ "$CONFIG_DEBUG_LL" = "y" ]; then + if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then + bool 'Kernel low-level debugging messages via DC21285 port' CONFIG_DEBUG_DC21285_PORT + fi + fi +fi endmenu diff --git a/arch/arm/defconfig b/arch/arm/defconfig index db89599be..ce85d6ffc 100644 --- a/arch/arm/defconfig +++ b/arch/arm/defconfig @@ -4,47 +4,71 @@ CONFIG_ARM=y # +# System and processor type +# +# CONFIG_ARCH_ARC is not set +# CONFIG_ARCH_A5K is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_EBSA110 is not set +CONFIG_FOOTBRIDGE=y +CONFIG_HOST_FOOTBRIDGE=y +# CONFIG_ADDIN_FOOTBRIDGE is not set +CONFIG_ARCH_EBSA285=y +# CONFIG_CATS is not set +CONFIG_ARCH_NETWINDER=y +# CONFIG_ARCH_ACORN is not set +CONFIG_PCI=y +CONFIG_ISA_DMA=y +CONFIG_CPU_32=y +# CONFIG_CPU_26 is not set +# CONFIG_CPU_ARM2 is not set +# CONFIG_CPU_ARM3 is not set +# CONFIG_CPU_ARM6 is not set +# CONFIG_CPU_ARM7 is not set +CONFIG_CPU_SA110=y + +# # Code maturity level options # CONFIG_EXPERIMENTAL=y +# CONFIG_ALIGNMENT_TRAP is not set +# CONFIG_TEXT_SECTIONS is not set # # Loadable module support # CONFIG_MODULES=y -CONFIG_MODVERSIONS=y +# CONFIG_MODVERSIONS is not set CONFIG_KMOD=y # # General setup # -# CONFIG_ARCH_ARC is not set -# CONFIG_ARCH_A5K is not set -CONFIG_ARCH_RPC=y -# CONFIG_ARCH_EBSA110 is not set -# CONFIG_ARCH_NEXUSPCI is not set -CONFIG_ARCH_ACORN=y -# CONFIG_PCI is not set -# CONFIG_CPU_ARM2 is not set -# CONFIG_CPU_ARM3 is not set -# CONFIG_CPU_ARM6 is not set -CONFIG_CPU_SA110=y -CONFIG_FRAME_POINTER=y -# CONFIG_BINUTILS_NEW is not set -CONFIG_DEBUG_ERRORS=y CONFIG_NET=y CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y +CONFIG_NWFPE=y CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=m -# CONFIG_BINFMT_JAVA is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +# CONFIG_ARTHUR is not set CONFIG_PARPORT=y CONFIG_PARPORT_PC=y +CONFIG_CMDLINE="root=/dev/hda2 ro mem=32M parport=0x378,7 
ide0=autotune" +CONFIG_LEDS=y +CONFIG_LEDS_TIMER=y +# CONFIG_LEDS_CPU is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set # -# Floppy, IDE, and other block devices +# Block devices # -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set CONFIG_BLK_DEV_IDE=y # @@ -52,32 +76,165 @@ CONFIG_BLK_DEV_IDE=y # # CONFIG_BLK_DEV_HD_IDE is not set CONFIG_BLK_DEV_IDEDISK=y -CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDECD is not set # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_BLK_DEV_IDESCSI is not set -# CONFIG_BLK_DEV_IDE_PCMCIA is not set -CONFIG_BLK_DEV_IDE_CARDS=y -CONFIG_BLK_DEV_IDE_ICSIDE=y -# CONFIG_BLK_DEV_IDE_RAPIDE is not set -# CONFIG_BLK_DEV_XD is not set +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_BLK_DEV_IDEDMA=y +CONFIG_BLK_DEV_OFFBOARD=y +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_VIA82C586 is not set +# CONFIG_BLK_DEV_CMD646 is not set +CONFIG_BLK_DEV_SL82C105=y +# CONFIG_IDE_CHIPSETS is not set # # Additional Block Devices # CONFIG_BLK_DEV_LOOP=m -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_STRIPED=m +CONFIG_MD_MIRRORING=m +CONFIG_MD_RAID5=m CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_INITRD=y +# CONFIG_BLK_DEV_INITRD is not set +# CONFIG_BLK_DEV_XD is not set CONFIG_PARIDE_PARPORT=y -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PART=y +CONFIG_PARIDE=m + +# +# Parallel IDE high-level drivers +# +CONFIG_PARIDE_PD=m +CONFIG_PARIDE_PCD=m +CONFIG_PARIDE_PF=m +CONFIG_PARIDE_PT=m +CONFIG_PARIDE_PG=m + +# +# Parallel IDE protocol modules +# +CONFIG_PARIDE_ATEN=m +CONFIG_PARIDE_BPCK=m +CONFIG_PARIDE_COMM=m +CONFIG_PARIDE_DSTR=m +CONFIG_PARIDE_FIT2=m +CONFIG_PARIDE_FIT3=m +CONFIG_PARIDE_EPAT=m +CONFIG_PARIDE_EPIA=m +CONFIG_PARIDE_FRIQ=m +CONFIG_PARIDE_FRPW=m +CONFIG_PARIDE_KBIC=m 
+CONFIG_PARIDE_KTTI=m +CONFIG_PARIDE_ON20=m +CONFIG_PARIDE_ON26=m # CONFIG_BLK_DEV_HD is not set # +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +# CONFIG_SERIAL_EXTENDED is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_UNIX98_PTYS is not set +CONFIG_PRINTER=m +CONFIG_PRINTER_READBACK=y +CONFIG_MOUSE=y + +# +# Mice +# +# CONFIG_ATIXL_BUSMOUSE is not set +# CONFIG_BUSMOUSE is not set +# CONFIG_MS_BUSMOUSE is not set +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_QIC02_TAPE is not set +CONFIG_WATCHDOG=y + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG_NOWAYOUT is not set +# CONFIG_WDT is not set +CONFIG_SOFT_WATCHDOG=y +# CONFIG_PCWATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +CONFIG_DS1620=y +CONFIG_NWBUTTON=y +CONFIG_NWBUTTON_REBOOT=y +CONFIG_NWFLASH=m +# CONFIG_NVRAM is not set +CONFIG_RTC=y + +# +# Video For Linux +# +# CONFIG_VIDEO_DEV is not set + +# +# Joystick support +# +# CONFIG_JOYSTICK is not set +# CONFIG_DTLK is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +CONFIG_FB=y +CONFIG_DUMMY_CONSOLE=y +# CONFIG_FB_PM2 is not set +CONFIG_FB_CYBER2000=y +# CONFIG_FB_MATROX is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_FBCON_ADVANCED=y +# CONFIG_FBCON_MFB is not set +# CONFIG_FBCON_CFB2 is not set +# CONFIG_FBCON_CFB4 is not set +CONFIG_FBCON_CFB8=y +CONFIG_FBCON_CFB16=y +CONFIG_FBCON_CFB24=y +# CONFIG_FBCON_CFB32 is not set +# CONFIG_FBCON_AFB is not set +# CONFIG_FBCON_ILBM is not set +# CONFIG_FBCON_IPLAN2P2 is not set +# CONFIG_FBCON_IPLAN2P4 is not set +# CONFIG_FBCON_IPLAN2P8 is not set +# CONFIG_FBCON_MAC is not set +CONFIG_FBCON_VGA=y +# CONFIG_FBCON_FONTWIDTH8_ONLY is not set +CONFIG_FBCON_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +# CONFIG_FONT_SUN8x16 is not set +# CONFIG_FONT_SUN12x22 is not set +# CONFIG_FONT_6x11 is 
not set +# CONFIG_FONT_PEARL_8x8 is not set +CONFIG_FONT_ACORN_8x8=y + +# # Networking options # -# CONFIG_PACKET is not set +CONFIG_PACKET=y # CONFIG_NETLINK is not set # CONFIG_FIREWALL is not set # CONFIG_FILTER is not set @@ -85,21 +242,20 @@ CONFIG_UNIX=y CONFIG_INET=y # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_IP_ACCT is not set -# CONFIG_IP_MASQUERADE is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set # CONFIG_IP_ROUTER is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_ALIAS is not set +CONFIG_IP_ALIAS=y # CONFIG_SYN_COOKIES is not set # # (it is safe to leave these untouched) # # CONFIG_INET_RARP is not set -CONFIG_IP_NOSR=y -# CONFIG_SKB_LARGE is not set +CONFIG_SKB_LARGE=y # CONFIG_IPV6 is not set # @@ -111,107 +267,198 @@ CONFIG_IP_NOSR=y # CONFIG_LAPB is not set # CONFIG_BRIDGE is not set # CONFIG_LLC is not set +# CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set # CONFIG_NET_FASTROUTE is not set # CONFIG_NET_HW_FLOWCONTROL is not set # CONFIG_CPU_IS_SLOW is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set -# CONFIG_NET_PROFILE is not set # -# SCSI support +# Amateur Radio support # -CONFIG_SCSI=y +# CONFIG_HAMRADIO is not set # -# SCSI support type (disk, tape, CD-ROM) +# IrDA subsystem support # -CONFIG_BLK_DEV_SD=y -# CONFIG_CHR_DEV_ST is not set -CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set -# CONFIG_CHR_DEV_SG is not set +# CONFIG_IRDA is not set # -# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs +# Network device support # -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y +CONFIG_NETDEVICES=y +# CONFIG_ARCNET is not set +# CONFIG_DUMMY is not set +# CONFIG_EQUALIZER is not set +CONFIG_NET_ETHERNET=y +# CONFIG_ARM_AM79C961A is not set +CONFIG_NET_VENDOR_3COM=y +# CONFIG_EL1 is not set +# CONFIG_EL2 is not set +# CONFIG_ELPLUS is not set +# CONFIG_EL16 is not set +# CONFIG_EL3 is not set +# CONFIG_3C515 is not set +CONFIG_VORTEX=y +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_RTL8139 is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_ACENIC is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_EISA=y +# CONFIG_PCNET32 is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 is not set +# CONFIG_DE4X5 is not set +CONFIG_DEC_ELCP=m +# CONFIG_DGRS is not set +# CONFIG_EEXPRESS_PRO100 is not set +# CONFIG_LNE390 is not set +# CONFIG_NE3210 is not set +CONFIG_NE2K_PCI=y +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_ES3210 is not set +# CONFIG_EPIC100 is not set +# CONFIG_ZNET is not set +# CONFIG_NET_POCKET is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_DLCI is not set +# CONFIG_PLIP is not set +CONFIG_PPP=m # -# SCSI low-level drivers +# CCP compressors for PPP are only built as modules. 
# -CONFIG_SCSI_ACORNSCSI_3=m -CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE=y -CONFIG_SCSI_ACORNSCSI_SYNC=y -CONFIG_SCSI_CUMANA_2=m -CONFIG_SCSI_POWERTECSCSI=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +CONFIG_SLIP_MODE_SLIP6=y +# CONFIG_NET_RADIO is not set +# CONFIG_TR is not set +# CONFIG_SHAPER is not set +# CONFIG_HOSTESS_SV11 is not set +# CONFIG_COSA is not set +# CONFIG_RCPCI is not set # -# The following drives are not fully supported +# SCSI support # -CONFIG_SCSI_CUMANA_1=m -CONFIG_SCSI_OAK1=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_PPA_HAVE_PEDANTIC=2 +# CONFIG_SCSI is not set # -# Network device support +# Sound # -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_EQUALIZER is not set -CONFIG_PPP=m +CONFIG_SOUND=m +# CONFIG_SOUND_ES1370 is not set +# CONFIG_SOUND_ES1371 is not set +# CONFIG_SOUND_SONICVIBES is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_OSS=m +# CONFIG_SOUND_PAS is not set +CONFIG_SOUND_SB=m +CONFIG_SOUND_ADLIB=m +# CONFIG_SOUND_GUS is not set +# CONFIG_SOUND_MPU401 is not set +# CONFIG_SOUND_PSS is not set +# CONFIG_SOUND_MSS is not set +# CONFIG_SOUND_SSCAPE is not set +# CONFIG_SOUND_TRIX is not set +# CONFIG_SOUND_MAD16 is not set +# CONFIG_SOUND_WAVEFRONT is not set +# CONFIG_SOUND_CS4232 is not set +# CONFIG_SOUND_OPL3SA2 is not set +# CONFIG_SOUND_MAUI is not set +# CONFIG_SOUND_SGALAXY is not set +# CONFIG_SOUND_AD1816 is not set +# CONFIG_SOUND_OPL3SA1 is not set +# CONFIG_SOUND_SOFTOSS is not set +# CONFIG_SOUND_YM3812 is not set +# CONFIG_SOUND_VMIDI is not set +# CONFIG_SOUND_UART6850 is not set +# CONFIG_SOUND_VIDC is not set +CONFIG_SOUND_WAVEARTIST=m +CONFIG_WAVEARTIST_BASE=250 +CONFIG_WAVEARTIST_IRQ=12 +CONFIG_WAVEARTIST_DMA=3 +CONFIG_WAVEARTIST_DMA2=7 # -# CCP compressors for PPP are only built as modules. 
+# Additional low level sound drivers # -# CONFIG_SLIP is not set -CONFIG_ETHER1=m -CONFIG_ETHER3=m -CONFIG_ETHERH=m +# CONFIG_LOWLEVEL_SOUND is not set # # Filesystems # # CONFIG_QUOTA is not set -# CONFIG_MINIX_FS is not set -CONFIG_EXT2_FS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=y +# CONFIG_AUTOFS_FS is not set +CONFIG_ADFS_FS=y +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m # CONFIG_UMSDOS_FS is not set -CONFIG_VFAT_FS=y +CONFIG_VFAT_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_MINIX_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set CONFIG_PROC_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set CONFIG_NFS_FS=y -CONFIG_NFSD=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=m +# CONFIG_NFSD_SUN is not set CONFIG_SUNRPC=y CONFIG_LOCKD=y -# CONFIG_CODA_FS is not set # CONFIG_SMB_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_UFS_FS is not set -CONFIG_ADFS_FS=y -CONFIG_ADFS_FS=y +# CONFIG_NCP_FS is not set + +# +# Partition Types +# +# CONFIG_OSF_PARTITION is not set # CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SGI_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +CONFIG_ACORN_PARTITION=y +CONFIG_ACORN_PARTITION_ADFS=y +# CONFIG_ACORN_PARTITION_ICS is not set +# CONFIG_ACORN_PARTITION_POWERTEC is not set +# CONFIG_ACORN_PARTITION_RISCIX is not set CONFIG_NLS=y # # Native Language Support # -# CONFIG_NLS_CODEPAGE_437 is not set +CONFIG_NLS_CODEPAGE_437=m # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# 
CONFIG_NLS_CODEPAGE_852 is not set +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set @@ -223,8 +470,8 @@ CONFIG_NLS=y # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_1 is not set -# CONFIG_NLS_ISO8859_2 is not set +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m # CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set @@ -232,34 +479,15 @@ CONFIG_NLS=y # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_ISO8859_9 is not set +CONFIG_NLS_ISO8859_15=m # CONFIG_NLS_KOI8_R is not set # -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -# CONFIG_SERIAL_CONSOLE is not set -# CONFIG_SERIAL_EXTENDED is not set -CONFIG_ATOMWIDE_SERIAL=y -CONFIG_DUALSP_SERIAL=y -CONFIG_MOUSE=y -CONFIG_PRINTER=m -CONFIG_PRINTER_READBACK=y -# CONFIG_UMISC is not set -# CONFIG_WATCHDOG is not set -CONFIG_RPCMOUSE=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_VIDC=y -CONFIG_AUDIO=y -DSP_BUFFSIZE=65536 - -# # Kernel hacking # +CONFIG_FRAME_POINTER=y +CONFIG_DEBUG_ERRORS=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_INFO is not set CONFIG_MAGIC_SYSRQ=y +# CONFIG_DEBUG_LL is not set diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 23b2c1267..5bcc22af1 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -9,31 +9,37 @@ HEAD_OBJ = head-$(PROCESSOR).o ENTRY_OBJ = entry-$(PROCESSOR).o O_TARGET := kernel.o -O_OBJS := $(ENTRY_OBJ) ioport.o irq.o process.o ptrace.o setup.o \ +O_OBJS := $(ENTRY_OBJ) irq.o process.o ptrace.o setup.o \ signal.o sys_arm.o time.o traps.o -DMA_OBJS_arc = dma-arc.o -DMA_OBJS_a5k = dma-a5k.o -DMA_OBJS_rpc = dma-rpc.o -DMA_OBJS_ebsa110 = dma-dummy.o -DMA_OBJS_ebsa285 = dma-ebsa285.o -DMA_OBJS_nexuspci = -DMA_OBJS_vnc = dma-vnc.o - -O_OBJS_arc = 
ecard.o iic.o fiq.o oldlatches.o -O_OBJS_a5k = ecard.o iic.o fiq.o -O_OBJS_rpc = ecard.o iic.o fiq.o -O_OBJS_ebsa110 = leds-ebsa110.o -O_OBJS_ebsa285 = leds-ebsa285.o hw-ebsa285.o -O_OBJS_nexuspci = -O_OBJS_vnc = leds-ebsa285.o hw-vnc.o +ifeq ($(CONFIG_ISA_DMA),y) + ISA_DMA_OBJS += dma-isa.o +endif + +O_OBJS_arc = dma-arc.o iic.o fiq.o oldlatches.o +O_OBJS_a5k = dma-a5k.o iic.o fiq.o +O_OBJS_rpc = dma-rpc.o iic.o fiq.o +O_OBJS_ebsa110 = dma-dummy.o +O_OBJS_footbridge = dma-footbridge.o $(ISA_DMA_OBJS) +O_OBJS_nexuspci = dma-dummy.o + +OX_OBJS_arc = dma.o +OX_OBJS_a5k = dma.o +OX_OBJS_rpc = dma.o +OX_OBJS_ebsa110 = +OX_OBJS_footbridge= dma.o hw-footbridge.o +OX_OBJS_nexuspci = all: lib kernel.o $(HEAD_OBJ) init_task.o +O_OBJS += $(O_OBJS_$(MACHINE)) + ifeq ($(CONFIG_MODULES),y) OX_OBJS = armksyms.o -else - O_OBJS += armksyms.o +endif + +ifeq ($(CONFIG_ARCH_ACORN),y) + OX_OBJS += ecard.o endif ifeq ($(MACHINE),nexuspci) @@ -46,17 +52,23 @@ else endif endif -ifneq ($(DMA_OBJS_$(MACHINE)),) - OX_OBJS += dma.o - O_OBJS += $(DMA_OBJS_$(MACHINE)) - ifeq ($(CONFIG_ISA_DMA),y) - O_OBJS += dma-isa.o - endif +ifdef CONFIG_LEDS + OX_OBJS += leds-$(MACHINE).o +endif + +ifeq ($(CONFIG_MODULES),y) + OX_OBJS += $(OX_OBJS_$(MACHINE)) else - O_OBJS += dma-dummy.o + O_OBJS += $(OX_OBJS_$(MACHINE)) endif -O_OBJS += $(O_OBJS_$(MACHINE)) +ifeq ($(CONFIG_ARTHUR),y) + O_OBJS += arthur.o +else + ifeq ($(CONFIG_ARTHUR),m) + M_OBJS += arthur.o + endif +endif $(HEAD_OBJ): $(HEAD_OBJ:.o=.S) $(CC) -D__ASSEMBLY__ -DTEXTADDR=$(TEXTADDR) -traditional -c $(HEAD_OBJ:.o=.S) -o $@ @@ -72,3 +84,7 @@ $(ENTRY_OBJ): ../lib/constants.h lib: $(MAKE) -C ../lib constants.h + +# Spell out some dependencies that `make dep' doesn't spot +entry-armv.o: calls.S +entry-armo.o: calls.S diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 149349b4a..c93421ba7 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -6,20 +6,40 @@ #include <linux/mman.h> #include <linux/pci.h> 
#include <linux/delay.h> +#include <linux/in6.h> -#include <asm/ecard.h> #include <asm/elf.h> #include <asm/io.h> #include <asm/dma.h> #include <asm/pgtable.h> +#include <asm/semaphore.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/checksum.h> extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, struct user_fp_struct *); extern void inswb(unsigned int port, void *to, int len); extern void outswb(unsigned int port, const void *to, int len); +extern unsigned int local_bh_count[NR_CPUS]; +extern unsigned int local_irq_count[NR_CPUS]; + +extern void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags); +extern void iounmap(void *addr); + +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + +/* + * syscalls + */ +extern int sys_write(int, const char *, int); +extern int sys_read(int, char *, int); +extern int sys_lseek(int, off_t, int); +extern int sys_open(const char *, int, int); +extern int sys_exit(int); +extern int sys_wait4(int, int *, int, struct rusage *); + /* * libgcc functions - functions that are used internally by the * compiler... 
(prototypes are not correct though, but that @@ -43,6 +63,8 @@ extern void __udivsi3(void); extern void __umoddi3(void); extern void __umodsi3(void); +extern void ret_from_exception(void); +extern void fpundefinstr(void); extern void fp_enter(void); #define EXPORT_SYMBOL_ALIAS(sym,orig) \ const char __kstrtab_##sym##[] __attribute__((section(".kstrtab"))) = \ @@ -57,32 +79,46 @@ EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter); EXPORT_SYMBOL_ALIAS(fp_printk,printk); EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig); +#ifdef CONFIG_CPU_26 +EXPORT_SYMBOL(fpundefinstr); +EXPORT_SYMBOL(ret_from_exception); +#endif + /* platform dependent support */ EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(udelay); EXPORT_SYMBOL(xchg_str); - - /* expansion card support */ -#ifdef CONFIG_ARCH_ACORN -EXPORT_SYMBOL(ecard_startfind); -EXPORT_SYMBOL(ecard_find); -EXPORT_SYMBOL(ecard_readchunk); -EXPORT_SYMBOL(ecard_address); +EXPORT_SYMBOL(local_bh_count); +EXPORT_SYMBOL(local_irq_count); +#ifdef CONFIG_CPU_32 +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); #endif +EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); /* processor dependencies */ EXPORT_SYMBOL(processor); -EXPORT_SYMBOL(machine_type); +EXPORT_SYMBOL(__machine_arch_type); + + /* networking */ +EXPORT_SYMBOL(csum_partial_copy); +EXPORT_SYMBOL(__csum_ipv6_magic); /* io */ -EXPORT_SYMBOL(outswb); +EXPORT_SYMBOL(outsb); EXPORT_SYMBOL(outsw); -EXPORT_SYMBOL(inswb); +EXPORT_SYMBOL(outsl); +EXPORT_SYMBOL(insb); EXPORT_SYMBOL(insw); +EXPORT_SYMBOL(insl); + +EXPORT_SYMBOL(_memcpy_fromio); +EXPORT_SYMBOL(_memcpy_toio); +EXPORT_SYMBOL(_memset_io); /* address translation */ #ifndef __virt_to_phys__is_a_macro @@ -98,7 +134,9 @@ EXPORT_SYMBOL(__virt_to_bus); EXPORT_SYMBOL(__bus_to_virt); #endif +#ifndef CONFIG_NO_PGT_CACHE EXPORT_SYMBOL(quicklists); +#endif EXPORT_SYMBOL(__bad_pmd); EXPORT_SYMBOL(__bad_pmd_kernel); @@ -167,3 +205,17 @@ EXPORT_SYMBOL(find_next_zero_bit); 
EXPORT_SYMBOL(armidlist); EXPORT_SYMBOL(armidindex); EXPORT_SYMBOL(elf_platform); + + /* syscalls */ +EXPORT_SYMBOL(sys_write); +EXPORT_SYMBOL(sys_read); +EXPORT_SYMBOL(sys_lseek); +EXPORT_SYMBOL(sys_open); +EXPORT_SYMBOL(sys_exit); +EXPORT_SYMBOL(sys_wait4); + + /* semaphores */ +EXPORT_SYMBOL_NOVERS(__down_failed); +EXPORT_SYMBOL_NOVERS(__down_interruptible_failed); +EXPORT_SYMBOL_NOVERS(__up_wakeup); + diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c new file mode 100644 index 000000000..9994fdd4a --- /dev/null +++ b/arch/arm/kernel/arthur.c @@ -0,0 +1,88 @@ +/* + * Arthur personality + * Copyright (C) 1998 Philip Blundell + */ + +#include <linux/personality.h> +#include <linux/module.h> +#include <linux/stddef.h> +#include <linux/signal.h> +#include <linux/sched.h> + +#include <asm/ptrace.h> + +/* RISC OS doesn't have many signals, and a lot of those that it does + have don't map easily to any Linux equivalent. Never mind. */ + +#define RISCOS_SIGABRT 1 +#define RISCOS_SIGFPE 2 +#define RISCOS_SIGILL 3 +#define RISCOS_SIGINT 4 +#define RISCOS_SIGSEGV 5 +#define RISCOS_SIGTERM 6 +#define RISCOS_SIGSTAK 7 +#define RISCOS_SIGUSR1 8 +#define RISCOS_SIGUSR2 9 +#define RISCOS_SIGOSERROR 10 + +static unsigned long riscos_to_linux_signals[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +static unsigned long linux_to_riscos_signals[32] = { + 0, -1, RISCOS_SIGINT, -1, + RISCOS_SIGILL, 5, RISCOS_SIGABRT, 7, + RISCOS_SIGFPE, 9, RISCOS_SIGUSR1, RISCOS_SIGSEGV, + RISCOS_SIGUSR2, 13, 14, RISCOS_SIGTERM, + 16, 17, 18, 19, + 20, 21, 22, 23, + 24, 25, 26, 27, + 28, 29, 30, 31 +}; + +static void arthur_lcall7(int nr, struct pt_regs *regs) +{ + struct siginfo info; + info.si_signo = SIGSWI; + info.si_code = nr; + /* Bounce it to the emulator */ + send_sig_info(SIGSWI, &info, current); +} + +static struct exec_domain riscos_exec_domain = { + "Arthur", /* name */ + 
(lcall7_func)arthur_lcall7, + PER_RISCOS, PER_RISCOS, + riscos_to_linux_signals, + linux_to_riscos_signals, +#ifdef MODULE + &__this_module, /* No usage counter. */ +#else + NULL, +#endif + NULL /* Nothing after this in the list. */ +}; + +/* + * We could do with some locking to stop Arthur being removed while + * processes are using it. + */ + +#ifdef MODULE +int init_module(void) +#else +int initialise_arthur(void) +#endif +{ + return register_exec_domain(&riscos_exec_domain); +} + +#ifdef MODULE +void cleanup_module(void) +{ + unregister_exec_domain(&riscos_exec_domain); +} +#endif diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 46f71fa92..154e3aeab 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -31,7 +31,7 @@ .long SYMBOL_NAME(sys_lseek) /* 20 */ .long SYMBOL_NAME(sys_getpid) .long SYMBOL_NAME(sys_mount_wrapper) - .long SYMBOL_NAME(sys_umount) + .long SYMBOL_NAME(sys_oldumount) .long SYMBOL_NAME(sys_setuid) .long SYMBOL_NAME(sys_getuid) /* 25 */ .long SYMBOL_NAME(sys_stime) @@ -61,7 +61,7 @@ .long SYMBOL_NAME(sys_geteuid) /* 50 */ .long SYMBOL_NAME(sys_getegid) .long SYMBOL_NAME(sys_acct) - .long SYMBOL_NAME(sys_ni_syscall) /* was sys_phys */ + .long SYMBOL_NAME(sys_umount) .long SYMBOL_NAME(sys_ni_syscall) /* was sys_lock */ .long SYMBOL_NAME(sys_ioctl) /* 55 */ .long SYMBOL_NAME(sys_fcntl) @@ -110,7 +110,7 @@ .long SYMBOL_NAME(sys_ni_syscall) /* was sys_profil */ .long SYMBOL_NAME(sys_statfs) /* 100 */ .long SYMBOL_NAME(sys_fstatfs) - .long SYMBOL_NAME(sys_ni_syscall) /* .long _sys_ioperm */ + .long SYMBOL_NAME(sys_ni_syscall) .long SYMBOL_NAME(sys_socketcall) .long SYMBOL_NAME(sys_syslog) .long SYMBOL_NAME(sys_setitimer) @@ -119,7 +119,7 @@ .long SYMBOL_NAME(sys_newlstat) .long SYMBOL_NAME(sys_newfstat) .long SYMBOL_NAME(sys_uname) -/* 110 */ .long SYMBOL_NAME(sys_iopl) +/* 110 */ .long SYMBOL_NAME(sys_ni_syscall) .long SYMBOL_NAME(sys_vhangup) .long SYMBOL_NAME(sys_idle) .long SYMBOL_NAME(sys_syscall) /* call a 
syscall */ @@ -196,6 +196,10 @@ .long SYMBOL_NAME(sys_capget) /* 185 */ .long SYMBOL_NAME(sys_capset) .long SYMBOL_NAME(sys_sigaltstack_wrapper) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_ni_syscall) +/* 190 */ .long SYMBOL_NAME(sys_vfork_wrapper) .rept NR_syscalls-186 .long SYMBOL_NAME(sys_ni_syscall) diff --git a/arch/arm/kernel/dec21285.c b/arch/arm/kernel/dec21285.c index aa66ee04a..c4103abee 100644 --- a/arch/arm/kernel/dec21285.c +++ b/arch/arm/kernel/dec21285.c @@ -8,17 +8,17 @@ #include <linux/pci.h> #include <linux/ptrace.h> #include <linux/interrupt.h> +#include <linux/mm.h> #include <linux/init.h> #include <asm/irq.h> #include <asm/system.h> +#include <asm/hardware.h> -#define MAX_SLOTS 20 +#define MAX_SLOTS 21 extern void pcibios_fixup_ebsa285(struct pci_dev *dev); extern void pcibios_init_ebsa285(void); -extern void pcibios_fixup_vnc(struct pci_dev *dev); -extern void pcibios_init_vnc(void); int pcibios_present(void) @@ -33,11 +33,12 @@ pcibios_base_address(unsigned char bus, unsigned char dev_fn) int slot = PCI_SLOT(dev_fn); if (slot < MAX_SLOTS) - return 0xf8c00000 + (slot << 11) + (PCI_FUNC(dev_fn) << 8); + return PCICFG0_BASE + 0xc00000 + + (slot << 11) + (PCI_FUNC(dev_fn) << 8); else return 0; } else - return 0xf9000000 | (bus << 16) | (dev_fn << 8); + return PCICFG1_BASE | (bus << 16) | (dev_fn << 8); } int @@ -151,10 +152,7 @@ __initfunc(void pcibios_fixup(void)) struct pci_dev *dev; for (dev = pci_devices; dev; dev = dev->next) { - if (machine_is_ebsa285() || machine_is_cats()) - pcibios_fixup_ebsa285(dev); - if (machine_is_netwinder()) - pcibios_fixup_vnc(dev); + pcibios_fixup_ebsa285(dev); pcibios_write_config_byte(dev->bus->number, dev->devfn, PCI_INTERRUPT_LINE, dev->irq); @@ -164,18 +162,83 @@ __initfunc(void pcibios_fixup(void)) dev->bus->number, dev->devfn, dev->vendor, dev->device, dev->irq); } - if (machine_is_netwinder()) - hw_init(); + + hw_init(); } __initfunc(void 
pcibios_init(void)) { - if (machine_is_ebsa285() || machine_is_cats()) - pcibios_init_ebsa285(); - if (machine_is_netwinder()) - pcibios_init_vnc(); - - printk("DEC21285 PCI revision %02X\n", *(unsigned char *)0xfe000008); + unsigned int mem_size = (unsigned int)high_memory - PAGE_OFFSET; + unsigned long cntl; + + *CSR_SDRAMBASEMASK = (mem_size - 1) & 0x0ffc0000; + *CSR_SDRAMBASEOFFSET = 0; + *CSR_ROMBASEMASK = 0x80000000; + *CSR_CSRBASEMASK = 0; + *CSR_CSRBASEOFFSET = 0; + *CSR_PCIADDR_EXTN = 0; + +#ifdef CONFIG_HOST_FOOTBRIDGE + /* + * Against my better judgement, Philip Blundell still seems + * to be saying that we should initialise the PCI stuff here + * when the PCI_CFN bit is not set, dispite my comment below, + * which he decided to remove. If it is not set, then + * the card is in add-in mode, and we're in a machine where + * the bus is set up by 'others'. + * + * We should therefore not mess about with the mapping in + * anyway, and we should not be using the virt_to_bus functions + * that exist in the HOST architecture mode (since they assume + * a fixed mapping). + * + * Instead, you should be using ADDIN mode, which allows for + * this situation. This does assume that you have correctly + * initialised the PCI bus, which you must have done to get + * your PC booted. + * + * Unfortunately, he seems to be blind to this. I guess he'll + * also remove all this. + * + * And THIS COMMENT STAYS, even if this gets patched, thank + * you. + */ + + /* + * Map our SDRAM at a known address in PCI space, just in case + * the firmware had other ideas. Using a nonzero base is + * necessary, since some VGA cards forcefully use PCI addresses + * in the range 0x000a0000 to 0x000c0000. (eg, S3 cards). + * + * NOTE! If you need to chec the PCI_CFN bit in the SA110 + * control register then you've configured the kernel wrong. + * If you're not using host mode, then DO NOT set + * CONFIG_HOST_FOOTBRIDGE, but use CONFIG_ADDIN_FOOTBRIDGE + * instead. 
In this case, you MUST supply some firmware + * to allow your PC to boot, plus we should not modify the + * mappings that the PC BIOS has set up for us. + */ + *CSR_PCICACHELINESIZE = 0x00002008; + *CSR_PCICSRBASE = 0; + *CSR_PCICSRIOBASE = 0; + *CSR_PCISDRAMBASE = virt_to_bus((void *)PAGE_OFFSET); + *CSR_PCIROMBASE = 0; + *CSR_PCICMD = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | PCI_COMMAND_FAST_BACK | + PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY | + (1 << 31) | (1 << 29) | (1 << 28) | (1 << 24); +#endif + + /* + * Clear any existing errors - we aren't + * interested in historical data... + */ + cntl = *CSR_SA110_CNTL & 0xffffde07; + *CSR_SA110_CNTL = cntl | SA110_CNTL_RXSERR; + + pcibios_init_ebsa285(); + + printk(KERN_DEBUG"PCI: DEC21285 revision %02lX\n", *CSR_CLASSREV & 0xff); } __initfunc(void pcibios_fixup_bus(struct pci_bus *bus)) diff --git a/arch/arm/kernel/dma-a5k.c b/arch/arm/kernel/dma-a5k.c index 18bbf0c9c..df02ea54e 100644 --- a/arch/arm/kernel/dma-a5k.c +++ b/arch/arm/kernel/dma-a5k.c @@ -12,7 +12,6 @@ #include <asm/fiq.h> #include <asm/io.h> #include <asm/hardware.h> -#include <asm/pgtable.h> #include "dma.h" @@ -37,8 +36,9 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) if (channel != DMA_VIRTUAL_FLOPPY) printk("arch_dma_count: invalid channel %d\n", channel); else { - extern int floppy_fiqresidual(void); - return floppy_fiqresidual(); + struct pt_regs regs; + get_fiq_regs(&regs); + return regs.ARM_r9; } return 0; } @@ -48,6 +48,7 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) if (channel != DMA_VIRTUAL_FLOPPY) printk("arch_enable_dma: invalid channel %d\n", channel); else { + struct pt_regs regs; void *fiqhandler_start; unsigned int fiqhandler_length; extern void floppy_fiqsetup(unsigned long len, unsigned long addr, @@ -67,8 +68,10 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) return; } memcpy((void *)0x1c, fiqhandler_start, fiqhandler_length); - flush_page_to_ram(0); - floppy_fiqsetup(dma->buf.length, 
__bus_to_virt(dma->buf.address), (int)PCIO_FLOPPYDMABASE); + regs.ARM_r9 = dma->buf.length; + regs.ARM_r10 = __bus_to_virt(dma->buf.address); + regs.ARM_fp = (int)PCIO_FLOPPYDMABASE; + set_fiq_regs(&regs); enable_irq(dma->dma_irq); } } @@ -83,6 +86,11 @@ void arch_disable_dma(dmach_t channel, dma_t *dma) } } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns) +{ + return 0; +} + __initfunc(void arch_dma_init(dma_t *dma)) { dma[DMA_VIRTUAL_FLOPPY].dma_irq = 64; diff --git a/arch/arm/kernel/dma-arc.c b/arch/arm/kernel/dma-arc.c index 27a139ad4..9be27bdae 100644 --- a/arch/arm/kernel/dma-arc.c +++ b/arch/arm/kernel/dma-arc.c @@ -1,10 +1,11 @@ /* * arch/arm/kernel/dma-arc.c * - * Copyright (C) 1998 Dave Gilbert / Russell King + * Copyright (C) 1998-1999 Dave Gilbert / Russell King * * DMA functions specific to Archimedes architecture */ +#include <linux/config.h> #include <linux/sched.h> #include <linux/init.h> @@ -14,7 +15,7 @@ #include "dma.h" -int arch_request_dma(dmach_t channel, dma_t *dma) +int arch_request_dma(dmach_t channel, dma_t *dma, const char * dev_id) { if (channel == DMA_VIRTUAL_FLOPPY0 || channel == DMA_VIRTUAL_FLOPPY1) @@ -25,16 +26,12 @@ int arch_request_dma(dmach_t channel, dma_t *dma) void arch_free_dma(dmach_t channel, dma_t *dma) { - if (channel != DMA_VIRTUAL_FLOPPY0 && - channel != DMA_VIRTUAL_FLOPPY1) - return 0; - else - return -EINVAL; } void arch_enable_dma(dmach_t channel, dma_t *dma) { switch (channel) { +#ifdef CONFIG_BLK_DEV_FD case DMA_VIRTUAL_FLOPPY0: { /* Data DMA */ switch (dma->dma_mode) { case DMA_MODE_READ: /* read */ @@ -96,9 +93,38 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) restore_flags(flags); } break; +#endif } } +int arch_get_dma_residue(dmach_t channel, dma_t *dma) +{ + switch (channel) { +#ifdef CONFIG_BLK_DEV_FD + case DMA_VIRTUAL_FLOPPY0: { /* Data DMA */ + extern unsigned int fdc1772_bytestogo; + + /* 10/1/1999 DAG - I presume its the number of bytes left? 
*/ + return fdc1772_bytestogo; + }; + break; + + case DMA_VIRTUAL_FLOPPY1: { /* Command completed */ + /* 10/1/1999 DAG - Presume whether there is an outstanding command? */ + extern unsigned int fdc1772_fdc_int_done; + + return (fdc1772_fdc_int_done==0)?1:0; /* Explicit! If the int done is 0 then 1 int to go */ + }; + break; + +#endif + + default: + printk("dma-arc.c:arch_get_dma_residue called with unknown/unconfigured DMA channel\n"); + return 0; + }; +} + void arch_disable_dma(dmach_t channel, dma_t *dma) { if (channel != DMA_VIRTUAL_FLOPPY0 && @@ -108,6 +134,11 @@ void arch_disable_dma(dmach_t channel, dma_t *dma) disable_irq(dma->dma_irq); } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns) +{ + return 0; +} + __initfunc(void arch_dma_init(dma_t *dma)) { dma[DMA_VIRTUAL_FLOPPY0].dma_irq = 64; diff --git a/arch/arm/kernel/dma-dummy.c b/arch/arm/kernel/dma-dummy.c index be72a8965..db46ef1c3 100644 --- a/arch/arm/kernel/dma-dummy.c +++ b/arch/arm/kernel/dma-dummy.c @@ -9,6 +9,10 @@ #include <linux/errno.h> #include <linux/init.h> +#include <asm/spinlock.h> + +spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED; + int request_dma(int channel, const char *device_id) { return -EINVAL; diff --git a/arch/arm/kernel/dma-ebsa285.c b/arch/arm/kernel/dma-footbridge.c index f1c42dac2..a355283dc 100644 --- a/arch/arm/kernel/dma-ebsa285.c +++ b/arch/arm/kernel/dma-footbridge.c @@ -6,7 +6,9 @@ * DMA functions specific to EBSA-285/CATS architectures * * Changelog: - * 09/11/1998 RMK Split out ISA DMA functions to dma-isa.c + * 09-Nov-1998 RMK Split out ISA DMA functions to dma-isa.c + * 17-Mar-1999 RMK Allow any EBSA285-like architecture to have + * ISA DMA controllers. 
*/ #include <linux/config.h> @@ -16,7 +18,6 @@ #include <linux/init.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/dma.h> #include <asm/io.h> #include <asm/hardware.h> @@ -24,16 +25,22 @@ #include "dma.h" #include "dma-isa.h" +#ifdef CONFIG_ISA_DMA +static int has_isa_dma; +#else +#define has_isa_dma 0 +#endif + int arch_request_dma(dmach_t channel, dma_t *dma, const char *dev_name) { switch (channel) { - case 0: - case 1: /* 21285 internal channels */ + case _DC21285_DMA(0): + case _DC21285_DMA(1): /* 21285 internal channels */ return 0; - case 2 ... 9: - if (machine_is_cats()) - return isa_request_dma(channel - 2, dma, dev_name); + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + return isa_request_dma(channel - _ISA_DMA(0), dma, dev_name); } return -EINVAL; @@ -49,14 +56,13 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) int residue = 0; switch (channel) { - case 0: - case 1: + case _DC21285_DMA(0): + case _DC21285_DMA(1): break; -#ifdef CONFIG_CATS - case 2 ... 9: - if (machine_is_cats()) - residue = isa_get_dma_residue(channel - 2); -#endif + + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + residue = isa_get_dma_residue(channel - _ISA_DMA(0), dma); } return residue; } @@ -64,38 +70,43 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) void arch_enable_dma(dmach_t channel, dma_t *dma) { switch (channel) { - case 0: - case 1: + case _DC21285_DMA(0): + case _DC21285_DMA(1): /* * Not yet implemented */ break; -#ifdef CONFIG_CATS - case 2 ... 9: - if (machine_is_cats()) - isa_enable_dma(channel - 2, dma); -#endif + + case _ISA_DMA(0) ... _ISA_DMA(7): + if (has_isa_dma) + isa_enable_dma(channel - _ISA_DMA(0), dma); } } void arch_disable_dma(dmach_t channel, dma_t *dma) { switch (channel) { - case 0: - case 1: + case _DC21285_DMA(0): + case _DC21285_DMA(1): /* * Not yet implemented */ break; -#ifdef CONFIG_CATS - case 2 ... 9: - if (machine_is_cats()) - isa_disable_dma(channel - 2, dma); -#endif + + case _ISA_DMA(0) ... 
_ISA_DMA(7): + if (has_isa_dma) + isa_disable_dma(channel - _ISA_DMA(0), dma); } } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns) +{ + return 0; +} + __initfunc(void arch_dma_init(dma_t *dma)) { - /* Nothing to do */ +#ifdef CONFIG_ISA_DMA + has_isa_dma = isa_init_dma(); +#endif } diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c index bdf7c6147..19be50433 100644 --- a/arch/arm/kernel/dma-isa.c +++ b/arch/arm/kernel/dma-isa.c @@ -11,6 +11,7 @@ * Copyright (C) 1998 Phil Blundell */ #include <linux/sched.h> +#include <linux/init.h> #include <asm/dma.h> #include <asm/io.h> @@ -18,6 +19,11 @@ #include "dma.h" #include "dma-isa.h" +#define ISA_DMA_MODE_READ 0x44 +#define ISA_DMA_MODE_WRITE 0x48 +#define ISA_DMA_MODE_CASCADE 0xc0 +#define ISA_DMA_AUTOINIT 0x10 + #define ISA_DMA_MASK 0 #define ISA_DMA_MODE 1 #define ISA_DMA_CLRFF 2 @@ -40,10 +46,7 @@ static unsigned int isa_dma_port[8][7] = { int isa_request_dma(int channel, dma_t *dma, const char *dev_name) { - if (channel != 4) - return 0; - - return -EINVAL; + return 0; } void isa_free_dma(int channel, dma_t *dma) @@ -56,25 +59,27 @@ int isa_get_dma_residue(int channel, dma_t *dma) unsigned int io_port = isa_dma_port[channel][ISA_DMA_COUNT]; int count; - count = 1 + inb(io_port) + (inb(io_port) << 8); + count = 1 + inb(io_port); + count |= inb(io_port) << 8; return channel < 4 ? count : (count << 1); } void isa_enable_dma(int channel, dma_t *dma) { - unsigned long address, length; - if (dma->invalid) { + unsigned long address, length; + unsigned int mode; + address = dma->buf.address; length = dma->buf.length - 1; - outb(address >> 24, isa_dma_port[channel][ISA_DMA_PGHI]); outb(address >> 16, isa_dma_port[channel][ISA_DMA_PGLO]); + outb(address >> 24, isa_dma_port[channel][ISA_DMA_PGHI]); if (channel >= 4) { address >>= 1; - length = (length >> 1) & 0xfe; /* why &0xfe? 
*/ + length >>= 1; } outb(0, isa_dma_port[channel][ISA_DMA_CLRFF]); @@ -85,17 +90,31 @@ void isa_enable_dma(int channel, dma_t *dma) outb(length, isa_dma_port[channel][ISA_DMA_COUNT]); outb(length >> 8, isa_dma_port[channel][ISA_DMA_COUNT]); - outb(dma->dma_mode | (channel & 3), isa_dma_port[channel][ISA_DMA_MODE]); + mode = channel & 3; - switch (dma->dma_mode) { + switch (dma->dma_mode & DMA_MODE_MASK) { case DMA_MODE_READ: + mode |= ISA_DMA_MODE_READ; dma_cache_inv(__bus_to_virt(dma->buf.address), dma->buf.length); break; case DMA_MODE_WRITE: + mode |= ISA_DMA_MODE_WRITE; dma_cache_wback(__bus_to_virt(dma->buf.address), dma->buf.length); break; + + case DMA_MODE_CASCADE: + mode |= ISA_DMA_MODE_CASCADE; + break; + + default: + break; } + + if (dma->dma_mode & DMA_AUTOINIT) + mode |= ISA_DMA_AUTOINIT; + + outb(mode, isa_dma_port[channel][ISA_DMA_MODE]); dma->invalid = 0; } outb(channel & 3, isa_dma_port[channel][ISA_DMA_MASK]); @@ -105,3 +124,56 @@ void isa_disable_dma(int channel, dma_t *dma) { outb(channel | 4, isa_dma_port[channel][ISA_DMA_MASK]); } + +__initfunc(int isa_init_dma(void)) +{ + int dmac_found; + + outb(0xff, 0x0d); + outb(0xff, 0xda); + + outb(0x55, 0x00); + outb(0xaa, 0x00); + + dmac_found = inb(0x00) == 0x55 && inb(0x00) == 0xaa; + + if (dmac_found) { + int channel; + + for (channel = 0; channel < 8; channel++) + isa_disable_dma(channel, NULL); + + outb(0x40, 0x0b); + outb(0x41, 0x0b); + outb(0x42, 0x0b); + outb(0x43, 0x0b); + + outb(0xc0, 0xd6); + outb(0x41, 0xd6); + outb(0x42, 0xd6); + outb(0x43, 0xd6); + + outb(0, 0xd4); + + outb(0x10, 0x08); + outb(0x10, 0xd0); + + /* + * Is this correct? According to + * my documentation, it doesn't + * appear to be. 
It should be + * outb(0x3f, 0x40b); outb(0x3f, 0x4d6); + */ + outb(0x30, 0x40b); + outb(0x31, 0x40b); + outb(0x32, 0x40b); + outb(0x33, 0x40b); + outb(0x31, 0x4d6); + outb(0x32, 0x4d6); + outb(0x33, 0x4d6); + + request_dma(DMA_ISA_CASCADE, "cascade"); + } + + return dmac_found; +} diff --git a/arch/arm/kernel/dma-isa.h b/arch/arm/kernel/dma-isa.h index 3fcbdb3c7..2640f6c3a 100644 --- a/arch/arm/kernel/dma-isa.h +++ b/arch/arm/kernel/dma-isa.h @@ -23,3 +23,7 @@ void isa_enable_dma(int channel, dma_t *dma); */ void isa_disable_dma(int channel, dma_t *dma); +/* + * Initialise DMA + */ +int isa_init_dma(void); diff --git a/arch/arm/kernel/dma-rpc.c b/arch/arm/kernel/dma-rpc.c index 00cd95e79..d3fcd9116 100644 --- a/arch/arm/kernel/dma-rpc.c +++ b/arch/arm/kernel/dma-rpc.c @@ -11,10 +11,10 @@ #include <linux/init.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/dma.h> #include <asm/fiq.h> #include <asm/io.h> +#include <asm/iomd.h> #include <asm/hardware.h> #include <asm/uaccess.h> @@ -223,8 +223,9 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma) break; case DMA_VIRTUAL_FLOPPY: { - extern int floppy_fiqresidual(void); - residue = floppy_fiqresidual(); + struct pt_regs regs; + get_fiq_regs(&regs); + return regs.ARM_r9; } break; } @@ -286,7 +287,6 @@ void arch_enable_dma(dmach_t channel, dma_t *dma) set_fiq_handler(fiqhandler_start, fiqhandler_length); set_fiq_regs(&regs); enable_irq(dma->dma_irq); - } break; @@ -319,6 +319,46 @@ void arch_disable_dma(dmach_t channel, dma_t *dma) } } +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle) +{ + int tcr, speed; + + if (cycle < 188) + speed = 3; + else if (cycle <= 250) + speed = 2; + else if (cycle < 438) + speed = 1; + else + speed = 0; + + tcr = inb(IOMD_DMATCR); + speed &= 3; + + switch (channel) { + case DMA_0: + tcr = (tcr & ~0x03) | speed; + break; + + case DMA_1: + tcr = (tcr & ~0x0c) | (speed << 2); + break; + + case DMA_2: + tcr = (tcr & ~0x30) | (speed << 4); + break; + + case DMA_3: + tcr 
= (tcr & ~0xc0) | (speed << 6); + break; + + default: + break; + } + + outb(tcr, IOMD_DMATCR); +} + __initfunc(void arch_dma_init(dma_t *dma)) { outb(0, IOMD_IO0CR); @@ -326,7 +366,7 @@ __initfunc(void arch_dma_init(dma_t *dma)) outb(0, IOMD_IO2CR); outb(0, IOMD_IO3CR); -// outb(0xf0, IOMD_DMATCR); + outb(0xa0, IOMD_DMATCR); dma[0].dma_base = ioaddr(IOMD_IO0CURA); dma[0].dma_irq = IRQ_DMA0; diff --git a/arch/arm/kernel/dma-vnc.c b/arch/arm/kernel/dma-vnc.c deleted file mode 100644 index 132fa627a..000000000 --- a/arch/arm/kernel/dma-vnc.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * arch/arm/kernel/dma-vnc.c - * - * Copyright (C) 1998 Russell King - */ -#include <linux/sched.h> -#include <linux/malloc.h> -#include <linux/mman.h> -#include <linux/init.h> - -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/dma.h> -#include <asm/io.h> -#include <asm/hardware.h> - -#include "dma.h" -#include "dma-isa.h" - -int arch_request_dma(dmach_t channel, dma_t *dma, const char *dev_name) -{ - if (channel < 8) - return isa_request_dma(channel, dma, dev_name); - return -EINVAL; -} - -void arch_free_dma(dmach_t channel, dma_t *dma) -{ - isa_free_dma(channel, dma); -} - -int arch_get_dma_residue(dmach_t channel, dma_t *dma) -{ - return isa_get_dma_residue(channel, dma); -} - -void arch_enable_dma(dmach_t channel, dma_t *dma) -{ - isa_enable_dma(channel, dma); -} - -void arch_disable_dma(dmach_t channel, dma_t *dma) -{ - isa_disable_dma(channel, dma); -} - -__initfunc(void arch_dma_init(dma_t *dma)) -{ - /* Nothing to do */ -} - diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index a164073ae..219e1f0f2 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -21,7 +21,6 @@ #include <linux/init.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/irq.h> #include <asm/hardware.h> #include <asm/io.h> @@ -201,6 +200,12 @@ void disable_dma (dmach_t channel) printk (KERN_ERR "Trying to disable free DMA%d\n", channel); } +void set_dma_speed(dmach_t 
channel, int cycle_ns) +{ + dma_chan[channel].speed = + arch_set_dma_speed(channel, &dma_chan[channel], cycle_ns); +} + int get_dma_residue(dmach_t channel) { return arch_get_dma_residue(channel, &dma_chan[channel]); @@ -214,6 +219,7 @@ EXPORT_SYMBOL(set_dma_count); EXPORT_SYMBOL(set_dma_mode); EXPORT_SYMBOL(get_dma_residue); EXPORT_SYMBOL(set_dma_sg); +EXPORT_SYMBOL(set_dma_speed); __initfunc(void init_dma(void)) { diff --git a/arch/arm/kernel/dma.h b/arch/arm/kernel/dma.h index e4c72c6af..33db3b03b 100644 --- a/arch/arm/kernel/dma.h +++ b/arch/arm/kernel/dma.h @@ -15,6 +15,7 @@ typedef struct { unsigned int active:1; /* Transfer active */ unsigned int invalid:1; /* Address/Count changed */ dmamode_t dma_mode; /* DMA mode */ + int speed; /* DMA speed */ unsigned int lock; /* Device is allocated */ const char *device_id; /* Device name */ @@ -63,6 +64,15 @@ void arch_disable_dma(dmach_t channel, dma_t *dma); */ int arch_get_dma_residue(dmach_t channel, dma_t *dma); +/* Prototype: int arch_set_dma_speed(channel, dma, cycle) + * Purpose : Convert a cycle time to a register setting + * Params : channel - DMA channel number + * : dma - DMA structure for channel + * : cycle - cycle time in NS + * Returns : setting for 'dma->speed' + */ +int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle); + /* Prototype: void arch_dma_init(dma) * Purpose : Initialise architecture specific DMA * Params : dma - pointer to array of DMA structures diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index fe1c75f5c..dd4bf670c 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -7,32 +7,43 @@ * * Created from information from Acorns RiscOS3 PRMs * - * 08-Dec-1996 RMK Added code for the 9'th expansion card - the ether podule slot. + * 08-Dec-1996 RMK Added code for the 9'th expansion card - the ether + * podule slot. * 06-May-1997 RMK Added blacklist for cards whose loader doesn't work. 
- * 12-Sep-1997 RMK Created new handling of interrupt enables/disables - cards can - * now register their own routine to control interrupts (recommended). - * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled on reset from - * Linux. (Caused cards not to respond under RiscOS without hard reset). + * 12-Sep-1997 RMK Created new handling of interrupt enables/disables + * - cards can now register their own routine to control + * interrupts (recommended). + * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled + * on reset from Linux. (Caused cards not to respond + * under RiscOS without hard reset). * 15-Feb-1998 RMK Added DMA support * 12-Sep-1998 RMK Added EASI support + * 10-Jan-1999 RMK Run loaders in a simulated RISC OS environment. + * 17-Apr-1999 RMK Support for EASI Type C cycles. */ #define ECARD_C +#define __KERNEL_SYSCALLS__ #include <linux/config.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/malloc.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/unistd.h> #include <linux/init.h> -#include <asm/io.h> -#include <asm/hardware.h> +#include <asm/dma.h> #include <asm/ecard.h> +#include <asm/hardware.h> +#include <asm/io.h> #include <asm/irq.h> -#include <asm/dma.h> +#include <asm/pgtable.h> #ifdef CONFIG_ARCH_ARC #include <asm/arch/oldlatches.h> @@ -40,45 +51,420 @@ #define oldlatch_init() #endif -#define BLACKLIST_NAME(m,p,s) { m, p, NULL, s } -#define BLACKLIST_LOADER(m,p,l) { m, p, l, NULL } -#define BLACKLIST_NOLOADER(m,p) { m, p, noloader, blacklisted_str } -#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE) +enum req { + req_readbytes, + req_reset +}; -extern unsigned long atomwide_serial_loader[], oak_scsi_loader[], noloader[]; -static const char blacklisted_str[] = "*loader s/w is not 32-bit compliant*"; +struct ecard_request { + enum req req; + ecard_t 
*ec; + unsigned int address; + unsigned int length; + unsigned int use_loader; + void *buffer; +}; -static const struct expcard_blacklist { +struct expcard_blacklist { unsigned short manufacturer; unsigned short product; - const loader_t loader; const char *type; -} blacklist[] = { -/* Cards without names */ - BLACKLIST_NAME(MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1"), - -/* Cards with corrected loader */ - BLACKLIST_LOADER(MANU_ATOMWIDE, PROD_ATOMWIDE_3PSERIAL, atomwide_serial_loader), - BLACKLIST_LOADER(MANU_OAK, PROD_OAK_SCSI, oak_scsi_loader), +}; -/* Supported cards with broken loader */ - { MANU_ALSYSTEMS, PROD_ALSYS_SCSIATAPI, noloader, "AlSystems PowerTec SCSI" }, +static ecard_t *cards; +static ecard_t *slot_to_expcard[MAX_ECARDS]; +static unsigned int ectcr; +#ifdef HAS_EXPMASK +static unsigned int have_expmask; +#endif -/* Unsupported cards with no loader */ - BLACKLIST_NOLOADER(MANU_MCS, PROD_MCS_CONNECT32) +/* List of descriptions of cards which don't have an extended + * identification, or chunk directories containing a description. + */ +static const struct expcard_blacklist __init blacklist[] = { + { MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" } }; +asmlinkage extern int +ecard_loader_reset(volatile unsigned char *pa, loader_t loader); +asmlinkage extern int +ecard_loader_read(int off, volatile unsigned char *pa, loader_t loader); extern int setup_arm_irq(int, struct irqaction *); +extern void do_ecard_IRQ(int, struct pt_regs *); + + +static void +ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs); + +static struct irqaction irqexpansioncard = { + ecard_irq_noexpmask, SA_INTERRUPT, 0, "expansion cards", NULL, NULL +}; + +static inline unsigned short +ecard_getu16(unsigned char *v) +{ + return v[0] | v[1] << 8; +} +static inline signed long +ecard_gets24(unsigned char *v) +{ + return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 
0xff000000 : 0); +} + +static inline ecard_t * +slot_to_ecard(unsigned int slot) +{ + return slot < MAX_ECARDS ? slot_to_expcard[slot] : NULL; +} + +/* ===================== Expansion card daemon ======================== */ /* - * from linux/arch/arm/kernel/irq.c + * Since the loader programs on the expansion cards need to be run + * in a specific environment, create a separate task with this + * environment up, and pass requests to this task as and when we + * need to. + * + * This should allow 99% of loaders to be called from Linux. + * + * From a security standpoint, we trust the card vendors. This + * may be a misplaced trust. */ -extern void do_ecard_IRQ(int irq, struct pt_regs *); +#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE) +#define POD_INT_ADDR(x) ((volatile unsigned char *)\ + ((BUS_ADDR((x)) - IO_BASE) + IO_START)) -static ecard_t expcard[MAX_ECARDS]; -static signed char irqno_to_expcard[16]; -static unsigned int ecard_numcards, ecard_numirqcards; -static unsigned int have_expmask; +static void +ecard_task_reset(struct ecard_request *req) +{ + if (req->ec == NULL) { + ecard_t *ec; + + for (ec = cards; ec; ec = ec->next) { + printk(KERN_DEBUG "Resetting card %d\n", + ec->slot_no); + + if (ec->loader) + ecard_loader_reset(POD_INT_ADDR(ec->podaddr), + ec->loader); + } + printk(KERN_DEBUG "All cards reset\n"); + } else if (req->ec->loader) + ecard_loader_reset(POD_INT_ADDR(req->ec->podaddr), + req->ec->loader); +} + +static void +ecard_task_readbytes(struct ecard_request *req) +{ + unsigned char *buf = (unsigned char *)req->buffer; + volatile unsigned char *base_addr = + (volatile unsigned char *)POD_INT_ADDR(req->ec->podaddr); + unsigned int len = req->length; + + if (req->ec->slot_no == 8) { + /* + * The card maintains an index which + * increments the address into a 4096-byte + * page on each access. We need to keep + * track of the counter. 
+ */ + static unsigned int index; + unsigned int offset, page; + unsigned char byte = 0; /* keep gcc quiet */ + + offset = req->address & 4095; + page = req->address >> 12; + + if (page > 256) + return; + + page *= 4; + + if (offset == 0 || index > offset) { + /* + * We need to reset the index counter. + */ + *base_addr = 0; + index = 0; + } + + while (index <= offset) { + byte = base_addr[page]; + index += 1; + } + + while (len--) { + *buf++ = byte; + if (len) { + byte = base_addr[page]; + index += 1; + } + } + } else { + unsigned int off = req->address; + + if (!req->use_loader || !req->ec->loader) { + off *= 4; + while (len--) { + *buf++ = base_addr[off]; + off += 4; + } + } else { + while(len--) { + /* + * The following is required by some + * expansion card loader programs. + */ + *(unsigned long *)0x108 = 0; + *buf++ = ecard_loader_read(off++, base_addr, + req->ec->loader); + } + } + } + +} + +#ifdef CONFIG_CPU_32 +static pid_t ecard_pid; +static wait_queue_head_t ecard_wait; +static wait_queue_head_t ecard_done; +static struct ecard_request *ecard_req; + +/* + * Set up the expansion card daemon's environment. + */ +static void +ecard_init_task(void) +{ + /* We want to set up the page tables for the following mapping: + * Virtual Physical + * 0x03000000 0x03000000 + * 0x03010000 unmapped + * 0x03210000 0x03210000 + * 0x03400000 unmapped + * 0x08000000 0x08000000 + * 0x10000000 unmapped + * + * FIXME: we don't follow this 100% yet. 
+ */ + pgd_t *src_pgd, *dst_pgd; + unsigned int dst_addr = IO_START; + + src_pgd = pgd_offset(current->mm, IO_BASE); + dst_pgd = pgd_offset(current->mm, dst_addr); + + while (dst_addr < IO_START + IO_SIZE) { + *dst_pgd++ = *src_pgd++; + dst_addr += PGDIR_SIZE; + } + + flush_tlb_range(current->mm, IO_START, IO_START + IO_SIZE); + + dst_addr = EASI_START; + src_pgd = pgd_offset(current->mm, EASI_BASE); + dst_pgd = pgd_offset(current->mm, dst_addr); + + while (dst_addr < EASI_START + EASI_SIZE) { + *dst_pgd++ = *src_pgd++; + dst_addr += PGDIR_SIZE; + } + + flush_tlb_range(current->mm, EASI_START, EASI_START + EASI_SIZE); +} + +static int +ecard_task(void * unused) +{ + current->session = 1; + current->pgrp = 1; + + /* + * We don't want /any/ signals, not even SIGKILL + */ + sigfillset(&current->blocked); + sigemptyset(&current->signal); + + strcpy(current->comm, "kecardd"); + + /* + * Set up the environment + */ + ecard_init_task(); + + while (1) { + struct ecard_request *req; + + do { + req = xchg(&ecard_req, NULL); + + if (req == NULL) { + sigemptyset(&current->signal); + interruptible_sleep_on(&ecard_wait); + } + } while (req == NULL); + + switch (req->req) { + case req_readbytes: + ecard_task_readbytes(req); + break; + + case req_reset: + ecard_task_reset(req); + break; + } + wake_up(&ecard_done); + } +} + +/* + * Wake the expansion card daemon to action our request. + * + * FIXME: The test here is not sufficient to detect if the + * kcardd is running. + */ +static inline void +ecard_call(struct ecard_request *req) +{ + /* + * If we're called from task 0, or from an + * interrupt (will be keyboard interrupt), + * we forcefully set up the memory map, and + * call the loader. We can't schedule, or + * sleep for this call. 
+ */ + if ((current == task[0] || in_interrupt()) && + req->req == req_reset && req->ec == NULL) { + ecard_init_task(); + ecard_task_reset(req); + } else { + if (ecard_pid <= 0) + ecard_pid = kernel_thread(ecard_task, NULL, 0); + + ecard_req = req; + + wake_up(&ecard_wait); + + sleep_on(&ecard_done); + } +} +#else +/* + * On 26-bit processors, we don't need the kcardd thread to access the + * expansion card loaders. We do it directly. + */ +static inline void +ecard_call(struct ecard_request *req) +{ + if (req->req == req_reset) + ecard_task_reset(req); + else + ecard_task_readbytes(req); +} +#endif + +/* ======================= Mid-level card control ===================== */ +/* + * This is called to reset the loaders for each expansion card on reboot. + * + * This is required to make sure that the card is in the correct state + * that RiscOS expects it to be. + */ +void +ecard_reset(int slot) +{ + struct ecard_request req; + + req.req = req_reset; + + if (slot < 0) + req.ec = NULL; + else + req.ec = slot_to_ecard(slot); + + ecard_call(&req); + +#ifdef HAS_EXPMASK + if (have_expmask && slot < 0) { + have_expmask |= ~0; + EXPMASK_ENABLE = have_expmask; + } +#endif +} + +static void +ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld) +{ + struct ecard_request req; + + req.req = req_readbytes; + req.ec = ec; + req.address = off; + req.length = len; + req.use_loader = useld; + req.buffer = addr; + + ecard_call(&req); +} + +int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num) +{ + struct ex_chunk_dir excd; + int index = 16; + int useld = 0; + + if (!ec->cid.cd) + return 0; + + while(1) { + ecard_readbytes(&excd, ec, index, 8, useld); + index += 8; + if (c_id(&excd) == 0) { + if (!useld && ec->loader) { + useld = 1; + index = 0; + continue; + } + return 0; + } + if (c_id(&excd) == 0xf0) { /* link */ + index = c_start(&excd); + continue; + } + if (c_id(&excd) == 0x80) { /* loader */ + if (!ec->loader) { + ec->loader = 
(loader_t)kmalloc(c_len(&excd), + GFP_KERNEL); + if (ec->loader) + ecard_readbytes(ec->loader, ec, + (int)c_start(&excd), + c_len(&excd), useld); + else + return 0; + } + continue; + } + if (c_id(&excd) == id && num-- == 0) + break; + } + + if (c_id(&excd) & 0x80) { + switch (c_id(&excd) & 0x70) { + case 0x70: + ecard_readbytes((unsigned char *)excd.d.string, ec, + (int)c_start(&excd), c_len(&excd), + useld); + break; + case 0x00: + break; + } + } + cd->start_offset = c_start(&excd); + memcpy(cd->d.string, excd.d.string, 256); + return 1; +} + +/* ======================= Interrupt control ============================ */ static void ecard_def_irq_enable(ecard_t *ec, int irqnr) { @@ -100,6 +486,11 @@ static void ecard_def_irq_disable(ecard_t *ec, int irqnr) #endif } +static int ecard_def_irq_pending(ecard_t *ec) +{ + return !ec->irqmask || ec->irqaddr[0] & ec->irqmask; +} + static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr) { panic("ecard_def_fiq_enable called - impossible"); @@ -110,11 +501,18 @@ static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr) panic("ecard_def_fiq_disable called - impossible"); } +static int ecard_def_fiq_pending(ecard_t *ec) +{ + return !ec->fiqmask || ec->fiqaddr[0] & ec->fiqmask; +} + static expansioncard_ops_t ecard_default_ops = { ecard_def_irq_enable, ecard_def_irq_disable, + ecard_def_irq_pending, ecard_def_fiq_enable, - ecard_def_fiq_disable + ecard_def_fiq_disable, + ecard_def_fiq_pending }; /* @@ -125,10 +523,9 @@ static expansioncard_ops_t ecard_default_ops = { */ void ecard_enableirq(unsigned int irqnr) { - irqnr &= 7; - if (irqnr < MAX_ECARDS && irqno_to_expcard[irqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[irqnr]; + ecard_t *ec = slot_to_ecard(irqnr - 32); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -142,10 +539,9 @@ void ecard_enableirq(unsigned int irqnr) void ecard_disableirq(unsigned int irqnr) { - irqnr &= 7; - if (irqnr < MAX_ECARDS && irqno_to_expcard[irqnr] != -1) { - ecard_t *ec = 
expcard + irqno_to_expcard[irqnr]; + ecard_t *ec = slot_to_ecard(irqnr - 32); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -156,10 +552,9 @@ void ecard_disableirq(unsigned int irqnr) void ecard_enablefiq(unsigned int fiqnr) { - fiqnr &= 7; - if (fiqnr < MAX_ECARDS && irqno_to_expcard[fiqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[fiqnr]; + ecard_t *ec = slot_to_ecard(fiqnr); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -173,10 +568,9 @@ void ecard_enablefiq(unsigned int fiqnr) void ecard_disablefiq(unsigned int fiqnr) { - fiqnr &= 7; - if (fiqnr < MAX_ECARDS && irqno_to_expcard[fiqnr] != -1) { - ecard_t *ec = expcard + irqno_to_expcard[fiqnr]; + ecard_t *ec = slot_to_ecard(fiqnr); + if (ec) { if (!ec->ops) ec->ops = &ecard_default_ops; @@ -185,41 +579,89 @@ void ecard_disablefiq(unsigned int fiqnr) } } -static void ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs) +static void +ecard_dump_irq_state(ecard_t *ec) { - const int num_cards = ecard_numirqcards; - int i, called = 0; + printk(" %d: %sclaimed, ", + ec->slot_no, + ec->claimed ? "" : "not "); + + if (ec->ops && ec->ops->irqpending && + ec->ops != &ecard_default_ops) + printk("irq %spending\n", + ec->ops->irqpending(ec) ? "" : "not "); + else + printk("irqaddr %p, mask = %02X, status = %02X\n", + ec->irqaddr, ec->irqmask, *ec->irqaddr); +} - for (i = 0; i < num_cards; i++) { - if (expcard[i].claimed && expcard[i].irq && - (!expcard[i].irqmask || - expcard[i].irqaddr[0] & expcard[i].irqmask)) { - do_ecard_IRQ(expcard[i].irq, regs); - called ++; +static void +ecard_check_lockup(void) +{ + static int last, lockup; + ecard_t *ec; + + /* + * If the timer interrupt has not run since the last million + * unrecognised expansion card interrupts, then there is + * something seriously wrong. Disable the expansion card + * interrupts so at least we can continue. + * + * Maybe we ought to start a timer to re-enable them some time + * later? 
+ */ + if (last == jiffies) { + lockup += 1; + if (lockup > 1000000) { + printk(KERN_ERR "\nInterrupt lockup detected - " + "disabling all expansion card interrupts\n"); + + disable_irq(IRQ_EXPANSIONCARD); + + printk("Expansion card IRQ state:\n"); + + for (ec = cards; ec; ec = ec->next) + ecard_dump_irq_state(ec); } + } else + lockup = 0; + + /* + * If we did not recognise the source of this interrupt, + * warn the user, but don't flood the user with these messages. + */ + if (!last || time_after(jiffies, last + 5*HZ)) { + last = jiffies; + printk(KERN_WARNING "Unrecognised interrupt from backplane\n"); } - cli(); - if (called == 0) { - static int last, lockup; - - if (last == jiffies) { - lockup += 1; - if (lockup > 1000000) { - printk(KERN_ERR "\nInterrupt lockup detected - disabling expansion card IRQs\n"); - disable_irq(intr_no); - printk("Expansion card IRQ state:\n"); - for (i = 0; i < num_cards; i++) - printk(" %d: %sclaimed, irqaddr = %p, irqmask = %X, status=%X\n", expcard[i].irq - 32, - expcard[i].claimed ? 
"" : "not", expcard[i].irqaddr, expcard[i].irqmask, *expcard[i].irqaddr); - } - } else - lockup = 0; +} + +static void +ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs) +{ + ecard_t *ec; + int called = 0; + + for (ec = cards; ec; ec = ec->next) { + int pending; + + if (!ec->claimed || ec->irq == NO_IRQ || ec->slot_no == 8) + continue; - if (!last || time_after(jiffies, last + 5*HZ)) { - last = jiffies; - printk(KERN_ERR "\nUnrecognised interrupt from backplane\n"); + if (ec->ops && ec->ops->irqpending) + pending = ec->ops->irqpending(ec); + else + pending = ecard_default_ops.irqpending(ec); + + if (pending) { + do_ecard_IRQ(ec->irq, regs); + called ++; } } + cli(); + + if (called == 0) + ecard_check_lockup(); } #ifdef HAS_EXPMASK @@ -234,31 +676,35 @@ static unsigned char first_set[] = 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 }; -static void ecard_irq_expmask(int intr_no, void *dev_id, struct pt_regs *regs) +static void +ecard_irq_expmask(int intr_no, void *dev_id, struct pt_regs *regs) { const unsigned int statusmask = 15; unsigned int status; status = EXPMASK_STATUS & statusmask; if (status) { - unsigned int irqno; + unsigned int slot; ecard_t *ec; again: - irqno = first_set[status]; - ec = expcard + irqno_to_expcard[irqno]; + slot = first_set[status]; + ec = slot_to_ecard(slot); if (ec->claimed) { unsigned int oldexpmask; /* - * this ugly code is so that we can operate a prioritorising system. + * this ugly code is so that we can operate a + * prioritorising system: + * * Card 0 highest priority * Card 1 * Card 2 * Card 3 lowest priority + * * Serial cards should go in 0/1, ethernet/scsi in 2/3 * otherwise you will lose serial data at high speeds! 
*/ oldexpmask = have_expmask; - EXPMASK_ENABLE = (have_expmask &= priority_masks[irqno]); + EXPMASK_ENABLE = (have_expmask &= priority_masks[slot]); sti(); do_ecard_IRQ(ec->irq, regs); cli(); @@ -267,15 +713,18 @@ again: if (status) goto again; } else { - printk(KERN_WARNING "card%d: interrupt from unclaimed card???\n", irqno); - EXPMASK_ENABLE = (have_expmask &= ~(1 << irqno)); + printk(KERN_WARNING "card%d: interrupt from unclaimed " + "card???\n", slot); + EXPMASK_ENABLE = (have_expmask &= ~(1 << slot)); } } else printk(KERN_WARNING "Wild interrupt from backplane (masks)\n"); } -static int ecard_checkirqhw(void) +__initfunc(static void +ecard_probeirqhw(void)) { + ecard_t *ec; int found; EXPMASK_ENABLE = 0x00; @@ -283,62 +732,80 @@ static int ecard_checkirqhw(void) found = ((EXPMASK_STATUS & 15) == 0); EXPMASK_ENABLE = 0xff; - return found; + if (!found) + return; + + printk(KERN_DEBUG "Expansion card interrupt " + "management hardware found\n"); + + irqexpansioncard.handler = ecard_irq_expmask; + + /* for each card present, set a bit to '1' */ + have_expmask = 0x80000000; + + for (ec = cards; ec; ec = ec->next) + have_expmask |= 1 << ec->slot_no; + + EXPMASK_ENABLE = have_expmask; } +#else +#define ecard_probeirqhw() +#endif + +#ifndef IO_EC_MEMC8_BASE +#define IO_EC_MEMC8_BASE 0 #endif -static void ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld) +unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) { - extern int ecard_loader_read(int off, volatile unsigned int pa, loader_t loader); - unsigned char *a = (unsigned char *)addr; + unsigned long address = 0; + int slot = ec->slot_no; - if (ec->slot_no == 8) { - static unsigned int lowaddress; - unsigned int laddr, haddr; - unsigned char byte = 0; /* keep gcc quiet */ + if (ec->slot_no == 8) + return IO_EC_MEMC8_BASE; - laddr = off & 4095; /* number of bytes to read from offset + base addr */ - haddr = off >> 12; /* offset into card from base addr */ + ectcr &= ~(1 << 
slot); - if (haddr > 256) - return; + switch (type) { + case ECARD_MEMC: + if (slot < 4) + address = IO_EC_MEMC_BASE + (slot << 12); + break; - /* - * If we require a low address or address 0, then reset, and start again... - */ - if (!off || lowaddress > laddr) { - outb(0, ec->podaddr); - lowaddress = 0; - } - while (lowaddress <= laddr) { - byte = inb(ec->podaddr + haddr); - lowaddress += 1; - } - while (len--) { - *a++ = byte; - if (len) { - byte = inb(ec->podaddr + haddr); - lowaddress += 1; - } - } - } else { - if (!useld || !ec->loader) { - while(len--) - *a++ = inb(ec->podaddr + (off++)); - } else { - while(len--) { - *(unsigned long *)0x108 = 0; /* hack for some loaders!!! */ - *a++ = ecard_loader_read(off++, BUS_ADDR(ec->podaddr), ec->loader); - } - } + case ECARD_IOC: + if (slot < 4) + address = IO_EC_IOC_BASE + (slot << 12); +#ifdef IO_EC_IOC4_BASE + else + address = IO_EC_IOC4_BASE + ((slot - 4) << 12); +#endif + if (address) + address += speed << 17; + break; + +#ifdef IO_EC_EASI_BASE + case ECARD_EASI: + address = IO_EC_EASI_BASE + (slot << 22); + if (speed == ECARD_FAST) + ectcr |= 1 << slot; + break; +#endif } + +#ifdef IOMD_ECTCR + outb(ectcr, IOMD_ECTCR); +#endif + return address; } +static const char *unknown = "*unknown*"; + static int ecard_prints(char *buffer, ecard_t *ec) { char *start = buffer; - buffer += sprintf(buffer, "\n %d: ", ec->slot_no); + buffer += sprintf(buffer, " %d: %s ", ec->slot_no, + ec->type == ECARD_EASI ? 
"EASI" : " "); if (ec->cid.id == 0) { struct in_chunk_dir incd; @@ -346,28 +813,57 @@ static int ecard_prints(char *buffer, ecard_t *ec) buffer += sprintf(buffer, "[%04X:%04X] ", ec->cid.manufacturer, ec->cid.product); - if (!ec->card_desc && ec->cid.is && ec->cid.cd && - ecard_readchunk(&incd, ec, 0xf5, 0)) - ec->card_desc = incd.d.string; + if (!ec->card_desc && ec->cid.cd && + ecard_readchunk(&incd, ec, 0xf5, 0)) { + ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL); - if (!ec->card_desc) - ec->card_desc = "*unknown*"; + if (ec->card_desc) + strcpy(ec->card_desc, incd.d.string); + } - buffer += sprintf(buffer, "%s", ec->card_desc); + buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*"); } else - buffer += sprintf(buffer, "Simple card %d", ec->cid.id); + buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id); return buffer - start; } -static inline unsigned short ecard_getu16(unsigned char *v) +int get_ecard_dev_info(char *buf, char **start, off_t pos, int count, int wr) { - return v[0] | v[1] << 8; + ecard_t *ec = cards; + off_t at = 0; + int len, cnt; + + cnt = 0; + while (ec && count > cnt) { + len = ecard_prints(buf, ec); + at += len; + if (at >= pos) { + if (!*start) { + *start = buf + (pos - (at - len)); + cnt = at - pos; + } else + cnt += len; + buf += len; + } + ec = ec->next; + } + return (count > cnt) ? cnt : count; } -static inline signed long ecard_gets24(unsigned char *v) +static struct proc_dir_entry proc_ecard_devices = { + PROC_BUS_ECARD_DEVICES, 7, "devices", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations, + get_ecard_dev_info +}; + +static struct proc_dir_entry *proc_bus_ecard_dir; + +static void ecard_proc_init(void) { - return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 
0xff000000 : 0); + proc_bus_ecard_dir = create_proc_entry("ecard", S_IFDIR, proc_bus); + proc_register(proc_bus_ecard_dir, &proc_ecard_devices); } /* @@ -376,33 +872,39 @@ static inline signed long ecard_gets24(unsigned char *v) * If bit 1 of the first byte of the card is set, then the * card does not exist. */ -__initfunc(static int ecard_probe(int card, int freeslot, card_type_t type)) +__initfunc(static int +ecard_probe(int slot, card_type_t type)) { - ecard_t *ec = expcard + freeslot; + ecard_t **ecp; + ecard_t *ec; struct ex_ecid cid; char buffer[200]; - int i; + int i, rc = -ENOMEM; + + ec = kmalloc(sizeof(ecard_t), GFP_KERNEL); + + if (!ec) + goto nodev; - irqno_to_expcard[card] = -1; + memset(ec, 0, sizeof(ecard_t)); - ec->slot_no = card; + ec->slot_no = slot; + ec->type = type; ec->irq = NO_IRQ; ec->fiq = NO_IRQ; ec->dma = NO_DMA; ec->card_desc = NULL; ec->ops = &ecard_default_ops; + rc = -ENODEV; if ((ec->podaddr = ecard_address(ec, type, ECARD_SYNC)) == 0) - return 0; + goto nodev; cid.r_zero = 1; ecard_readbytes(&cid, ec, 0, 16, 0); if (cid.r_zero) - return 0; + goto nodev; - irqno_to_expcard[card] = freeslot; - - ec->type = type; ec->cid.id = cid.r_id; ec->cid.cd = cid.r_cd; ec->cid.is = cid.r_is; @@ -415,9 +917,9 @@ __initfunc(static int ecard_probe(int card, int freeslot, card_type_t type)) ec->cid.fiqmask = cid.r_fiqmask; ec->cid.fiqoff = ecard_gets24(cid.r_fiqoff); ec->fiqaddr = - ec->irqaddr = (unsigned char *)BUS_ADDR(ec->podaddr); + ec->irqaddr = (unsigned char *)ioaddr(ec->podaddr); - if (ec->cid.cd && ec->cid.is) { + if (ec->cid.is) { ec->irqmask = ec->cid.irqmask; ec->irqaddr += ec->cid.irqoff; ec->fiqmask = ec->cid.fiqmask; @@ -430,88 +932,69 @@ __initfunc(static int ecard_probe(int card, int freeslot, card_type_t type)) for (i = 0; i < sizeof(blacklist) / sizeof(*blacklist); i++) if (blacklist[i].manufacturer == ec->cid.manufacturer && blacklist[i].product == ec->cid.product) { - ec->loader = blacklist[i].loader; ec->card_desc = 
blacklist[i].type; break; } - ecard_prints(buffer, ec); - printk("%s", buffer); - - ec->irq = 32 + card; + ec->irq = 32 + slot; #ifdef IO_EC_MEMC8_BASE - if (card == 8) + if (slot == 8) ec->irq = 11; #endif #ifdef CONFIG_ARCH_RPC /* On RiscPC, only first two slots have DMA capability */ - if (card < 2) - ec->dma = 2 + card; + if (slot < 2) + ec->dma = 2 + slot; #endif #if 0 /* We don't support FIQs on expansion cards at the moment */ - ec->fiq = 96 + card; + ec->fiq = 96 + slot; #endif - return 1; -} + rc = 0; -/* - * This is called to reset the loaders for each expansion card on reboot. - * - * This is required to make sure that the card is in the correct state - * that RiscOS expects it to be. - */ -void ecard_reset(int card) -{ - extern int ecard_loader_reset(volatile unsigned int pa, loader_t loader); + for (ecp = &cards; *ecp; ecp = &(*ecp)->next); - if (card >= ecard_numcards) - return; - - if (card < 0) { - for (card = 0; card < ecard_numcards; card++) - if (expcard[card].loader) - ecard_loader_reset(BUS_ADDR(expcard[card].podaddr), - expcard[card].loader); - } else - if (expcard[card].loader) - ecard_loader_reset(BUS_ADDR(expcard[card].podaddr), - expcard[card].loader); + *ecp = ec; -#ifdef HAS_EXPMASK - if (have_expmask) { - have_expmask |= ~0; - EXPMASK_ENABLE = have_expmask; +nodev: + if (rc && ec) + kfree(ec); + else { + slot_to_expcard[slot] = ec; + + ecard_prints(buffer, ec); + printk("%s", buffer); } -#endif + return rc; } -static unsigned int ecard_startcard; +static ecard_t *finding_pos; void ecard_startfind(void) { - ecard_startcard = 0; + finding_pos = NULL; } ecard_t *ecard_find(int cid, const card_ids *cids) { - int card; + if (!finding_pos) + finding_pos = cards; + else + finding_pos = finding_pos->next; + + for (; finding_pos; finding_pos = finding_pos->next) { + if (finding_pos->claimed) + continue; - if (!cids) { - for (card = ecard_startcard; card < ecard_numcards; card++) - if (!expcard[card].claimed && - (expcard[card].cid.id ^ cid) == 
0) + if (!cids) { + if ((finding_pos->cid.id ^ cid) == 0) break; - } else { - for (card = ecard_startcard; card < ecard_numcards; card++) { + } else { unsigned int manufacturer, product; int i; - if (expcard[card].claimed) - continue; - - manufacturer = expcard[card].cid.manufacturer; - product = expcard[card].cid.product; + manufacturer = finding_pos->cid.manufacturer; + product = finding_pos->cid.product; for (i = 0; cids[i].manufacturer != 65535; i++) if (manufacturer == cids[i].manufacturer && @@ -523,111 +1006,24 @@ ecard_t *ecard_find(int cid, const card_ids *cids) } } - ecard_startcard = card + 1; - - return card < ecard_numcards ? &expcard[card] : NULL; + return finding_pos; } -int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num) +__initfunc(static void ecard_free_all(void)) { - struct ex_chunk_dir excd; - int index = 16; - int useld = 0; + ecard_t *ec, *ecn; - if (!ec->cid.is || !ec->cid.cd) - return 0; - - while(1) { - ecard_readbytes(&excd, ec, index, 8, useld); - index += 8; - if (c_id(&excd) == 0) { - if (!useld && ec->loader) { - useld = 1; - index = 0; - continue; - } - return 0; - } - if (c_id(&excd) == 0xf0) { /* link */ - index = c_start(&excd); - continue; - } - if (c_id(&excd) == 0x80) { /* loader */ - if (!ec->loader) { - ec->loader = (loader_t)kmalloc(c_len(&excd), GFP_KERNEL); - ecard_readbytes(ec->loader, ec, (int)c_start(&excd), c_len(&excd), useld); - } - continue; - } - if (c_id(&excd) == id && num-- == 0) - break; - } + for (ec = cards; ec; ec = ecn) { + ecn = ec->next; - if (c_id(&excd) & 0x80) { - switch (c_id(&excd) & 0x70) { - case 0x70: - ecard_readbytes((unsigned char *)excd.d.string, ec, - (int)c_start(&excd), c_len(&excd), useld); - break; - case 0x00: - break; - } + kfree(ec); } - cd->start_offset = c_start(&excd); - memcpy(cd->d.string, excd.d.string, 256); - return 1; -} - -unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) -{ - switch (ec->slot_no) { - case 0 ... 
3: - switch (type) { - case ECARD_MEMC: - return IO_EC_MEMC_BASE + (ec->slot_no << 12); - - case ECARD_IOC: - return IO_EC_IOC_BASE + (speed << 17) + (ec->slot_no << 12); - -#ifdef IO_EC_EASI_BASE - case ECARD_EASI: - return IO_EC_EASI_BASE + (ec->slot_no << 22); -#endif - } - break; - case 4 ... 7: - switch (type) { -#ifdef IO_EC_IOC4_BASE - case ECARD_IOC: - return IO_EC_IOC4_BASE + (speed << 17) + ((ec->slot_no - 4) << 12); -#endif -#ifdef IO_EC_EASI_BASE - case ECARD_EASI: - return IO_EC_EASI_BASE + (ec->slot_no << 22); -#endif - default: - break; - } - break; + cards = NULL; -#ifdef IO_EC_MEMC8_BASE - case 8: - return IO_EC_MEMC8_BASE; -#endif - } - return 0; + memset(slot_to_expcard, 0, sizeof(slot_to_expcard)); } -static struct irqaction irqexpansioncard = { - ecard_irq_noexpmask, - SA_INTERRUPT, - 0, - "expansion cards", - NULL, - NULL -}; - /* * Initialise the expansion card system. * Locate all hardware - interrupt management and @@ -635,51 +1031,38 @@ static struct irqaction irqexpansioncard = { */ __initfunc(void ecard_init(void)) { - int i, nc = 0; + int slot; - memset(expcard, 0, sizeof(expcard)); + oldlatch_init(); -#ifdef HAS_EXPMASK - if (ecard_checkirqhw()) { - printk(KERN_DEBUG "Expansion card interrupt management hardware found\n"); - irqexpansioncard.handler = ecard_irq_expmask; - irqexpansioncard.flags |= SA_IRQNOMASK; - have_expmask = -1; - } +#ifdef CONFIG_CPU_32 + init_waitqueue_head(&ecard_wait); + init_waitqueue_head(&ecard_done); #endif - printk("Installed expansion cards:"); + printk("Probing expansion cards: (does not imply support)\n"); - /* - * First of all, probe all cards on the expansion card interrupt line - */ - for (i = 0; i < 8; i++) - if (ecard_probe(i, nc, ECARD_IOC) || ecard_probe(i, nc, ECARD_EASI)) - nc += 1; - else - have_expmask &= ~(1<<i); - - ecard_numirqcards = nc; + for (slot = 0; slot < 8; slot ++) { + if (ecard_probe(slot, ECARD_EASI) == -ENODEV) + ecard_probe(slot, ECARD_IOC); + } - /* Now probe other cards with 
different interrupt lines - */ #ifdef IO_EC_MEMC8_BASE - if (ecard_probe(8, nc, ECARD_IOC)) - nc += 1; + ecard_probe(8, ECARD_IOC); #endif - printk("\n"); - ecard_numcards = nc; + ecard_probeirqhw(); - if (nc && setup_arm_irq(IRQ_EXPANSIONCARD, &irqexpansioncard)) { - printk("Could not allocate interrupt for expansion cards\n"); - return; + if (setup_arm_irq(IRQ_EXPANSIONCARD, &irqexpansioncard)) { + printk(KERN_ERR "Unable to claim IRQ%d for expansion cards\n", + IRQ_EXPANSIONCARD); + ecard_free_all(); } - -#ifdef HAS_EXPMASK - if (nc && have_expmask) - EXPMASK_ENABLE = have_expmask; -#endif - oldlatch_init(); + ecard_proc_init(); } + +EXPORT_SYMBOL(ecard_startfind); +EXPORT_SYMBOL(ecard_find); +EXPORT_SYMBOL(ecard_readchunk); +EXPORT_SYMBOL(ecard_address); diff --git a/arch/arm/kernel/entry-armo.S b/arch/arm/kernel/entry-armo.S index 3ca29cd2c..758163f07 100644 --- a/arch/arm/kernel/entry-armo.S +++ b/arch/arm/kernel/entry-armo.S @@ -159,8 +159,8 @@ irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 .macro restore_user_regs ldmia sp, {r0 - lr}^ mov r0, r0 - add sp, sp, #15*4 - ldr lr, [sp], #8 + ldr lr, [sp, #15*4] + add sp, sp, #15*4+8 movs pc, lr .endm @@ -226,13 +226,6 @@ irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 str r0, [sp, #S_OLD_R0] ;\ mov fp, #0 -#define USER_RESTORE_ALL \ - ldmia sp, {r0 - lr}^ ;\ - mov r0, r0 ;\ - add sp, sp, #15*4 ;\ - ldr lr, [sp], #8 ;\ - movs pc, lr - #define SVC_RESTORE_ALL \ ldmfd sp, {r0 - pc}^ @@ -253,7 +246,7 @@ _unexp_fiq: ldr sp, .LCfiq mov r0, r0 movs pc, lr -Lfiqmsg: .ascii "*** Unexpeced FIQ\n\0" +Lfiqmsg: .ascii "*** Unexpected FIQ\n\0" .align .LCfiq: .word __temp_fiq @@ -315,14 +308,14 @@ fpe_not_present: and r4, r10, #255 @ get offset and r6, r10, #0x000f0000 tst r10, #0x00800000 @ +/- - rsbeq r4, r4, #0 ldr r5, [sp, r6, lsr #14] @ Load reg + rsbeq r4, r4, #0 add r5, r5, r4, lsl #2 str r5, [sp, r6, lsr #14] @ Save reg b ret_from_exception -wfs_mask_data: .word 0x0e200110 @ WFS - 
.word 0x0fff0fff +wfs_mask_data: .word 0x0e200110 @ WFS/RFS + .word 0x0fef0fff .word 0x0d0d0100 @ LDF [sp]/STF [sp] .word 0x0d0b0100 @ LDF [fp]/STF [fp] .word 0x0f0f0f00 @@ -341,8 +334,7 @@ vector_prefetch: save_user_regs teqp pc, #0x00000003 @ NOT a problem - doesnt change mode mask_pc r0, lr @ Address of abort - mov r1, #FAULT_CODE_PREFETCH|FAULT_CODE_USER @ Error code - mov r2, sp @ Tasks registers + mov r1, sp @ Tasks registers bl SYMBOL_NAME(do_PrefetchAbort) teq r0, #0 @ If non-zero, we believe this abort.. bne ret_from_sys_call @@ -451,6 +443,7 @@ vector_IRQ: ldr r13, .LCirq @ I will leave this one in just in case... adr lr, 1b orr lr, lr, #0x08000003 @ Force SVC bne do_IRQ + mov r4, #0 b ret_with_reschedule irq_prio_table @@ -562,8 +555,8 @@ Ldata_ldrstr_numindex: and r0, r0, #15 << 2 @ Mask out reg. teq r0, #15 << 2 ldr r0, [r3, r0] @ Get register - biceq r0, r0, #PCMASK mov r1, r4, lsl #20 + biceq r0, r0, #PCMASK tst r4, #1 << 23 addne r0, r0, r1, lsr #20 subeq r0, r0, r1, lsr #20 @@ -578,12 +571,12 @@ Ldata_ldrstr_regindex: and r0, r0, #15 << 2 @ Mask out reg. teq r0, #15 << 2 ldr r0, [r3, r0] @ Get register - biceq r0, r0, #PCMASK and r7, r4, #15 + biceq r0, r0, #PCMASK teq r7, #15 @ Check for PC ldr r7, [r3, r7, lsl #2] @ Get Rm - biceq r7, r7, #PCMASK and r8, r4, #0x60 @ Get shift types + biceq r7, r7, #PCMASK mov r9, r4, lsr #7 @ Get shift amount and r9, r9, #31 teq r8, #0 @@ -645,8 +638,8 @@ Ldata_ldcstc_pre: and r0, r0, #15 << 2 @ Mask out reg. 
teq r0, #15 << 2 ldr r0, [r3, r0] @ Get register - biceq r0, r0, #PCMASK mov r1, r4, lsl #24 @ Get offset + biceq r0, r0, #PCMASK tst r4, #1 << 23 addne r0, r0, r1, lsr #24 subeq r0, r0, r1, lsr #24 @@ -656,9 +649,54 @@ Ldata_ldcstc_pre: #endif b SYMBOL_NAME(do_DataAbort) -#include "entry-common.S" +/* + *============================================================================= + * Low-level interface code + *----------------------------------------------------------------------------- + * Trap initialisation + *----------------------------------------------------------------------------- + * + * Note - FIQ code has changed. The default is a couple of words in 0x1c, 0x20 + * that call _unexp_fiq. Nowever, we now copy the FIQ routine to 0x1c (removes + * some excess cycles). + * + * What we need to put into 0-0x1c are branches to branch to the kernel. + */ - .data + .section ".text.init",#alloc,#execinstr + +.Ljump_addresses: + swi SYS_ERROR0 + .word vector_undefinstr - 12 + .word vector_swi - 16 + .word vector_prefetch - 20 + .word vector_data - 24 + .word vector_addrexcptn - 28 + .word vector_IRQ - 32 + .word _unexp_fiq - 36 + b . 
+ 8 +/* + * initialise the trap system + */ +ENTRY(trap_init) + stmfd sp!, {r4 - r7, lr} + adr r1, .Ljump_addresses + ldmia r1, {r1 - r7, ip, lr} + orr r2, lr, r2, lsr #2 + orr r3, lr, r3, lsr #2 + orr r4, lr, r4, lsr #2 + orr r5, lr, r5, lsr #2 + orr r6, lr, r6, lsr #2 + orr r7, lr, r7, lsr #2 + orr ip, lr, ip, lsr #2 + mov r0, #0 + stmia r0, {r1 - r7, ip} + ldmfd sp!, {r4 - r7, pc}^ + + .text + +#include "entry-common.S" -__temp_irq: .word 0 @ saved lr_irq + .bss +__temp_irq: .space 4 @ saved lr_irq __temp_fiq: .space 128 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bcc938b32..9456abe33 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -61,8 +61,12 @@ #define S_R1 4 #define S_R0 0 +#define OFF_CR_ALIGNMENT(x) cr_alignment - x + #ifdef IOC_BASE /* IOC / IOMD based hardware */ +#include <asm/iomd.h> + .equ ioc_base_high, IOC_BASE & 0xff000000 .equ ioc_base_low, IOC_BASE & 0x00ff0000 .macro disable_fiq @@ -186,113 +190,109 @@ irq_prio_ebsa110: .byte 6, 6, 6, 6, 2, 2, 2, 2, 3, 3, 6, 6, 2, 2, 2, 2 .endm -#elif defined(CONFIG_ARCH_EBSA285) +#elif defined(CONFIG_HOST_FOOTBRIDGE) || defined(CONFIG_ADDIN_FOOTBRIDGE) +#include <asm/dec21285.h> .macro disable_fiq .endm + .equ irq_mask_pci_err_high, IRQ_MASK_PCI_ERR & 0xff000000 + .equ irq_mask_pci_err_low, IRQ_MASK_PCI_ERR & 0x00ffffff + .equ dc21285_high, ARMCSR_BASE & 0xff000000 + .equ dc21285_low, ARMCSR_BASE & 0x00ffffff + .macro get_irqnr_and_base, irqnr, irqstat, base - mov r4, #0xfe000000 + mov r4, #dc21285_high + .if dc21285_low + orr r4, r4, #dc21285_low + .endif ldr \irqstat, [r4, #0x180] @ get interrupts - mov \irqnr, #0 -1001: tst \irqstat, #1 - addeq \irqnr, \irqnr, #1 - moveq \irqstat, \irqstat, lsr #1 - tsteq \irqnr, #32 - beq 1001b - teq \irqnr, #32 - .endm - .macro irq_prio_table - .endm - -#elif defined(CONFIG_ARCH_NEXUSPCI) + mov \irqnr, #IRQ_SDRAMPARITY + tst \irqstat, #IRQ_MASK_SDRAMPARITY + bne 1001f - .macro disable_fiq - .endm + tst 
\irqstat, #IRQ_MASK_UART_RX + movne \irqnr, #IRQ_CONRX + bne 1001f - .macro get_irqnr_and_base, irqnr, irqstat, base - ldr r4, =0xffe00000 - ldr \irqstat, [r4, #0x180] @ get interrupts - mov \irqnr, #0 -1001: tst \irqstat, #1 - addeq \irqnr, \irqnr, #1 - moveq \irqstat, \irqstat, lsr #1 - tsteq \irqnr, #32 - beq 1001b - teq \irqnr, #32 - .endm + tst \irqstat, #IRQ_MASK_DMA1 + movne \irqnr, #IRQ_DMA1 + bne 1001f - .macro irq_prio_table - .endm + tst \irqstat, #IRQ_MASK_DMA2 + movne \irqnr, #IRQ_DMA2 + bne 1001f -#elif defined(CONFIG_ARCH_VNC) + tst \irqstat, #IRQ_MASK_IN0 + movne \irqnr, #IRQ_IN0 + bne 1001f - .macro disable_fiq - .endm + tst \irqstat, #IRQ_MASK_IN1 + movne \irqnr, #IRQ_IN1 + bne 1001f - .equ pci_iack_high, PCI_IACK & 0xff000000 - .equ pci_iack_low, PCI_IACK & 0x00ff0000 + tst \irqstat, #IRQ_MASK_IN2 + movne \irqnr, #IRQ_IN2 + bne 1001f - .macro get_irqnr_and_base, irqnr, irqstat, base - mov r4, #IO_BASE_ARM_CSR - ldr \irqstat, [r4, #CSR_IRQ_STATUS] @ just show us the unmasked ones + tst \irqstat, #IRQ_MASK_IN3 + movne \irqnr, #IRQ_IN3 + bne 1001f - @ run through hard priorities - @ timer - tst \irqstat, #IRQ_MASK_TIMER0 - movne \irqnr, #IRQ_TIMER0 + tst \irqstat, #IRQ_MASK_PCI + movne \irqnr, #IRQ_PCI bne 1001f - @ ether10 - tst \irqstat, #IRQ_MASK_ETHER10 - movne \irqnr, #IRQ_ETHER10 + tst \irqstat, #IRQ_MASK_I2OINPOST + movne \irqnr, #IRQ_I2OINPOST bne 1001f - @ ether100 - tst \irqstat, #IRQ_MASK_ETHER100 - movne \irqnr, #IRQ_ETHER100 + tst \irqstat, #IRQ_MASK_TIMER1 + movne \irqnr, #IRQ_TIMER1 bne 1001f - @ video compressor - tst \irqstat, #IRQ_MASK_VIDCOMP - movne \irqnr, #IRQ_VIDCOMP + tst \irqstat, #IRQ_MASK_TIMER2 + movne \irqnr, #IRQ_TIMER2 bne 1001f - @ now try all the PIC sources - @ determine whether we have an irq - tst \irqstat, #IRQ_MASK_EXTERN_IRQ - beq 1002f - mov r4, #pci_iack_high - orr r4, r4, #pci_iack_low - ldrb \irqnr, [r4] @ get the IACK byte - b 1001f - -1002: @ PCI errors - tst \irqstat, #IRQ_MASK_PCI_ERR - movne \irqnr, 
#IRQ_PCI_ERR + tst \irqstat, #IRQ_MASK_TIMER3 + movne \irqnr, #IRQ_TIMER3 bne 1001f - @ softint - tst \irqstat, #IRQ_MASK_SOFTIRQ - movne \irqnr, #IRQ_SOFTIRQ + tst \irqstat, #IRQ_MASK_UART_TX + movne \irqnr, #IRQ_CONTX bne 1001f - @ debug uart - tst \irqstat, #IRQ_MASK_UART_DEBUG - movne \irqnr, #IRQ_CONRX + tst \irqstat, #irq_mask_pci_err_high + tsteq \irqstat, #irq_mask_pci_err_low + movne \irqnr, #IRQ_PCI_ERR bne 1001f +1001: + .endm - @ watchdog - tst \irqstat, #IRQ_MASK_WATCHDOG - movne \irqnr, #IRQ_WATCHDOG + .macro irq_prio_table + .endm -1001: @ If Z is set, then we will not enter an interrupt +#elif defined(CONFIG_ARCH_NEXUSPCI) + + .macro disable_fiq .endm - .macro irq_prio_table + .macro get_irqnr_and_base, irqnr, irqstat, base + ldr r4, =0xffe00000 + ldr \irqstat, [r4, #0x180] @ get interrupts + mov \irqnr, #0 +1001: tst \irqstat, #1 + addeq \irqnr, \irqnr, #1 + moveq \irqstat, \irqstat, lsr #1 + tsteq \irqnr, #32 + beq 1001b + teq \irqnr, #32 .endm + .macro irq_prio_table + .endm #else #error Unknown architecture #endif @@ -306,22 +306,22 @@ irq_prio_ebsa110: stmia sp, {r0 - r12} @ Calling r0 - r12 add r8, sp, #S_PC stmdb r8, {sp, lr}^ @ Calling sp, lr - mov r7, r0 + str lr, [r8, #0] @ Save calling PC mrs r6, spsr - mov r5, lr - stmia r8, {r5, r6, r7} @ Save calling PC, CPSR, OLD_R0 + str r6, [r8, #4] @ Save CPSR + str r0, [r8, #8] @ Save OLD_R0 .endm .macro restore_user_regs - mrs r0, cpsr @ disable IRQs - orr r0, r0, #I_BIT - msr cpsr, r0 + mrs r1, cpsr @ disable IRQs + orr r1, r1, #I_BIT ldr r0, [sp, #S_PSR] @ Get calling cpsr + msr cpsr, r1 msr spsr, r0 @ save in spsr_svc ldmia sp, {r0 - lr}^ @ Get calling r0 - lr mov r0, r0 - add sp, sp, #S_PC - ldr lr, [sp], #S_FRAME_SIZE - S_PC @ Get PC and jump over PC, PSR, OLD_R0 + ldr lr, [sp, #S_PC] @ Get PC + add sp, sp, #S_FRAME_SIZE movs pc, lr @ return & move spsr_svc into cpsr .endm @@ -348,25 +348,6 @@ irq_prio_ebsa110: msr cpsr, \temp .endm - .macro initialise_traps_extra - mrs r0, cpsr - bic r0, 
r0, #31 - orr r0, r0, #0xd3 - msr cpsr, r0 - .endm - - -#ifndef __ARM_ARCH_4__ -.Larm700bug: str lr, [r8] - ldr r0, [sp, #S_PSR] @ Get calling cpsr - msr spsr, r0 - ldmia sp, {r0 - lr}^ @ Get calling r0 - lr - mov r0, r0 - add sp, sp, #S_PC - ldr lr, [sp], #S_FRAME_SIZE - S_PC @ Get PC and jump over PC, PSR, OLD_R0 - movs pc, lr -#endif - .macro get_current_task, rd mov \rd, sp, lsr #13 mov \rd, \rd, lsl #13 @@ -379,231 +360,89 @@ irq_prio_ebsa110: adr\cond \reg, \label .endm -/*============================================================================= - * Address exception handler - *----------------------------------------------------------------------------- - * These aren't too critical. - * (they're not supposed to happen, and won't happen in 32-bit mode). - */ - -vector_addrexcptn: - b vector_addrexcptn - -/*============================================================================= - * Undefined FIQs - *----------------------------------------------------------------------------- - * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC - * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg. - * Basically to switch modes, we *HAVE* to clobber one register... brain - * damage alert! I don't think that we can execute any code in here in any - * other mode than FIQ... Ok you can switch to another mode, but you can't - * get out of that mode without clobbering one register. 
- */ -_unexp_fiq: disable_fiq - subs pc, lr, #4 - -/*============================================================================= - * Interrupt entry dispatcher - *----------------------------------------------------------------------------- - * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - */ -vector_IRQ: @ - @ save mode specific registers - @ - ldr r13, .LCirq - sub lr, lr, #4 - str lr, [r13] @ save lr_IRQ - mrs lr, spsr - str lr, [r13, #4] @ save spsr_IRQ - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr @ switch to SVC mode - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - cmp lr, #4 - addlts pc, pc, lr, lsl #2 @ Changes mode and branches - b __irq_invalid @ 4 - 15 - b __irq_usr @ 0 (USR_26 / USR_32) - b __irq_invalid @ 1 (FIQ_26 / FIQ_32) - b __irq_invalid @ 2 (IRQ_26 / IRQ_32) - b __irq_svc @ 3 (SVC_26 / SVC_32) -/* - *------------------------------------------------------------------------------------------------ - * Undef instr entry dispatcher - dispatches it to the correct handler for the processor mode - *------------------------------------------------------------------------------------------------ - * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - */ -.LCirq: .word __temp_irq -.LCund: .word __temp_und -.LCabt: .word __temp_abt - -vector_undefinstr: - @ - @ save mode specific registers - @ - ldr r13, [pc, #.LCund - . 
- 8] - str lr, [r13] - mrs lr, spsr - str lr, [r13, #4] - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - cmp lr, #4 - addlts pc, pc, lr, lsl #2 @ Changes mode and branches - b __und_invalid @ 4 - 15 - b __und_usr @ 0 (USR_26 / USR_32) - b __und_invalid @ 1 (FIQ_26 / FIQ_32) - b __und_invalid @ 2 (IRQ_26 / IRQ_32) - b __und_svc @ 3 (SVC_26 / SVC_32) -/* - *------------------------------------------------------------------------------------------------ - * Prefetch abort dispatcher - dispatches it to the correct handler for the processor mode - *------------------------------------------------------------------------------------------------ - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC - */ -vector_prefetch: - @ - @ save mode specific registers - @ - sub lr, lr, #4 - ldr r13, .LCabt - str lr, [r13] - mrs lr, spsr - str lr, [r13, #4] - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - adds pc, pc, lr, lsl #2 @ Changes mode and branches - b __pabt_invalid @ 4 - 15 - b __pabt_usr @ 0 (USR_26 / USR_32) - b __pabt_invalid @ 1 (FIQ_26 / FIQ_32) - b __pabt_invalid @ 2 (IRQ_26 / IRQ_32) - b __pabt_invalid @ 3 (SVC_26 / SVC_32) /* - *------------------------------------------------------------------------------------------------ - * Data abort dispatcher - dispatches it to the correct handler for the processor mode - *------------------------------------------------------------------------------------------------ - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC + * Invalid mode handlers */ -vector_data: @ - @ save mode specific registers - @ - sub lr, lr, #8 - ldr r13, .LCabt - str lr, [r13] - mrs lr, spsr - str lr, [r13, #4] - @ - @ now branch to the relevent MODE handling routine - @ - mrs sp, cpsr - bic sp, sp, #31 - orr sp, sp, #0x13 - msr spsr, sp - and lr, lr, #15 - 
cmp lr, #4 - addlts pc, pc, lr, lsl #2 @ Changes mode & branches - b __dabt_invalid @ 4 - 15 - b __dabt_usr @ 0 (USR_26 / USR_32) - b __dabt_invalid @ 1 (FIQ_26 / FIQ_32) - b __dabt_invalid @ 2 (IRQ_26 / IRQ_32) - b __dabt_svc @ 3 (SVC_26 / SVC_32) +__pabt_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go + stmia sp, {r0 - lr} @ Save XXX r0 - lr + ldr r4, .LCabt + mov r1, #BAD_PREFETCH + b 1f -/*============================================================================= - * Prefetch abort handler - *----------------------------------------------------------------------------- - */ -pabtmsg: .ascii "Pabt: %08lX\n\0" - .align -__pabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go - stmia sp, {r0 - r12} @ Save r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ @ Save sp_usr lr_usr +__dabt_invalid: sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - lr} @ Save SVC r0 - lr [lr *should* be intact] ldr r4, .LCabt - ldmia r4, {r5 - r7} @ Get USR pc, cpsr - stmia r8, {r5 - r7} @ Save USR pc, cpsr, old_r0 + mov r1, #BAD_DATA + b 1f - mrs r7, cpsr @ Enable interrupts if they were - bic r7, r7, #I_BIT @ previously - msr cpsr, r7 - mov r0, r5 @ address (pc) - mov r1, sp @ regs - bl SYMBOL_NAME(do_PrefetchAbort) @ call abort handler - teq r0, #0 @ Does this still apply??? 
- bne ret_from_exception @ Return from exception -#ifdef DEBUG_UNDEF - adr r0, t - bl SYMBOL_NAME(printk) -#endif - mov r0, r5 - mov r1, sp - and r2, r6, #31 - bl SYMBOL_NAME(do_undefinstr) - ldr lr, [sp, #S_PSR] @ Get USR cpsr - msr spsr, lr - ldmia sp, {r0 - pc}^ @ Restore USR registers +__irq_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate space on stack for frame + stmfd sp, {r0 - lr} @ Save r0 - lr + ldr r4, .LCirq + mov r1, #BAD_IRQ + b 1f -__pabt_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go - stmia sp, {r0 - lr} @ Save XXX r0 - lr - mov r7, r0 @ OLD R0 - ldr r4, .LCabt - ldmia r4, {r5 - r7} @ Get XXX pc, cpsr +__und_invalid: sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - lr} + ldr r4, .LCund + mov r1, #BAD_UNDEFINSTR @ int reason + +1: mov fp, #0 + ldmia r4, {r5 - r7} @ Get XXX pc, cpsr, old_r0 add r4, sp, #S_PC stmia r4, {r5 - r7} @ Save XXX pc, cpsr, old_r0 - mov r0, sp @ Prefetch aborts are definitely *not* - mov r1, #BAD_PREFETCH @ allowed in non-user modes. We cant - and r2, r6, #31 @ recover from this problem. 
+ mov r0, sp + and r2, r6, #31 @ int mode b SYMBOL_NAME(bad_mode) -#ifdef DEBUG_UNDEF -t: .ascii "*** undef ***\r\n\0" - .align -#endif -/*============================================================================= - * Data abort handler code - *----------------------------------------------------------------------------- - */ -.LCprocfns: .word SYMBOL_NAME(processor) +wfs_mask_data: .word 0x0e200110 @ WFS/RFS + .word 0x0fef0fff + .word 0x0d0d0100 @ LDF [sp]/STF [sp] + .word 0x0d0b0100 @ LDF [fp]/STF [fp] + .word 0x0f0f0f00 -__dabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go - stmia sp, {r0 - r12} @ save r0 - r12 - add r3, sp, #S_PC - stmdb r3, {sp, lr}^ - ldr r0, .LCabt - ldmia r0, {r0 - r2} @ Get USR pc, cpsr - stmia r3, {r0 - r2} @ Save USR pc, cpsr, old_r0 - mov fp, #0 - mrs r2, cpsr @ Enable interrupts if they were - bic r2, r2, #I_BIT @ previously - msr cpsr, r2 - ldr r2, .LCprocfns - mov lr, pc - ldr pc, [r2, #8] @ call processor specific code - mov r3, sp - bl SYMBOL_NAME(do_DataAbort) - b ret_from_sys_call +/* We get here if an undefined instruction happens and the floating + * point emulator is not present. If the offending instruction was + * a WFS, we just perform a normal return as if we had emulated the + * operation. This is a hack to allow some basic userland binaries + * to run so that the emulator module proper can be loaded. --philb + */ +fpe_not_present: + adr r10, wfs_mask_data + ldmia r10, {r4, r5, r6, r7, r8} + ldr r10, [sp, #S_PC] @ Load PC + sub r10, r10, #4 + mask_pc r10, r10 + ldrt r10, [r10] @ get instruction + and r5, r10, r5 + teq r5, r4 @ Is it WFS? + moveq pc, r9 + and r5, r10, r8 + teq r5, r6 @ Is it LDF/STF on sp or fp? 
+ teqne r5, r7 + movne pc, lr + tst r10, #0x00200000 @ Does it have WB + moveq pc, r9 + and r4, r10, #255 @ get offset + and r6, r10, #0x000f0000 + tst r10, #0x00800000 @ +/- + ldr r5, [sp, r6, lsr #14] @ Load reg + rsbeq r4, r4, #0 + add r5, r5, r4, lsl #2 + str r5, [sp, r6, lsr #14] @ Save reg + mov pc, r9 +/* + * SVC mode handlers + */ + .align 5 __dabt_svc: sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ save r0 - r12 ldr r2, .LCabt add r0, sp, #S_FRAME_SIZE + ldmia r2, {r2 - r4} @ get pc, cpsr add r5, sp, #S_SP mov r1, lr - ldmia r2, {r2 - r4} @ get pc, cpsr stmia r5, {r0 - r4} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro tst r3, #I_BIT mrseq r0, cpsr @ Enable interrupts if they were @@ -619,29 +458,15 @@ __dabt_svc: sub sp, sp, #S_FRAME_SIZE msr spsr, r0 ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -__dabt_invalid: sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - lr} @ Save SVC r0 - lr [lr *should* be intact] - mov r7, r0 - ldr r4, .LCabt - ldmia r4, {r5, r6} @ Get SVC pc, cpsr - add r4, sp, #S_PC - stmia r4, {r5, r6, r7} @ Save SVC pc, cpsr, old_r0 - mov r0, sp - mov r1, #BAD_DATA - and r2, r6, #31 - b SYMBOL_NAME(bad_mode) - -/*============================================================================= - * Interrupt (IRQ) handler - *----------------------------------------------------------------------------- - */ -__irq_usr: sub sp, sp, #S_FRAME_SIZE + .align 5 +__irq_svc: sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ save r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ - ldr r4, .LCirq - ldmia r4, {r5 - r7} @ get saved PC, SPSR - stmia r8, {r5 - r7} @ save pc, psr, old_r0 + ldr r7, .LCirq + add r5, sp, #S_FRAME_SIZE + ldmia r7, {r7 - r9} + add r4, sp, #S_SP + mov r6, lr + stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro 1: get_irqnr_and_base r0, r6, r5 movne r1, sp @ @@ -649,148 +474,414 @@ __irq_usr: sub sp, sp, #S_FRAME_SIZE @ adrsvc ne, lr, 1b bne do_IRQ - b ret_with_reschedule - - irq_prio_table + ldr r0, [sp, #S_PSR] + msr spsr, r0 
+ ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -__irq_svc: sub sp, sp, #S_FRAME_SIZE + .align 5 +__und_svc: sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ save r0 - r12 + ldr r7, .LCund mov r6, lr - ldr r7, .LCirq ldmia r7, {r7 - r9} add r5, sp, #S_FRAME_SIZE add r4, sp, #S_SP - stmia r4, {r5, r6, r7, r8, r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro + stmia r4, {r5 - r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro + + adrsvc al, r9, 1f @ r9 = normal FP return + bl call_fpe @ lr = undefined instr return + + mov r0, r5 @ unsigned long pc + mov r1, sp @ struct pt_regs *regs + bl SYMBOL_NAME(do_undefinstr) + +1: ldr lr, [sp, #S_PSR] @ Get SVC cpsr + msr spsr, lr + ldmia sp, {r0 - pc}^ @ Restore SVC registers + + .align 5 +.LCirq: .word __temp_irq +.LCund: .word __temp_und +.LCabt: .word __temp_abt +.LCprocfns: .word SYMBOL_NAME(processor) +.LCfp: .word SYMBOL_NAME(fp_enter) +#ifdef CONFIG_ALIGNMENT_TRAP +.LCswi: .word SYMBOL_NAME(cr_alignment) +#endif + + irq_prio_table + +/* + * User mode handlers + */ +#ifdef DEBUG_UNDEF +t: .ascii "Prefetch -> undefined instruction\n\0" + .align +#endif + .align 5 +__dabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go + stmia sp, {r0 - r12} @ save r0 - r12 + ldr r4, .LCabt + add r3, sp, #S_PC + ldmia r4, {r0 - r2} @ Get USR pc, cpsr + stmia r3, {r0 - r2} @ Save USR pc, cpsr, old_r0 + stmdb r3, {sp, lr}^ + +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_abt)] + mcr p15, 0, r7, c1, c0 +#endif + + mov fp, #0 + mrs r2, cpsr @ Enable interrupts if they were + bic r2, r2, #I_BIT @ previously + msr cpsr, r2 + ldr r2, .LCprocfns + mov lr, pc + ldr pc, [r2, #8] @ call processor specific code + mov r3, sp + adrsvc al, lr, ret_from_sys_call + b SYMBOL_NAME(do_DataAbort) + + .align 5 +__irq_usr: sub sp, sp, #S_FRAME_SIZE + stmia sp, {r0 - r12} @ save r0 - r12 + ldr r4, .LCirq + add r8, sp, #S_PC + ldmia r4, {r5 - r7} @ get saved PC, SPSR + stmia r8, {r5 - r7} @ save pc, psr, old_r0 + stmdb r8, {sp, lr}^ + +#ifdef 
CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_irq)] + mcr p15, 0, r7, c1, c0 +#endif + + mov fp, #0 1: get_irqnr_and_base r0, r6, r5 movne r1, sp + adrsvc ne, lr, 1b @ @ routine called with r0 = irq number, r1 = struct pt_regs * @ - adrsvc ne, lr, 1b bne do_IRQ - ldr r0, [sp, #S_PSR] - msr spsr, r0 - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr - -__irq_invalid: sub sp, sp, #S_FRAME_SIZE @ Allocate space on stack for frame - stmfd sp, {r0 - lr} @ Save r0 - lr - mov r7, #-1 - ldr r4, .LCirq - ldmia r4, {r5, r6} @ get saved pc, psr - add r4, sp, #S_PC - stmia r4, {r5, r6, r7} - mov fp, #0 - mov r0, sp - mov r1, #BAD_IRQ - b SYMBOL_NAME(bad_mode) - -/*============================================================================= - * Undefined instruction handler - *----------------------------------------------------------------------------- - * Handles floating point instructions - */ -.LC2: .word SYMBOL_NAME(fp_enter) + mov r4, #0 + b ret_with_reschedule + .align 5 __und_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go stmia sp, {r0 - r12} @ Save r0 - r12 - add r8, sp, #S_PC - stmdb r8, {sp, lr}^ @ Save user r0 - r12 ldr r4, .LCund + add r8, sp, #S_PC ldmia r4, {r5 - r7} stmia r8, {r5 - r7} @ Save USR pc, cpsr, old_r0 - mov fp, #0 + stmdb r8, {sp, lr}^ @ Save user r0 - r12 - adrsvc al, r9, ret_from_exception @ r9 = normal FP return +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_und)] + mcr p15, 0, r7, c1, c0 +#endif + + mov fp, #0 + adrsvc al, r9, ret_from_sys_call @ r9 = normal FP return adrsvc al, lr, fpundefinstr @ lr = undefined instr return -1: get_current_task r10 +call_fpe: get_current_task r10 mov r8, #1 strb r8, [r10, #TSK_USED_MATH] @ set current->used_math + ldr r4, .LCfp add r10, r10, #TSS_FPESAVE @ r10 = workspace - ldr r4, .LC2 ldr pc, [r4] @ Call FP module USR entry point -__und_svc: sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - r12} @ save r0 - r12 - mov r6, lr - ldr r7, .LCund - ldmia r7, {r7 - r9} - 
add r5, sp, #S_FRAME_SIZE - add r4, sp, #S_SP - stmia r4, {r5 - r9} @ save sp_SVC, lr_SVC, pc, cpsr, old_ro - - adrsvc al, r9, 3f @ r9 = normal FP return - bl 1b @ lr = undefined instr return - - mov r0, r5 @ unsigned long pc - mov r1, sp @ struct pt_regs *regs - bl SYMBOL_NAME(do_undefinstr) - -3: ldr lr, [sp, #S_PSR] @ Get SVC cpsr - msr spsr, lr - ldmia sp, {r0 - pc}^ @ Restore SVC registers - fpundefinstr: mov r0, lr mov r1, sp mrs r4, cpsr @ Enable interrupts bic r4, r4, #I_BIT msr cpsr, r4 - adrsvc al, lr, ret_from_exception + adrsvc al, lr, ret_from_sys_call b SYMBOL_NAME(do_undefinstr) -__und_invalid: sub sp, sp, #S_FRAME_SIZE - stmia sp, {r0 - lr} - mov r7, r0 - ldr r4, .LCund - ldmia r4, {r5, r6} @ Get UND/IRQ/FIQ/ABT pc, cpsr - add r4, sp, #S_PC - stmia r4, {r5, r6, r7} @ Save UND/IRQ/FIQ/ABT pc, cpsr, old_r0 - mov r0, sp @ struct pt_regs *regs - mov r1, #BAD_UNDEFINSTR @ int reason - and r2, r6, #31 @ int mode - b SYMBOL_NAME(bad_mode) @ Does not ever return... + .align 5 +__pabt_usr: sub sp, sp, #S_FRAME_SIZE @ Allocate frame size in one go + stmia sp, {r0 - r12} @ Save r0 - r12 + ldr r4, .LCabt + add r8, sp, #S_PC + ldmia r4, {r5 - r7} @ Get USR pc, cpsr + stmia r8, {r5 - r7} @ Save USR pc, cpsr, old_r0 + stmdb r8, {sp, lr}^ @ Save sp_usr lr_usr -/* We get here if an undefined instruction happens and the floating - * point emulator is not present. If the offending instruction was - * a WFS, we just perform a normal return as if we had emulated the - * operation. This is a hack to allow some basic userland binaries - * to run so that the emulator module proper can be loaded. --philb - */ -fpe_not_present: - adr r10, wfs_mask_data - ldmia r10, {r4, r5, r6, r7, r8} - ldr r10, [sp, #S_PC] @ Load PC - sub r10, r10, #4 - mask_pc r10, r10 - ldrt r10, [r10] @ get instruction - and r5, r10, r5 - teq r5, r4 @ Is it WFS? - moveq pc, r9 - and r5, r10, r8 - teq r5, r6 @ Is it LDF/STF on sp or fp? 
- teqne r5, r7 - movne pc, lr - tst r10, #0x00200000 @ Does it have WB - moveq pc, r9 - and r4, r10, #255 @ get offset - and r6, r10, #0x000f0000 - tst r10, #0x00800000 @ +/- - rsbeq r4, r4, #0 - ldr r5, [sp, r6, lsr #14] @ Load reg - add r5, r5, r4, lsl #2 - str r5, [sp, r6, lsr #14] @ Save reg - mov pc, r9 +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, [r4, #OFF_CR_ALIGNMENT(__temp_abt)] + mcr p15, 0, r7, c1, c0 +#endif -wfs_mask_data: .word 0x0e200110 @ WFS - .word 0x0fff0fff - .word 0x0d0d0100 @ LDF [sp]/STF [sp] - .word 0x0d0b0100 @ LDF [fp]/STF [fp] - .word 0x0f0f0f00 + mov fp, #0 + mrs r7, cpsr @ Enable interrupts if they were + bic r7, r7, #I_BIT @ previously + msr cpsr, r7 + mov r0, r5 @ address (pc) + mov r1, sp @ regs + bl SYMBOL_NAME(do_PrefetchAbort) @ call abort handler + teq r0, #0 @ Does this still apply??? + bne ret_from_sys_call @ Return from exception +#ifdef DEBUG_UNDEF + adr r0, t + bl SYMBOL_NAME(printk) +#endif + mov r0, r5 + mov r1, sp + and r2, r6, #31 + bl SYMBOL_NAME(do_undefinstr) + ldr lr, [sp, #S_PSR] @ Get USR cpsr + msr spsr, lr + ldmia sp, {r0 - pc}^ @ Restore USR registers #include "entry-common.S" + .text + +#ifndef __ARM_ARCH_4__ +.Larm700bug: ldr r0, [sp, #S_PSR] @ Get calling cpsr + str lr, [r8] + msr spsr, r0 + ldmia sp, {r0 - lr}^ @ Get calling r0 - lr + mov r0, r0 + ldr lr, [sp, #S_PC] @ Get PC + add sp, sp, #S_FRAME_SIZE + movs pc, lr +#endif + + .section ".text.init",#alloc,#execinstr +/* + * Vector stubs. NOTE that we only align 'vector_IRQ' to a cache line boundary, + * and we rely on each stub being exactly 48 (1.5 cache lines) in size. This + * means that we only ever load two cache lines for this code, or one if we're + * lucky. We also copy this code to 0x200 so that we can use branches in the + * vectors, rather than ldr's. 
+ */ + .align 5 +__stubs_start: +/* + * Interrupt dispatcher + * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC + */ +vector_IRQ: @ + @ save mode specific registers + @ + ldr r13, .LCsirq + sub lr, lr, #4 + str lr, [r13] @ save lr_IRQ + mrs lr, spsr + str lr, [r13, #4] @ save spsr_IRQ + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + and lr, lr, #15 + adr r13, .LCtab_irq + ldr lr, [r13, lr, lsl #2] + movs pc, lr @ Changes mode and branches +/* + * Data abort dispatcher - dispatches it to the correct handler for the processor mode + * Enter in ABT mode, spsr = USR CPSR, lr = USR PC + */ +vector_data: @ + @ save mode specific registers + @ + ldr r13, .LCsabt + sub lr, lr, #8 + str lr, [r13] + mrs lr, spsr + str lr, [r13, #4] + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + and lr, lr, #15 + adr r13, .LCtab_dabt + ldr lr, [r13, lr, lsl #2] + movs pc, lr @ Changes mode and branches + +/* + * Prefetch abort dispatcher - dispatches it to the correct handler for the processor mode + * Enter in ABT mode, spsr = USR CPSR, lr = USR PC + */ +vector_prefetch: + @ + @ save mode specific registers + @ + ldr r13, .LCsabt + sub lr, lr, #4 + str lr, [r13] @ save lr_ABT + mrs lr, spsr + str lr, [r13, #4] @ save spsr_ABT + @ + @ now branch to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + ands lr, lr, #15 + ldreq lr, .LCtab_pabt + ldrne lr, .LCtab_pabt + 4 + movs pc, lr + +/* + * Undef instr entry dispatcher - dispatches it to the correct handler for the processor mode + * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC + */ +vector_undefinstr: + @ + @ save mode specific registers + @ + ldr r13, .LCsund + str lr, [r13] @ save lr_UND + mrs lr, spsr + str lr, [r13, #4] @ save spsr_UND + @ + @ now branch 
to the relevent MODE handling routine + @ + bic r13, lr, #63 + orr r13, r13, #0x93 + msr spsr, r13 @ switch to SVC_32 mode + + and lr, lr, #15 + adr r13, .LCtab_und + ldr lr, [r13, lr, lsl #2] + movs pc, lr @ Changes mode and branches + +/*============================================================================= + * Undefined FIQs + *----------------------------------------------------------------------------- + * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC + * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg. + * Basically to switch modes, we *HAVE* to clobber one register... brain + * damage alert! I don't think that we can execute any code in here in any + * other mode than FIQ... Ok you can switch to another mode, but you can't + * get out of that mode without clobbering one register. + */ +vector_FIQ: disable_fiq + subs pc, lr, #4 + +/*============================================================================= + * Address exception handler + *----------------------------------------------------------------------------- + * These aren't too critical. + * (they're not supposed to happen, and won't happen in 32-bit data mode). + */ + +vector_addrexcptn: + b vector_addrexcptn + +/* + * We group all the following data together to optimise + * for CPUs with separate I & D caches. 
+ */ + .align 5 + +.LCtab_irq: .word __irq_usr @ 0 (USR_26 / USR_32) + .word __irq_invalid @ 1 (FIQ_26 / FIQ_32) + .word __irq_invalid @ 2 (IRQ_26 / IRQ_32) + .word __irq_svc @ 3 (SVC_26 / SVC_32) + .word __irq_invalid @ 4 + .word __irq_invalid @ 5 + .word __irq_invalid @ 6 + .word __irq_invalid @ 7 + .word __irq_invalid @ 8 + .word __irq_invalid @ 9 + .word __irq_invalid @ a + .word __irq_invalid @ b + .word __irq_invalid @ c + .word __irq_invalid @ d + .word __irq_invalid @ e + .word __irq_invalid @ f + +.LCtab_und: .word __und_usr @ 0 (USR_26 / USR_32) + .word __und_invalid @ 1 (FIQ_26 / FIQ_32) + .word __und_invalid @ 2 (IRQ_26 / IRQ_32) + .word __und_svc @ 3 (SVC_26 / SVC_32) + .word __und_invalid @ 4 + .word __und_invalid @ 5 + .word __und_invalid @ 6 + .word __und_invalid @ 7 + .word __und_invalid @ 8 + .word __und_invalid @ 9 + .word __und_invalid @ a + .word __und_invalid @ b + .word __und_invalid @ c + .word __und_invalid @ d + .word __und_invalid @ e + .word __und_invalid @ f + +.LCtab_dabt: .word __dabt_usr @ 0 (USR_26 / USR_32) + .word __dabt_invalid @ 1 (FIQ_26 / FIQ_32) + .word __dabt_invalid @ 2 (IRQ_26 / IRQ_32) + .word __dabt_svc @ 3 (SVC_26 / SVC_32) + .word __dabt_invalid @ 4 + .word __dabt_invalid @ 5 + .word __dabt_invalid @ 6 + .word __dabt_invalid @ 7 + .word __dabt_invalid @ 8 + .word __dabt_invalid @ 9 + .word __dabt_invalid @ a + .word __dabt_invalid @ b + .word __dabt_invalid @ c + .word __dabt_invalid @ d + .word __dabt_invalid @ e + .word __dabt_invalid @ f + +.LCtab_pabt: .word __pabt_usr + .word __pabt_invalid + +.LCvswi: .word vector_swi + +.LCsirq: .word __temp_irq +.LCsund: .word __temp_und +.LCsabt: .word __temp_abt + +__stubs_end: + + .equ __real_stubs_start, .LCvectors + 0x200 + +.LCvectors: swi SYS_ERROR0 + b __real_stubs_start + (vector_undefinstr - __stubs_start) + ldr pc, __real_stubs_start + (.LCvswi - __stubs_start) + b __real_stubs_start + (vector_prefetch - __stubs_start) + b __real_stubs_start + (vector_data - 
__stubs_start) + b __real_stubs_start + (vector_addrexcptn - __stubs_start) + b __real_stubs_start + (vector_IRQ - __stubs_start) + b __real_stubs_start + (vector_FIQ - __stubs_start) + +ENTRY(trap_init) + stmfd sp!, {r4 - r6, lr} + + adr r1, .LCvectors @ set up the vectors + mov r0, #0 + ldmia r1, {r1, r2, r3, r4, r5, r6, ip, lr} + stmia r0, {r1, r2, r3, r4, r5, r6, ip, lr} + + add r2, r0, #0x200 + adr r0, __stubs_start @ copy stubs to 0x200 + adr r1, __stubs_end +1: ldr r3, [r0], #4 + str r3, [r2], #4 + cmp r0, r1 + blt 1b + LOADREGS(fd, sp!, {r4 - r6, pc}) + .data +/* + * Do not reorder these, and do not insert extra data between... + */ + __temp_irq: .word 0 @ saved lr_irq .word 0 @ saved spsr_irq .word -1 @ old_r0 @@ -800,3 +891,10 @@ __temp_und: .word 0 @ Saved lr_und __temp_abt: .word 0 @ Saved lr_abt .word 0 @ Saved spsr_abt .word -1 @ old_r0 + + .globl SYMBOL_NAME(cr_alignment) + .globl SYMBOL_NAME(cr_no_alignment) +SYMBOL_NAME(cr_alignment): + .space 4 +SYMBOL_NAME(cr_no_alignment): + .space 4 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index c77c0ea51..2fc0fdddc 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -1,51 +1,54 @@ /*============================================================================ * All exits to user mode from the kernel go through this code. 
*/ - -#include <linux/config.h> - .globl ret_from_sys_call -ret_from_exception: - adr r0, 1f - ldmia r0, {r0, r1} + .align 5 +fast_syscall_return: + str r0, [sp, #S_R0 + 4] @ returned r0 +slow_syscall_return: + add sp, sp, #4 +ret_from_sys_call: + adr r0, bh_data + ldmia r0, {r0, r4} ldr r0, [r0] - ldr r1, [r1] + ldr r1, [r4] tst r0, r1 blne SYMBOL_NAME(do_bottom_half) -ret_from_intr: ldr r0, [sp, #S_PSR] - tst r0, #3 - beq ret_with_reschedule - b ret_from_all +ret_with_reschedule: + get_current_task r1 @ check for scheduling + ldr r0, [r1, #TSK_NEED_RESCHED] + teq r0, #0 + bne ret_reschedule + ldr r1, [r1, #TSK_SIGPENDING] + teq r1, #0 @ check for signals + bne ret_signal + +ret_from_all: restore_user_regs ret_signal: mov r1, sp adrsvc al, lr, ret_from_all + mov r2, r4 b SYMBOL_NAME(do_signal) -2: bl SYMBOL_NAME(schedule) +ret_reschedule: adrsvc al, lr, ret_with_reschedule + b SYMBOL_NAME(schedule) -ret_from_sys_call: - adr r0, 1f + .globl ret_from_exception +ret_from_exception: + adr r0, bh_data ldmia r0, {r0, r1} ldr r0, [r0] ldr r1, [r1] + mov r4, #0 tst r0, r1 - adrsvc ne, lr, ret_from_intr - bne SYMBOL_NAME(do_bottom_half) - -ret_with_reschedule: - get_current_task r1 - ldr r0, [r1, #TSK_NEED_RESCHED] - teq r0, #0 - bne 2b - ldr r1, [r1, #TSK_SIGPENDING] - teq r1, #0 - bne ret_signal - -ret_from_all: restore_user_regs + blne SYMBOL_NAME(do_bottom_half) + ldr r0, [sp, #S_PSR] + tst r0, #3 @ returning to user mode? + beq ret_with_reschedule + b ret_from_all -1: .word SYMBOL_NAME(bh_mask) - .word SYMBOL_NAME(bh_active) +#include "calls.S" /*============================================================================= * SWI handler @@ -57,84 +60,65 @@ ret_from_all: restore_user_regs * too worried. */ -#include "calls.S" - + .align 5 vector_swi: save_user_regs - mov fp, #0 mask_pc lr, lr - ldr r6, [lr, #-4]! 
@ get SWI instruction + mov fp, #0 + ldr r6, [lr, #-4] @ get SWI instruction arm700_bug_check r6, r7 +#ifdef CONFIG_ALIGNMENT_TRAP + ldr r7, .LCswi + ldr r7, [r7] + mcr p15, 0, r7, c1, c0 +#endif enable_irqs r7 - + + str r4, [sp, #-4]! @ new style: (r0 = arg1, r4 = arg5) + adrsvc al, lr, fast_syscall_return + bic r6, r6, #0xff000000 @ mask off SWI op-code eor r6, r6, #OS_NUMBER<<20 @ check OS number cmp r6, #NR_syscalls @ check upper syscall limit bcs 2f - get_current_task r5 - ldr ip, [r5, #TSK_FLAGS] @ check for syscall tracing + get_current_task r7 + ldr ip, [r7, #TSK_FLAGS] @ check for syscall tracing + adr r5, SYMBOL_NAME(sys_call_table) tst ip, #PF_TRACESYS - bne 1f + ldreq pc, [r5, r6, lsl #2] @ call sys routine - adr ip, SYMBOL_NAME(sys_call_table) - str r4, [sp, #-4]! @ new style: (r0 = arg1, r5 = arg5) - mov lr, pc - ldr pc, [ip, r6, lsl #2] @ call sys routine - add sp, sp, #4 - str r0, [sp, #S_R0] @ returned r0 - b ret_from_sys_call - -1: ldr r7, [sp, #S_IP] @ save old IP + ldr r7, [sp, #S_IP + 4] @ save old IP mov r0, #0 - str r0, [sp, #S_IP] @ trace entry [IP = 0] + str r0, [sp, #S_IP + 4] @ trace entry [IP = 0] bl SYMBOL_NAME(syscall_trace) - str r7, [sp, #S_IP] - ldmia sp, {r0 - r3} @ have to reload r0 - r3 - adr ip, SYMBOL_NAME(sys_call_table) - str r4, [sp, #-4]! @ new style: (r0 = arg1, r5 = arg5) + str r7, [sp, #S_IP + 4] + + ldmib sp, {r0 - r3} @ have to reload r0 - r3 mov lr, pc - ldr pc, [ip, r6, lsl #2] @ call sys routine - add sp, sp, #4 - str r0, [sp, #S_R0] @ returned r0 + ldr pc, [r5, r6, lsl #2] @ call sys routine + str r0, [sp, #S_R0 + 4] @ returned r0 + mov r0, #1 - str r0, [sp, #S_IP] @ trace exit [IP = 1] + str r0, [sp, #S_IP + 4] @ trace exit [IP = 1] bl SYMBOL_NAME(syscall_trace) - str r7, [sp, #S_IP] - b ret_from_sys_call + str r7, [sp, #S_IP + 4] + b slow_syscall_return -2: tst r6, #0x00f00000 @ is it a Unix SWI? +2: add r1, sp, #4 + tst r6, #0x00f00000 @ is it a Unix SWI? 
bne 3f - cmp r6, #(KSWI_SYS_BASE - KSWI_BASE) - bcc 4f @ not private func - bic r0, r6, #0x000f0000 - mov r1, sp - bl SYMBOL_NAME(arm_syscall) - b ret_from_sys_call - -3: eor r0, r6, #OS_NUMBER<<20 @ Put OS number back - mov r1, sp - bl SYMBOL_NAME(deferred) - ldmfd sp, {r0 - r3} - b ret_from_sys_call - -4: bl SYMBOL_NAME(sys_ni_syscall) - str r0, [sp, #0] @ returned r0 - b ret_from_sys_call + subs r0, r6, #(KSWI_SYS_BASE - KSWI_BASE) + bcs SYMBOL_NAME(arm_syscall) + b SYMBOL_NAME(sys_ni_syscall) @ not private func -@ r0 = syscall number -@ r1 = syscall r0 -@ r5 = syscall r4 -@ ip = syscall table -SYMBOL_NAME(sys_syscall): - mov r6, r0 - eor r6, r6, #OS_NUMBER << 20 - cmp r6, #NR_syscalls @ check range - movgt r0, #-ENOSYS - movgt pc, lr - add sp, sp, #4 @ take of the save of our r4 - ldmib sp, {r0 - r4} @ get our args - str r4, [sp, #-4]! @ Put our arg on the stack - ldr pc, [ip, r6, lsl #2] +3: eor r0, r6, #OS_NUMBER <<20 @ Put OS number back + adrsvc al, lr, slow_syscall_return + b SYMBOL_NAME(deferred) + + .align 5 + +bh_data: .word SYMBOL_NAME(bh_mask) + .word SYMBOL_NAME(bh_active) ENTRY(sys_call_table) #include "calls.S" @@ -142,10 +126,25 @@ ENTRY(sys_call_table) /*============================================================================ * Special system call wrappers */ +@ r0 = syscall number +@ r5 = syscall table +SYMBOL_NAME(sys_syscall): + eor r6, r0, #OS_NUMBER << 20 + cmp r6, #NR_syscalls @ check range + ldmleib sp, {r0 - r4} @ get our args + strle r4, [sp] @ Put our arg on the stack + ldrle pc, [r5, r6, lsl #2] + mov r0, #-ENOSYS + mov pc, lr + sys_fork_wrapper: add r0, sp, #4 b SYMBOL_NAME(sys_fork) +sys_vfork_wrapper: + add r0, sp, #4 + b SYMBOL_NAME(sys_vfork) + sys_execve_wrapper: add r3, sp, #4 b SYMBOL_NAME(sys_execve) @@ -192,99 +191,6 @@ sys_sigaltstack_wrapper: ldr r2, [sp, #4 + S_SP] b do_sigaltstack -/* - *============================================================================= - * Low-level interface code - 
*----------------------------------------------------------------------------- - * Trap initialisation - *----------------------------------------------------------------------------- - * - * Note - FIQ code has changed. The default is a couple of words in 0x1c, 0x20 - * that call _unexp_fiq. Nowever, we now copy the FIQ routine to 0x1c (removes - * some excess cycles). - * - * What we need to put into 0-0x1c are ldrs to branch to 0xC0000000 - * (the kernel). - * 0x1c onwards is reserved for FIQ, so I think that I will allocate 0xe0 onwards for - * the actual address to jump to. - */ - - .section ".text.init",#alloc,#execinstr - -#if defined(CONFIG_CPU_32) -/* - * these go into 0x00 - */ -.Lbranches: swi SYS_ERROR0 - ldr pc, .Lbranches + 0xe4 - ldr pc, .Lbranches + 0xe8 - ldr pc, .Lbranches + 0xec - ldr pc, .Lbranches + 0xf0 - ldr pc, .Lbranches + 0xf4 - ldr pc, .Lbranches + 0xf8 - ldr pc, .Lbranches + 0xfc -/* - * this is put into 0xe4 and above - */ -.Ljump_addresses: - .word vector_undefinstr @ 0xe4 - .word vector_swi @ 0xe8 - .word vector_prefetch @ 0xec - .word vector_data @ 0xf0 - .word vector_addrexcptn @ 0xf4 - .word vector_IRQ @ 0xf8 - .word _unexp_fiq @ 0xfc -/* - * initialise the trap system - */ -ENTRY(trap_init) - stmfd sp!, {r4 - r7, lr} - initialise_traps_extra - mov r0, #0xe4 - adr r1, .Ljump_addresses - ldmia r1, {r1 - r7} - stmia r0, {r1 - r7} - mov r0, #0 - adr r1, .Lbranches - ldmia r1, {r1 - r7} - stmia r0, {r1 - r7} - LOADREGS(fd, sp!, {r4 - r7, pc}) -#elif defined(CONFIG_CPU_26) -.Ljump_addresses: - swi SYS_ERROR0 - .word vector_undefinstr - 12 - .word vector_swi - 16 - .word vector_prefetch - 20 - .word vector_data - 24 - .word vector_addrexcptn - 28 - .word vector_IRQ - 32 - .word _unexp_fiq - 36 - b . 
+ 8 -/* - * initialise the trap system - */ -ENTRY(trap_init) - stmfd sp!, {r4 - r7, lr} - adr r1, .Ljump_addresses - ldmia r1, {r1 - r7, ip, lr} - orr r2, lr, r2, lsr #2 - orr r3, lr, r3, lsr #2 - orr r4, lr, r4, lsr #2 - orr r5, lr, r5, lsr #2 - orr r6, lr, r6, lsr #2 - orr r7, lr, r7, lsr #2 - orr ip, lr, ip, lsr #2 - mov r0, #0 - stmia r0, {r1 - r7, ip} - ldmfd sp!, {r4 - r7, pc}^ -#endif - - .previous - -/*============================================================================ - * FP support - */ - .data ENTRY(fp_enter) diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index a5da15c7f..e3e87469f 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -2,6 +2,8 @@ * linux/arch/arm/kernel/fiq.c * * Copyright (C) 1998 Russell King + * Copyright (C) 1998, 1999 Phil Blundell + * * FIQ support written by Philip Blundell <philb@gnu.org>, 1998. * * FIQ support re-written by Russell King to be more generic @@ -78,7 +80,7 @@ int fiq_def_op(void *ref, int relinquish) unprotect_page_0(); *(unsigned long *)FIQ_VECTOR = no_fiq_insn; protect_page_0(); - __flush_entry_to_ram(FIQ_VECTOR); + flush_icache_range(FIQ_VECTOR, FIQ_VECTOR + 4); } return 0; @@ -106,28 +108,77 @@ void set_fiq_handler(void *start, unsigned int length) memcpy((void *)FIQ_VECTOR, start, length); protect_page_0(); -#ifdef CONFIG_CPU_32 - processor.u.armv3v4._flush_cache_area(FIQ_VECTOR, FIQ_VECTOR + length, 1); -#endif + flush_icache_range(FIQ_VECTOR, FIQ_VECTOR + length); } +/* + * Taking an interrupt in FIQ mode is death, so both these functions + * disable irqs for the duration. + */ void set_fiq_regs(struct pt_regs *regs) { - /* not yet - - * this is temporary to get the floppy working - * again on RiscPC. It *will* become more - * generic. 
- */ -#ifdef CONFIG_ARCH_ACORN - extern void floppy_fiqsetup(unsigned long len, unsigned long addr, - unsigned long port); - floppy_fiqsetup(regs->ARM_r9, regs->ARM_r10, regs->ARM_fp); + register unsigned long tmp, tmp2; + __asm__ volatile ( +#ifdef CONFIG_CPU_26 + "mov %0, pc + bic %1, %0, #0x3 + orr %1, %1, #0x0c000001 + teqp %1, #0 @ select FIQ mode + mov r0, r0 + ldmia %2, {r8 - r14} + teqp %0, #0 @ return to SVC mode + mov r0, r0" #endif +#ifdef CONFIG_CPU_32 + "mrs %0, cpsr + bic %1, %0, #0xf + orr %1, %1, #0xc1 + msr cpsr, %1 @ select FIQ mode + mov r0, r0 + ldmia %2, {r8 - r14} + msr cpsr, %0 @ return to SVC mode + mov r0, r0" +#endif + : "=r" (tmp), "=r" (tmp2) + : "r" (&regs->ARM_r8) + /* These registers aren't modified by the above code in a way + visible to the compiler, but we mark them as clobbers anyway + so that GCC won't put any of the input or output operands in + them. */ + : "r8", "r9", "r10", "r11", "r12", "r13", "r14"); } void get_fiq_regs(struct pt_regs *regs) { - /* not yet */ + register unsigned long tmp, tmp2; + __asm__ volatile ( +#ifdef CONFIG_CPU_26 + "mov %0, pc + bic %1, %0, #0x3 + orr %1, %1, #0x0c000001 + teqp %1, #0 @ select FIQ mode + mov r0, r0 + stmia %2, {r8 - r14} + teqp %0, #0 @ return to SVC mode + mov r0, r0" +#endif +#ifdef CONFIG_CPU_32 + "mrs %0, cpsr + bic %1, %0, #0xf + orr %1, %1, #0xc1 + msr cpsr, %1 @ select FIQ mode + mov r0, r0 + stmia %2, {r8 - r14} + msr cpsr, %0 @ return to SVC mode + mov r0, r0" +#endif + : "=r" (tmp), "=r" (tmp2) + : "r" (&regs->ARM_r8) + /* These registers aren't modified by the above code in a way + visible to the compiler, but we mark them as clobbers anyway + so that GCC won't put any of the input or output operands in + them. 
*/ + : "r8", "r9", "r10", "r11", "r12", "r13", "r14"); } int claim_fiq(struct fiq_handler *f) diff --git a/arch/arm/kernel/head-armv.S b/arch/arm/kernel/head-armv.S index cd4be86cb..2e13f0818 100644 --- a/arch/arm/kernel/head-armv.S +++ b/arch/arm/kernel/head-armv.S @@ -7,13 +7,21 @@ */ #include <linux/config.h> #include <linux/linkage.h> +#include <asm/hardware.h> +#include <asm/dec21285.h> + + .globl SYMBOL_NAME(swapper_pg_dir) + .equ SYMBOL_NAME(swapper_pg_dir), TEXTADDR - 0x4000 + + .section ".text.init",#alloc,#execinstr +ENTRY(stext) +ENTRY(_stext) -#ifndef CONFIG_ARCH_VNC #if (TEXTADDR & 0xffff) != 0x8000 #error TEXTADDR must start at 0xXXXX8000 #endif -#else - .text + +#ifdef CONFIG_ARCH_NETWINDER mov r0, r0 mov r0, r0 mov r0, r0 @@ -22,16 +30,34 @@ mov r0, r0 mov r0, r0 mov r0, r0 + + adr r2, 1f + ldmdb r2, {r7, r8} + and r3, r2, #0x0000c000 + teq r3, #0x00008000 + beq __entry + bic r3, r2, #0xc000 + orr r3, r3, #0x8000 + mov r0, r3 + mov r4, #32 + sub r5, r8, r7 + b 1f + + .word _stext + .word _end + +1: ldmia r2!, {r6, r7, r8, r9} + stmia r3!, {r6, r7, r8, r9} + subs r4, r4, #16 + bcs 1b + movs r4, r5 + mov r5, #0 + movne pc, r0 + mov r0, #0 mov r1, #5 #endif -#define DEBUG - - .globl SYMBOL_NAME(swapper_pg_dir) - .equ SYMBOL_NAME(swapper_pg_dir), TEXTADDR - 0x4000 - - .text /* * Entry point and restart point. Entry *must* be called with r0 == 0, * MMU off. Note! These should be unique!!! Please read Documentation/ARM-README @@ -45,16 +71,15 @@ * r1 = 5 -> Corel Netwinder * r1 = 6 -> CATS * r1 = 7 -> tbox + * r1 = 8 -> SA110/21285 as co-processor */ -ENTRY(stext) -ENTRY(_stext) __entry: teq r0, #0 @ check for illegal entry... bne .Lerror @ loop indefinitely - cmp r1, #8 @ Unknown machine architecture + cmp r1, #9 @ Unknown machine architecture bge .Lerror -/* First thing to do is to get the page tables set up so that we can call the kernel - * in the correct place. This is relocatable code... 
+/* First thing to do is to get the page tables set up so that we can call + * the kernel in the correct place. This is relocatable code... * - Read processor ID register (CP#15, CR0). */ mrc p15, 0, r9, c0, c0 @ get Processor ID @@ -74,7 +99,7 @@ __entry: teq r0, #0 @ check for illegal entry... adr r4, .LCMachTypes add r4, r4, r1, lsl #4 - ldmia r4, {r4, r5, r6} + ldmia r4, {r4, r5, r6, r7} /* * r4 = page dir in physical ram * r5 = physical address of start of RAM @@ -99,26 +124,28 @@ __entry: teq r0, #0 @ check for illegal entry... add r3, r3, #1 << 20 str r3, [r0], #4 add r3, r3, #1 << 20 -#ifdef DEBUG +#ifdef CONFIG_DEBUG_LL /* Map in IO space * This allows debug messages to be output via a serial * before/while paging_init. */ - add r0, r4, #0x3800 + add r0, r4, r7 orr r3, r6, r8 add r2, r0, #0x0800 1: str r3, [r0], #4 add r3, r3, #1 << 20 teq r0, r2 bne 1b -#ifdef CONFIG_ARCH_VNC - add r0, r4, #0x3f00 - add r0, r0, #0x00f8 +#ifdef CONFIG_ARCH_NETWINDER + teq r1, #5 + bne 1f + add r0, r4, #0x3fc0 mov r3, #0x7c000000 orr r3, r3, r8 str r3, [r0], #4 add r3, r3, #1 << 20 str r3, [r0], #4 +1: #endif #endif #ifdef CONFIG_ARCH_RPC @@ -168,49 +195,55 @@ __entry: teq r0, #0 @ check for illegal entry... .LCMachTypes: .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0xe0000000 @ I/O address - .long 0 + .long 0x3800 @ Acorn RiscPC .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x10000000 .long 0x10000000 .long 0x03000000 - .long 0 + .long 0x3800 @ EBSIT ??? 
.long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 .long 0 .long 0xe0000000 - .long 0 + .long 0x3800 @ NexusPCI .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x40000000 .long 0x40000000 .long 0x10000000 - .long 0 + .long 0x3800 @ DEC EBSA285 .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0x24000000 @ I/O base address (0x42000000 -> 0xFE000000) - .long 0 + .long 0x3800 @ Corel VNC .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0x24000000 @ I/O base address (0x42000000 -> 0xfe000000) - .long 0 + .long 0x3800 @ CATS .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical) .long 0 @ Address of RAM .long 0x24000000 @ I/O base address (0x42000000 -> 0xfe000000) - .long 0 + .long 0x3800 @ tbox .long SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x80000000 .long 0x80000000 @ Address of RAM .long 0x00400000 @ Uart - .long 0 + .long 0x3800 + + @ DEC EBSA285 as co-processor + .long 0x4000 @ Address of page tables (physical) + .long 0 @ Address of RAM + .long DC21285_ARMCSR_BASE @ Physical I/O base address + .long 0x7cf00000 >> 18 @ Virtual I/O base address .LCProcTypes: @ ARM6 / 610 .long 0x41560600 @@ -250,7 +283,11 @@ __entry: teq r0, #0 @ check for illegal entry... mcr p15, 0, r4, c2, c0 @ load page table pointer mov r0, #0x1f @ Domains 0, 1 = client mcr p15, 0, r0, c3, c0 @ load domain access register +#ifdef CONFIG_ALIGNMENT_TRAP + mov r0, #0x3f @ ....S..DPWCAM +#else mov r0, #0x3d @ ....S..DPWC.M +#endif orr r0, r0, #0x100 mov pc, lr @@ -261,7 +298,11 @@ __entry: teq r0, #0 @ check for illegal entry... 
mcr p15, 0, r4, c2, c0 @ load page table pointer mov r0, #0x1f @ Domains 0, 1 = client mcr p15, 0, r0, c3, c0 @ load domain access register +#ifdef CONFIG_ALIGNMENT_TRAP + mov r0, #0x7f @ ....S.LDPWCAM +#else mov r0, #0x7d @ ....S.LDPWC.M +#endif orr r0, r0, #0x100 mov pc, lr @@ -276,32 +317,38 @@ __entry: teq r0, #0 @ check for illegal entry... mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, #0x0e00 bic r0, r0, #0x0002 +#ifdef CONFIG_ALIGNMENT_TRAP + orr r0, r0, #0x003f @ I...S..DPWCAM +#else orr r0, r0, #0x003d @ I...S..DPWC.M +#endif orr r0, r0, #0x1100 @ v4 supports separate I cache mov pc, lr - .section ".text.init",#alloc,#execinstr - .Lsa_fastclock: mcr p15, 0, r4, c15, c1, 2 @ Enable clock switching mov pc, lr .LC0: .long SYMBOL_NAME(__entry) - .long SYMBOL_NAME(machine_type) + .long SYMBOL_NAME(__machine_arch_type) .long SYMBOL_NAME(__bss_start) .long SYMBOL_NAME(processor_id) .long SYMBOL_NAME(_end) + .long SYMBOL_NAME(cr_alignment) .long SYMBOL_NAME(init_task_union)+8192 .align .Lalready_done_mmap: adr r4, .LC0 - ldmia r4, {r3, r4, r5, r6, r8, sp} @ Setup stack + ldmia r4, {r3, r4, r5, r6, r7, r8, sp} @ Setup stack add r10, r10, r3 @ Add base back in mov fp, #0 -1: cmp r5, r8 @ Clear BSS +1: cmp r5, r7 @ Clear BSS strcc fp, [r5],#4 bcc 1b + bic r2, r0, #2 @ Clear 'A' bit + stmia r8, {r0, r2} @ Save control register values + str r1, [r4] @ Save machine type str r9, [r6] @ Save processor ID mov lr, pc @@ -310,10 +357,12 @@ __entry: teq r0, #0 @ check for illegal entry... b SYMBOL_NAME(start_kernel) .text -#ifdef DEBUG + +#ifdef CONFIG_DEBUG_LL /* * Some debugging routines (useful if you've got MM problems and - * printk isn't working). For DEBUGGING ONLY!!! + * printk isn't working). For DEBUGGING ONLY!!! Do not leave + * references to these in a production kernel! */ #if defined(CONFIG_ARCH_RPC) .macro addruart,rx @@ -362,64 +411,71 @@ __entry: teq r0, #0 @ check for illegal entry... 
beq 1001b .endm -#elif defined(CONFIG_ARCH_EBSA285) +#elif defined(CONFIG_HOST_FOOTBRIDGE) || defined(CONFIG_ADDIN_FOOTBRIDGE) +#ifndef CONFIG_DEBUG_DC21285_PORT + /* For NetWinder debugging */ .macro addruart,rx - mov \rx, #0xfe000000 + mov \rx, #0xff000000 + orr \rx, \rx, #0x000003f8 .endm .macro senduart,rd,rx - str \rd, [\rx, #0x160] @ UARTDR + strb \rd, [\rx] .endm .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x178] @ UARTFLG - tst \rd, #1 << 3 - bne 1001b +1002: ldrb \rd, [\rx, #0x5] + and \rd, \rd, #0x60 + teq \rd, #0x60 + bne 1002b .endm .macro waituart,rd,rx +1001: ldrb \rd, [\rx, #0x6] + tst \rd, #0x10 + beq 1001b .endm +#else + /* For EBSA285 debugging */ + .equ dc21285_high, ARMCSR_BASE & 0xff000000 + .equ dc21285_low, ARMCSR_BASE & 0x00ffffff -#elif defined(CONFIG_ARCH_NEXUSPCI) .macro addruart,rx - ldr \rx, =0xfff00000 + mov \rx, #dc21285_high + .if dc21285_low + orr \rx, \rx, #dc21285_low + .endif .endm .macro senduart,rd,rx - str \rd, [\rx, #0xc] + str \rd, [\rx, #0x160] @ UARTDR .endm .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x4] - tst \rd, #1 << 0 +1001: ldr \rd, [\rx, #0x178] @ UARTFLG + tst \rd, #1 << 3 bne 1001b .endm .macro waituart,rd,rx .endm - -#elif defined(CONFIG_ARCH_VNC) +#endif +#elif defined(CONFIG_ARCH_NEXUSPCI) .macro addruart,rx - mov \rx, #0xff000000 - orr \rx, \rx, #0x00e00000 - orr \rx, \rx, #0x000003f8 + ldr \rx, =0xfff00000 .endm .macro senduart,rd,rx - strb \rd, [\rx] + str \rd, [\rx, #0xc] .endm .macro busyuart,rd,rx -1002: ldrb \rd, [\rx, #0x5] - and \rd, \rd, #0x60 - teq \rd, #0x60 - bne 1002b +1001: ldr \rd, [\rx, #0x4] + tst \rd, #1 << 0 + bne 1001b .endm .macro waituart,rd,rx -1001: ldrb \rd, [\rx, #0x6] - tst \rd, #0x10 - beq 1001b .endm #else #error Unknown architecture @@ -476,8 +532,6 @@ ENTRY(printch) mov r0, #0 b 1b - .ltorg - .bss hexbuf: .space 16 diff --git a/arch/arm/kernel/hw-ebsa285.c b/arch/arm/kernel/hw-ebsa285.c deleted file mode 100644 index e3385696b..000000000 --- a/arch/arm/kernel/hw-ebsa285.c +++ 
/dev/null @@ -1,161 +0,0 @@ -/* - * arch/arm/kernel/hw-ebsa286.c - * - * EBSA285 hardware specific functions - * - * Copyright (C) 1998 Russell King, Phil Blundel - */ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/ptrace.h> -#include <linux/interrupt.h> -#include <linux/init.h> - -#include <asm/irq.h> -#include <asm/system.h> - -extern int setup_arm_irq(int, struct irqaction *); - -extern void pci_set_cmd(struct pci_dev *dev, unsigned short clear, unsigned short set); -extern void pci_set_base_addr(struct pci_dev *dev, int idx, unsigned int addr); -extern void pci_set_irq_line(struct pci_dev *dev, unsigned int irq); - -static int irqmap_ebsa[] __initdata = { 9, 8, 18, 11 }; -static int irqmap_cats[] __initdata = { 18, 8, 9, 11 }; - -__initfunc(static int ebsa_irqval(struct pci_dev *dev)) -{ - unsigned char pin; - - pcibios_read_config_byte(dev->bus->number, - dev->devfn, - PCI_INTERRUPT_PIN, - &pin); - - return irqmap_ebsa[(PCI_SLOT(dev->devfn) + pin) & 3]; -} - -__initfunc(static int cats_irqval(struct pci_dev *dev)) -{ - if (dev->irq >= 128) - return 32 + (dev->irq & 0x1f); - - switch (dev->irq) { - case 1: - case 2: - case 3: - case 4: - return irqmap_cats[dev->irq - 1]; - case 0: - return 0; - } - - printk("PCI: device %02x:%02x has unknown irq line %x\n", - dev->bus->number, dev->devfn, dev->irq); - return 0; -} - -__initfunc(void pcibios_fixup_ebsa285(struct pci_dev *dev)) -{ - char cmd; - - /* sort out the irq mapping for this device */ - switch (machine_type) { - case MACH_TYPE_EBSA285: - dev->irq = ebsa_irqval(dev); - break; - case MACH_TYPE_CATS: - dev->irq = cats_irqval(dev); - break; - } - - /* Turn on bus mastering - boot loader doesn't - * - perhaps it should! 
- dag - */ - pci_read_config_byte(dev, PCI_COMMAND, &cmd); - pci_write_config_byte(dev, PCI_COMMAND, cmd | PCI_COMMAND_MASTER); -} - -static void irq_pci_err(int irq, void *dev_id, struct pt_regs *regs) -{ - const char *err = "unknown"; - unsigned long cmd = *(unsigned long *)0xfe000004 & 0xffff; - unsigned long ctrl = *(unsigned long *)0xfe00013c & 0xffffde07; - static unsigned long next_warn[7]; - int idx = 6; - - switch(irq) { - case IRQ_PCIPARITY: - *(unsigned long *)0xfe000004 = cmd | 1 << 31; - idx = 0; - err = "parity"; - break; - - case IRQ_PCITARGETABORT: - *(unsigned long *)0xfe000004 = cmd | 1 << 28; - idx = 1; - err = "target abort"; - break; - - case IRQ_PCIMASTERABORT: - *(unsigned long *)0xfe000004 = cmd | 1 << 29; - idx = 2; - err = "master abort"; - break; - - case IRQ_PCIDATAPARITY: - *(unsigned long *)0xfe000004 = cmd | 1 << 24; - idx = 3; - err = "data parity"; - break; - - case IRQ_DISCARDTIMER: - *(unsigned long *)0xfe00013c = ctrl | 1 << 8; - idx = 4; - err = "discard timer"; - break; - - case IRQ_SERR: - *(unsigned long *)0xfe00013c = ctrl | 1 << 3; - idx = 5; - err = "system"; - break; - } - if (time_after_eq(jiffies, next_warn[idx])) { - next_warn[idx] = jiffies + 3 * HZ / 100; - printk(KERN_ERR "PCI %s error detected\n", err); - } -} - -static struct irqaction irq_pci_error = { - irq_pci_err, SA_INTERRUPT, 0, "PCI error", NULL, NULL -}; - -__initfunc(void pcibios_init_ebsa285(void)) -{ - setup_arm_irq(IRQ_PCIPARITY, &irq_pci_error); - setup_arm_irq(IRQ_PCITARGETABORT, &irq_pci_error); - setup_arm_irq(IRQ_PCIMASTERABORT, &irq_pci_error); - setup_arm_irq(IRQ_PCIDATAPARITY, &irq_pci_error); - setup_arm_irq(IRQ_DISCARDTIMER, &irq_pci_error); - setup_arm_irq(IRQ_SERR, &irq_pci_error); - - /* - * Map our SDRAM at a known address in PCI space, just in case - * the firmware had other ideas. Using a nonzero base is slightly - * bizarre but apparently necessary to avoid problems with some - * video cards. 
- * - * We should really only do this if the central function is enabled. - */ - *(unsigned long *)0xfe000010 = 0; - *(unsigned long *)0xfe000018 = 0xe0000000; - *(unsigned long *)0xfe0000f8 = 0; - *(unsigned long *)0xfe0000fc = 0; - *(unsigned long *)0xfe000100 = 0x01fc0000; - *(unsigned long *)0xfe000104 = 0; - *(unsigned long *)0xfe000108 = 0x80000000; - *(unsigned long *)0xfe000004 = 0x17; -} diff --git a/arch/arm/kernel/hw-footbridge.c b/arch/arm/kernel/hw-footbridge.c new file mode 100644 index 000000000..857f120e1 --- /dev/null +++ b/arch/arm/kernel/hw-footbridge.c @@ -0,0 +1,893 @@ +/* + * arch/arm/kernel/hw-footbridge.c + * + * Footbridge-dependent machine fixup + * + * Copyright (C) 1998, 1999 Russell King, Phil Blundell + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/ptrace.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/init.h> + +#include <asm/dec21285.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/leds.h> +#include <asm/system.h> + +#define IRDA_IO_BASE 0x180 +#define ETHER10_IO_BASE 0x301 +#define GP1_IO_BASE 0x338 +#define GP2_IO_BASE 0x33a +#define DEC21143_IO_BASE 0x401 +#define DEC21143_MEM_BASE 0x00800000 +#define CYBER2000_MEM_BASE 0x01000000 + +int have_isa_bridge; + +extern int setup_arm_irq(int, struct irqaction *); +extern void pci_set_cmd(struct pci_dev *dev, unsigned short clear, unsigned short set); +extern void pci_set_base_addr(struct pci_dev *dev, int idx, unsigned int addr); +extern void pci_set_irq_line(struct pci_dev *dev, unsigned int irq); +extern void (*kd_mksound)(unsigned int hz, unsigned int ticks); + +#ifdef CONFIG_PCI + +static int irqmap_ebsa[] __initdata = { IRQ_IN1, IRQ_IN0, IRQ_PCI, IRQ_IN3 }; + +__initfunc(static int ebsa_irqval(struct pci_dev *dev)) +{ + unsigned char pin; + + 
pcibios_read_config_byte(dev->bus->number, + dev->devfn, + PCI_INTERRUPT_PIN, + &pin); + + return irqmap_ebsa[(PCI_SLOT(dev->devfn) + pin) & 3]; +} + +#ifdef CONFIG_CATS +static int irqmap_cats[] __initdata = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; + +__initfunc(static int cats_irqval(struct pci_dev *dev)) +{ + if (dev->irq >= 128) + return 16 + (dev->irq & 0x1f); + + switch (dev->irq) { + case 1: + case 2: + case 3: + case 4: + return irqmap_cats[dev->irq - 1]; + case 0: + return 0; + } + + printk("PCI: device %02x:%02x has unknown irq line %x\n", + dev->bus->number, dev->devfn, dev->irq); + return 0; +} +#endif + +__initfunc(void pcibios_fixup_ebsa285(struct pci_dev *dev)) +{ + /* Latency timer of 32 */ + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 32); + + /* 32-byte cache line size */ + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 8); + + /* Set SysErr enable, Parity enable */ + pci_set_cmd(dev, 0, PCI_COMMAND_FAST_BACK | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); + + /* If this device is an ISA bridge, set the + * have_isa_bridge flag. We will then go looking + * for things like keyboard, etc + */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA || + (dev->class >> 8) == PCI_CLASS_BRIDGE_EISA) + have_isa_bridge = !0; + + /* sort out the irq mapping for this device */ + switch (machine_arch_type) { + case MACH_TYPE_EBSA285: + dev->irq = ebsa_irqval(dev); + /* Turn on bus mastering - boot loader doesn't + * - perhaps it should! - dag + */ + pci_set_cmd(dev, 0, PCI_COMMAND_MASTER); + break; + +#ifdef CONFIG_CATS + case MACH_TYPE_CATS: + dev->irq = cats_irqval(dev); + /* Turn on bus mastering - boot loader doesn't + * - perhaps it should! 
- dag + */ + pci_set_cmd(dev, 0, PCI_COMMAND_MASTER); + break; +#endif +#ifdef CONFIG_ARCH_NETWINDER + case MACH_TYPE_NETWINDER: + /* disable ROM */ + pci_write_config_dword(dev, PCI_ROM_ADDRESS, 0); + +#define DEV(v,d) ((v)<<16|(d)) + switch (DEV(dev->vendor, dev->device)) { + /* Ether 100 */ + case DEV(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21142): + pci_set_base_addr(dev, 0, DEC21143_IO_BASE); + pci_set_base_addr(dev, 1, DEC21143_MEM_BASE); + pci_set_cmd(dev, 0, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY | PCI_COMMAND_IO); + /* Put the chip to sleep in case the driver isn't loaded */ + pci_write_config_dword(dev, 0x40, 0x80000000); + dev->irq = IRQ_NETWINDER_ETHER100; + break; + + /* Ether 10 */ + case DEV(PCI_VENDOR_ID_WINBOND2,0x5a5a): + pci_set_base_addr(dev, 0, ETHER10_IO_BASE); + pci_set_cmd(dev, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY, PCI_COMMAND_IO); + dev->irq = IRQ_NETWINDER_ETHER10; + break; + + /* ISA bridge */ + case DEV(PCI_VENDOR_ID_WINBOND,PCI_DEVICE_ID_WINBOND_83C553): + pci_set_base_addr(dev, 0, 0); + pci_set_cmd(dev, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY, PCI_COMMAND_IO); + /* + * Enable all memory requests from ISA to be channeled to PCI + */ + pci_write_config_byte(dev, 0x48, 255); + /* + * Disable ping-pong (as per errata) + */ + pci_write_config_byte(dev, 0x42, 0); + /* + * Enable PCI packet retry + */ + pci_write_config_byte(dev, 0x40, 0x22); + /* + * Do not use PCI CPU park enable, park on + * last master, disable GAT bit + */ + pci_write_config_byte(dev, 0x83, 0x02); + /* + * Default rotating priorities + */ + pci_write_config_byte(dev, 0x80, 0xe0); + /* + * Rotate bank 4 + */ + pci_write_config_byte(dev, 0x81, 0x01); + break; + + /* IDE */ + case DEV(PCI_VENDOR_ID_WINBOND,PCI_DEVICE_ID_WINBOND_82C105): + pci_set_base_addr(dev, 0, 0x1f1); + pci_set_base_addr(dev, 1, 0x3f5); + pci_set_base_addr(dev, 2, 0x171); + pci_set_base_addr(dev, 3, 0x375); + pci_set_base_addr(dev, 4, 0xe801); + pci_set_cmd(dev, PCI_COMMAND_MEMORY, PCI_COMMAND_MASTER | 
PCI_COMMAND_IO); + dev->irq = IRQ_ISA_HARDDISK1; + break; + + /* VGA */ + case DEV(PCI_VENDOR_ID_INTERG,0x2000): + pci_set_base_addr(dev, 0, CYBER2000_MEM_BASE); + pci_set_cmd(dev, PCI_COMMAND_MASTER, PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + dev->irq = IRQ_NETWINDER_VGA; + break; + } +#endif + } +} + +static inline void +report_pci_dev_error(void) +{ + struct pci_dev *dev; + + for (dev = pci_devices; dev; dev = dev->next) { + unsigned short status; + + pci_read_config_word(dev, PCI_STATUS, &status); + if (status & 0xf900) { + printk(KERN_DEBUG "PCI: [%04X:%04X] status = %X\n", + dev->vendor, dev->device, status); + + pci_write_config_word(dev, PCI_STATUS, status & 0xf900); + } + } +} +#else +#define report_pci_dev_error() +#endif + +/* + * Warn on PCI errors. Please report any occurrences! + */ +static void +irq_pci_err(int irq, void *dev_id, struct pt_regs *regs) +{ + static unsigned long next_warn; + unsigned long cmd = *CSR_PCICMD & 0x0000ffff; + unsigned long ctrl = (*CSR_SA110_CNTL) & 0xffffde07; + unsigned long irqstatus = *CSR_IRQ_RAWSTATUS; + int warn = time_after_eq(jiffies, next_warn); + + ctrl |= SA110_CNTL_DISCARDTIMER; + + if (warn) { + next_warn = jiffies + 3 * HZ / 100; + printk(KERN_DEBUG "PCI: "); + } + + if (irqstatus & (1 << 31)) { + if (warn) + printk("parity error "); + cmd |= 1 << 31; + } + + if (irqstatus & (1 << 30)) { + if (warn) + printk("target abort "); + cmd |= 1 << 28; + } + + if (irqstatus & (1 << 29)) { + if (warn) + printk("master abort "); + cmd |= 1 << 29; + } + + if (irqstatus & (1 << 28)) { + if (warn) + printk("data parity error "); + cmd |= 1 << 24; + } + + if (irqstatus & (1 << 27)) { + if (warn) + printk("discard timer expired "); + ctrl &= ~SA110_CNTL_DISCARDTIMER; + } + + if (irqstatus & (1 << 23)) { + if (warn) + printk("system error "); + ctrl |= SA110_CNTL_RXSERR; + } + + if (warn) + printk("pc=[<%08lX>]\n", instruction_pointer(regs)); + + report_pci_dev_error(); + + *CSR_PCICMD = cmd; + *CSR_SA110_CNTL = ctrl; +} + 
+static struct irqaction irq_pci_error = { + irq_pci_err, SA_INTERRUPT, 0, "PCI error", NULL, NULL +}; + +__initfunc(void pcibios_init_ebsa285(void)) +{ + setup_arm_irq(IRQ_PCI_ERR, &irq_pci_error); +} + +/* + * Netwinder stuff + */ +#ifdef CONFIG_ARCH_NETWINDER + +/* + * Winbond WB83977F accessibility stuff + */ +static inline void wb977_open(void) +{ + outb(0x87, 0x370); + outb(0x87, 0x370); +} + +static inline void wb977_close(void) +{ + outb(0xaa, 0x370); +} + +static inline void wb977_wb(int reg, int val) +{ + outb(reg, 0x370); + outb(val, 0x371); +} + +static inline void wb977_ww(int reg, int val) +{ + outb(reg, 0x370); + outb(val >> 8, 0x371); + outb(reg + 1, 0x370); + outb(val, 0x371); +} + +#define wb977_device_select(dev) wb977_wb(0x07, dev) +#define wb977_device_disable() wb977_wb(0x30, 0x00) +#define wb977_device_enable() wb977_wb(0x30, 0x01) + +/* + * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE + */ +spinlock_t __netwinder_data gpio_lock = SPIN_LOCK_UNLOCKED; + +static unsigned int __netwinder_data current_gpio_op = 0; +static unsigned int __netwinder_data current_gpio_io = 0; +static unsigned int __netwinder_data current_cpld = 0; + +void __netwinder_text gpio_modify_op(int mask, int set) +{ + unsigned int new_gpio, changed; + + new_gpio = (current_gpio_op & ~mask) | set; + changed = new_gpio ^ current_gpio_op; + current_gpio_op = new_gpio; + + if (changed & 0xff) + outb(new_gpio, GP1_IO_BASE); + if (changed & 0xff00) + outb(new_gpio >> 8, GP2_IO_BASE); +} + +static inline void __gpio_modify_io(int mask, int in) +{ + unsigned int new_gpio, changed; + int port; + + new_gpio = (current_gpio_io & ~mask) | in; + changed = new_gpio ^ current_gpio_io; + current_gpio_io = new_gpio; + + changed >>= 1; + new_gpio >>= 1; + + wb977_device_select(7); + + for (port = 0xe1; changed && port < 0xe8; changed >>= 1) { + wb977_wb(port, new_gpio & 1); + + port += 1; + new_gpio >>= 1; + } + + wb977_device_select(8); + + for (port = 0xe8; changed && port 
< 0xec; changed >>= 1) { + wb977_wb(port, new_gpio & 1); + + port += 1; + new_gpio >>= 1; + } +} + +void __netwinder_text gpio_modify_io(int mask, int in) +{ + /* Open up the SuperIO chip */ + wb977_open(); + + __gpio_modify_io(mask, in); + + /* Close up the EFER gate */ + wb977_close(); +} + +int __netwinder_text gpio_read(void) +{ + return inb(GP1_IO_BASE) | inb(GP2_IO_BASE) << 8; +} + +/* + * Initialise the Winbond W83977F global registers + */ +static inline void wb977_init_global(void) +{ + /* + * Enable R/W config registers + */ + wb977_wb(0x26, 0x40); + + /* + * Power down FDC (not used) + */ + wb977_wb(0x22, 0xfe); + + /* + * GP12, GP11, CIRRX, IRRXH, GP10 + */ + wb977_wb(0x2a, 0xc1); + + /* + * GP23, GP22, GP21, GP20, GP13 + */ + wb977_wb(0x2b, 0x6b); + + /* + * GP17, GP16, GP15, GP14 + */ + wb977_wb(0x2c, 0x55); +} + +/* + * Initialise the Winbond W83977F printer port + */ +static inline void wb977_init_printer(void) +{ + wb977_device_select(1); + + /* + * mode 1 == EPP + */ + wb977_wb(0xf0, 0x01); +} + +/* + * Initialise the Winbond W83977F keyboard controller + */ +static inline void wb977_init_keyboard(void) +{ + wb977_device_select(5); + + /* + * Keyboard controller address + */ + wb977_ww(0x60, 0x0060); + wb977_ww(0x62, 0x0064); + + /* + * Keyboard IRQ 1, active high, edge trigger + */ + wb977_wb(0x70, 1); + wb977_wb(0x71, 0x02); + + /* + * Mouse IRQ 5, active high, edge trigger + */ + wb977_wb(0x72, 5); + wb977_wb(0x73, 0x02); + + /* + * KBC 8MHz + */ + wb977_wb(0xf0, 0x40); + + /* + * Enable device + */ + wb977_device_enable(); +} + +/* + * Initialise the Winbond W83977F Infra-Red device + */ +static inline void wb977_init_irda(void) +{ + wb977_device_select(6); + + /* + * IR base address + */ + wb977_ww(0x60, IRDA_IO_BASE); + + /* + * IRDA IRQ 6, active high, edge trigger + */ + wb977_wb(0x70, 6); + wb977_wb(0x71, 0x02); + + /* + * RX DMA - ISA DMA 0 + */ + wb977_wb(0x74, 0x00); + + /* + * TX DMA - Disable Tx DMA + */ + wb977_wb(0x75, 0x04); + + 
/* + * Append CRC, Enable bank selection + */ + wb977_wb(0xf0, 0x03); + + /* + * Enable device + */ + wb977_device_enable(); +} + +/* + * Initialise Winbond W83977F general purpose IO + */ +static inline void wb977_init_gpio(void) +{ + unsigned long flags; + + /* + * Set up initial I/O definitions + */ + current_gpio_io = -1; + __gpio_modify_io(-1, GPIO_DONE | GPIO_WDTIMER); + + wb977_device_select(7); + + /* + * Group1 base address + */ + wb977_ww(0x60, GP1_IO_BASE); + wb977_ww(0x62, 0); + wb977_ww(0x64, 0); + + /* + * GP10 (Orage button) IRQ 10, active high, edge trigger + */ + wb977_wb(0x70, 10); + wb977_wb(0x71, 0x02); + + /* + * GP10: Debounce filter enabled, IRQ, input + */ + wb977_wb(0xe0, 0x19); + + /* + * Enable Group1 + */ + wb977_device_enable(); + + wb977_device_select(8); + + /* + * Group2 base address + */ + wb977_ww(0x60, GP2_IO_BASE); + + /* + * Clear watchdog timer regs + * - timer disable + */ + wb977_wb(0xf2, 0x00); + + /* + * - disable LED, no mouse nor keyboard IRQ + */ + wb977_wb(0xf3, 0x00); + + /* + * - timer counting, disable power LED, disable timeout + */ + wb977_wb(0xf4, 0x00); + + /* + * Enable group2 + */ + wb977_device_enable(); + + /* + * Set Group1/Group2 outputs + */ + spin_lock_irqsave(&gpio_lock, flags); + gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN); + spin_unlock_irqrestore(&gpio_lock, flags); +} + +/* + * Initialise the Winbond W83977F chip. + */ +__initfunc(static void wb977_init(void)) +{ + request_region(0x370, 2, "W83977AF configuration"); + + /* + * Open up the SuperIO chip + */ + wb977_open(); + + /* + * Initialise the global registers + */ + wb977_init_global(); + + /* + * Initialise the various devices in + * the multi-IO chip. 
+ */ + wb977_init_printer(); + wb977_init_keyboard(); + wb977_init_irda(); + wb977_init_gpio(); + + /* + * Close up the EFER gate + */ + wb977_close(); +} + +void __netwinder_text cpld_modify(int mask, int set) +{ + int msk; + + current_cpld = (current_cpld & ~mask) | set; + + gpio_modify_io(GPIO_DATA, 0); + gpio_modify_op(GPIO_IOLOAD, 0); + + for (msk = 8; msk; msk >>= 1) { + int bit = current_cpld & msk; + + gpio_modify_op(GPIO_DATA | GPIO_IOCLK, bit ? GPIO_DATA : 0); + gpio_modify_op(GPIO_IOCLK, GPIO_IOCLK); + } + + gpio_modify_op(GPIO_IOCLK|GPIO_DATA, 0); + gpio_modify_op(GPIO_IOLOAD|GPIO_DSCLK, GPIO_IOLOAD|GPIO_DSCLK); + gpio_modify_op(GPIO_IOLOAD, 0); +} + +__initfunc(static void cpld_init(void)) +{ + unsigned long flags; + + spin_lock_irqsave(&gpio_lock, flags); + cpld_modify(-1, CPLD_UNMUTE | 4); + spin_unlock_irqrestore(&gpio_lock, flags); +} + +static unsigned char rwa_unlock[] __initdata = +{ 0x00, 0x00, 0x6a, 0xb5, 0xda, 0xed, 0xf6, 0xfb, 0x7d, 0xbe, 0xdf, 0x6f, 0x37, 0x1b, + 0x0d, 0x86, 0xc3, 0x61, 0xb0, 0x58, 0x2c, 0x16, 0x8b, 0x45, 0xa2, 0xd1, 0xe8, 0x74, + 0x3a, 0x9d, 0xce, 0xe7, 0x73, 0x39 }; + +#ifndef DEBUG +#define dprintk if (0) printk +#else +#define dprintk printk +#endif + +#define WRITE_RWA(r,v) do { outb((r), 0x279); outb((v), 0xa79); } while (0) + +static inline void rwa010_unlock(void) +{ + int i; + + WRITE_RWA(2, 2); + mdelay(10); + + for (i = 0; i < sizeof(rwa_unlock); i++) + outb(rwa_unlock[i], 0x279); +} + +static inline void rwa010_read_ident(void) +{ + unsigned char si[9]; + int i, j; + + WRITE_RWA(3, 0); + WRITE_RWA(0, 128); + + outb(1, 0x279); + + mdelay(10); + + dprintk("Identifier: "); + for (i = 0; i < 9; i++) { + si[i] = 0; + for (j = 0; j < 8; j++) { + int bit; + mdelay(1); + inb(0x203); + mdelay(1); + bit = inb(0x203); + dprintk("%02X ", bit); + si[i] |= bit << j; + } + mdelay(10); + dprintk("%02X ", si[i]); + } + dprintk("\n"); +} + +static inline void rwa010_global_init(void) +{ + WRITE_RWA(6, 2); // Assign a card no = 2 
+ + dprintk("Card no = %d\n", inb(0x203)); + + WRITE_RWA(7, 3); + WRITE_RWA(0x30, 0); + + WRITE_RWA(7, 4); + WRITE_RWA(0x30, 0); + + WRITE_RWA(7, 2); + WRITE_RWA(0x30, 0); +} + +static inline void rwa010_game_port_init(void) +{ + int i; + + WRITE_RWA(7, 5); + + dprintk("Slider base: "); + WRITE_RWA(0x61, 1); + i = inb(0x203); + + WRITE_RWA(0x60, 2); + dprintk("%02X%02X (201)\n", inb(0x203), i); + + WRITE_RWA(0x30, 1); +} + +static inline void rwa010_waveartist_init(int base, int irq, int dma) +{ + int i; + + WRITE_RWA(7, 0); + + dprintk("WaveArtist base: "); + WRITE_RWA(0x61, base); + i = inb(0x203); + + WRITE_RWA(0x60, base >> 8); + dprintk("%02X%02X (%X),", inb(0x203), i, base); + + WRITE_RWA(0x70, irq); + dprintk(" irq: %d (%d),", inb(0x203), irq); + + WRITE_RWA(0x74, dma); + dprintk(" dma: %d (%d)\n", inb(0x203), dma); + + WRITE_RWA(0x30, 1); +} + +static inline void rwa010_soundblaster_init(int sb_base, int al_base, int irq, int dma) +{ + int i; + + WRITE_RWA(7, 1); + + dprintk("SoundBlaster base: "); + WRITE_RWA(0x61, sb_base); + i = inb(0x203); + + WRITE_RWA(0x60, sb_base >> 8); + dprintk("%02X%02X (%X),", inb(0x203), i, sb_base); + + dprintk(" irq: "); + WRITE_RWA(0x70, irq); + dprintk("%d (%d),", inb(0x203), irq); + + dprintk(" 8-bit DMA: "); + WRITE_RWA(0x74, dma); + dprintk("%d (%d)\n", inb(0x203), dma); + + dprintk("AdLib base: "); + WRITE_RWA(0x63, al_base); + i = inb(0x203); + + WRITE_RWA(0x62, al_base >> 8); + dprintk("%02X%02X (%X)\n", inb(0x203), i, al_base); + + WRITE_RWA(0x30, 1); +} + +static void rwa010_soundblaster_reset(void) +{ + int i; + + outb(1, 0x226); + udelay(3); + outb(0, 0x226); + + for (i = 0; i < 5; i++) { + if (inb(0x22e) & 0x80) + break; + mdelay(1); + } + if (i == 5) + printk("SoundBlaster: DSP reset failed\n"); + + dprintk("SoundBlaster DSP reset: %02X (AA)\n", inb(0x22a)); + + for (i = 0; i < 5; i++) { + if ((inb(0x22c) & 0x80) == 0) + break; + mdelay(1); + } + + if (i == 5) + printk("SoundBlaster: DSP not ready\n"); + else { 
+ outb(0xe1, 0x22c); + + dprintk("SoundBlaster DSP id: "); + i = inb(0x22a); + udelay(1); + i |= inb(0x22a) << 8; + dprintk("%04X\n", i); + + for (i = 0; i < 5; i++) { + if ((inb(0x22c) & 0x80) == 0) + break; + mdelay(1); + } + + if (i == 5) + printk("SoundBlaster: could not turn speaker off\n"); + + outb(0xd3, 0x22c); + } + + /* turn on OPL3 */ + outb(5, 0x38a); + outb(1, 0x38b); +} + +__initfunc(static void rwa010_init(void)) +{ + rwa010_unlock(); + rwa010_read_ident(); + rwa010_global_init(); + rwa010_game_port_init(); + rwa010_waveartist_init(0x250, 3, 7); + rwa010_soundblaster_init(0x220, 0x388, 3, 1); + rwa010_soundblaster_reset(); +} + +EXPORT_SYMBOL(gpio_lock); +EXPORT_SYMBOL(gpio_modify_op); +EXPORT_SYMBOL(gpio_modify_io); +EXPORT_SYMBOL(cpld_modify); + +#endif + +#ifdef CONFIG_LEDS +#define DEFAULT_LEDS 0 +#else +#define DEFAULT_LEDS GPIO_GREEN_LED +#endif + +__initfunc(void hw_init(void)) +{ +#ifdef CONFIG_ARCH_NETWINDER + /* + * this ought to have a better home... + * Since this calls the above routines, which are + * compiled only if CONFIG_ARCH_NETWINDER is set, + * these should only be parsed by the compiler + * in the same circumstance. + */ + if (machine_is_netwinder()) { + unsigned long flags; + + wb977_init(); + cpld_init(); + rwa010_init(); + + spin_lock_irqsave(&gpio_lock, flags); + gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS); + spin_unlock_irqrestore(&gpio_lock, flags); + } +#endif + + leds_event(led_start); +} diff --git a/arch/arm/kernel/iic.c b/arch/arm/kernel/iic.c index 6eb0122e8..c9a672a32 100644 --- a/arch/arm/kernel/iic.c +++ b/arch/arm/kernel/iic.c @@ -7,20 +7,24 @@ */ #include <linux/delay.h> +#include <linux/errno.h> #include <asm/system.h> -#include <asm/io.h> #include <asm/hardware.h> +#include <asm/io.h> +#include <asm/ioc.h> + +#define FORCE_ONES 0xdc /* * if delay loop has been calibrated then us that, * else use IOC timer 1. 
*/ -static void iic_delay (void) +static void iic_delay(void) { extern unsigned long loops_per_sec; if (loops_per_sec != (1 << 12)) { - udelay(10); + udelay(100); /* was 10 */ return; } else { unsigned long flags; @@ -30,7 +34,7 @@ static void iic_delay (void) outb(255, IOC_T1LTCHH); outb(0, IOC_T1GO); outb(1<<6, IOC_IRQCLRA); /* clear T1 irq */ - outb(4, IOC_T1LTCHL); + outb(10, IOC_T1LTCHL); /* was 4 */ outb(0, IOC_T1LTCHH); outb(0, IOC_T1GO); while ((inb(IOC_IRQSTATA) & (1<<6)) == 0); @@ -38,124 +42,207 @@ static void iic_delay (void) } } -static inline void iic_start (void) +#define IIC_INIT() dat = (inb(IOC_CONTROL) | FORCE_ONES) & ~3 +#define IIC_SET_DAT outb(dat|=1, IOC_CONTROL); +#define IIC_CLR_DAT outb(dat&=~1, IOC_CONTROL); +#define IIC_SET_CLK outb(dat|=2, IOC_CONTROL); +#define IIC_CLR_CLK outb(dat&=~2, IOC_CONTROL); +#define IIC_DELAY iic_delay(); +#define IIC_READ_DATA() (inb(IOC_CONTROL) & 1) + +static inline void iic_set_lines(int clk, int dat) { - unsigned char out; + int old; - out = inb(IOC_CONTROL) | 0xc2; + old = inb(IOC_CONTROL) | FORCE_ONES; - outb(out, IOC_CONTROL); - iic_delay(); + old &= ~3; + + if (clk) + old |= 2; + if (dat) + old |= 1; + + outb(old, IOC_CONTROL); - outb(out ^ 1, IOC_CONTROL); iic_delay(); } -static inline void iic_stop (void) +static inline unsigned int iic_read_data(void) { - unsigned char out; + return inb(IOC_CONTROL) & 1; +} - out = inb(IOC_CONTROL) | 0xc3; +/* + * C: ==~~_ + * D: =~~__ + */ +static inline void iic_start(void) +{ + unsigned int dat; - iic_delay(); - outb(out ^ 1, IOC_CONTROL); + IIC_INIT(); - iic_delay(); - outb(out, IOC_CONTROL); + IIC_SET_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY + + IIC_CLR_DAT + IIC_DELAY + IIC_CLR_CLK + IIC_DELAY } -static int iic_sendbyte (unsigned char b) +/* + * C: __~~ + * D: =__~ + */ +static inline void iic_stop(void) { - unsigned char out, in; - int i; + unsigned int dat; - out = (inb(IOC_CONTROL) & 0xfc) | 0xc0; + IIC_INIT(); - outb(out, IOC_CONTROL); - for (i = 7; i 
>= 0; i--) { - unsigned char c; - c = out | ((b & (1 << i)) ? 1 : 0); + IIC_CLR_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY + IIC_SET_DAT + IIC_DELAY +} - outb(c, IOC_CONTROL); - iic_delay(); +/* + * C: __~_ + * D: =___ + */ +static inline void iic_acknowledge(void) +{ + unsigned int dat; - outb(c | 2, IOC_CONTROL); - iic_delay(); + IIC_INIT(); - outb(c, IOC_CONTROL); - } - outb(out | 1, IOC_CONTROL); - iic_delay(); + IIC_CLR_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY + IIC_CLR_CLK + IIC_DELAY +} - outb(out | 3, IOC_CONTROL); - iic_delay(); +/* + * C: __~_ + * D: =~H~ + */ +static inline int iic_is_acknowledged(void) +{ + unsigned int dat, ack_bit; - in = inb(IOC_CONTROL) & 1; + IIC_INIT(); - outb(out | 1, IOC_CONTROL); - iic_delay(); + IIC_SET_DAT + IIC_DELAY + IIC_SET_CLK + IIC_DELAY - outb(out, IOC_CONTROL); - iic_delay(); + ack_bit = IIC_READ_DATA(); + + IIC_CLR_CLK + IIC_DELAY + + return ack_bit == 0; +} + +/* + * C: _~__~__~__~__~__~__~__~_ + * D: =DDXDDXDDXDDXDDXDDXDDXDD + */ +static void iic_sendbyte(unsigned int b) +{ + unsigned int dat, i; + + IIC_INIT(); + + for (i = 0; i < 8; i++) { + if (b & 128) + IIC_SET_DAT + else + IIC_CLR_DAT + IIC_DELAY + + IIC_SET_CLK + IIC_DELAY + IIC_CLR_CLK + IIC_DELAY - if(in) { - printk("No acknowledge from RTC\n"); - return 1; - } else - return 0; + b <<= 1; + } } -static unsigned char iic_recvbyte (void) +/* + * C: __~_~_~_~_~_~_~_~_ + * D: =~HHHHHHHHHHHHHHHH + */ +static unsigned char iic_recvbyte(void) { - unsigned char out, in; - int i; + unsigned int dat, i, in; - out = (inb(IOC_CONTROL) & 0xfc) | 0xc0; + IIC_INIT(); + + IIC_SET_DAT + IIC_DELAY - outb(out, IOC_CONTROL); in = 0; - for (i = 7; i >= 0; i--) { - outb(out | 1, IOC_CONTROL); - iic_delay(); - outb(out | 3, IOC_CONTROL); - iic_delay(); - in = (in << 1) | (inb(IOC_CONTROL) & 1); - outb(out | 1, IOC_CONTROL); - iic_delay(); + for (i = 0; i < 8; i++) { + IIC_SET_CLK + IIC_DELAY + + in = (in << 1) | IIC_READ_DATA(); + + IIC_CLR_CLK + IIC_DELAY } - outb(out, 
IOC_CONTROL); - iic_delay(); - outb(out | 2, IOC_CONTROL); - iic_delay(); return in; } -void iic_control (unsigned char addr, unsigned char loc, unsigned char *buf, int len) +int iic_control (unsigned char addr, unsigned char loc, unsigned char *buf, int len) { - iic_start(); + int i, err = -EIO; - if (iic_sendbyte(addr & 0xfe)) + iic_start(); + iic_sendbyte(addr & 0xfe); + if (!iic_is_acknowledged()) goto error; - if (iic_sendbyte(loc)) + iic_sendbyte(loc); + if (!iic_is_acknowledged()) goto error; if (addr & 1) { - int i; - - for (i = 0; i < len; i++) - if (iic_sendbyte (buf[i])) - break; - } else { - int i; - iic_stop(); iic_start(); iic_sendbyte(addr|1); - for (i = 0; i < len; i++) - buf[i] = iic_recvbyte (); + if (!iic_is_acknowledged()) + goto error; + + for (i = 0; i < len - 1; i++) { + buf[i] = iic_recvbyte(); + iic_acknowledge(); + } + buf[i] = iic_recvbyte(); + } else { + for (i = 0; i < len; i++) { + iic_sendbyte(buf[i]); + + if (!iic_is_acknowledged()) + goto error; + } } + + err = 0; error: iic_stop(); + + return err; } diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index 99577f1b7..5d09ea540 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -6,9 +6,10 @@ static struct vm_area_struct init_mmap = INIT_MMAP; static struct fs_struct init_fs = INIT_FS; +static struct file * init_fd_array[NR_OPEN] = { NULL, }; static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS; -struct mm_struct init_mm = INIT_MM; +struct mm_struct init_mm = INIT_MM(init_mm); /* * Initial task structure. @@ -20,4 +21,5 @@ struct mm_struct init_mm = INIT_MM; * * The things we do for performance.. 
*/ -union task_union init_task_union __attribute__((__section__(".init.task"))) = { INIT_TASK }; +union task_union init_task_union __attribute__((__section__(".init.task"))) = + { INIT_TASK(init_task_union.task) }; diff --git a/arch/arm/kernel/ioport.c b/arch/arm/kernel/ioport.c deleted file mode 100644 index d375dcbdd..000000000 --- a/arch/arm/kernel/ioport.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * linux/arch/arm/kernel/ioport.c - * - * Io-port support is not used for ARM - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/ioport.h> - -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -/*asmlinkage void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) -{ -}*/ - -asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) -{ - return -ENOSYS; -} - -asmlinkage int sys_iopl(long ebx,long ecx,long edx, - long esi, long edi, long ebp, long eax, long ds, - long es, long fs, long gs, long orig_eax, - long eip,long cs,long eflags,long esp,long ss) -{ - return -ENOSYS; -} diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 332e8940d..ee6e07c6c 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -23,7 +23,6 @@ #include <linux/sched.h> #include <linux/ioport.h> #include <linux/interrupt.h> -#include <linux/timex.h> #include <linux/malloc.h> #include <linux/random.h> #include <linux/smp.h> @@ -32,7 +31,6 @@ #include <asm/hardware.h> #include <asm/io.h> -#include <asm/pgtable.h> #include <asm/system.h> #ifndef SMP @@ -46,10 +44,22 @@ #define cliIF() #endif +/* + * Maximum IRQ count. Currently, this is arbitary. + * However, it should not be set too low to prevent + * false triggering. Conversely, if it is set too + * high, then you could miss a stuck IRQ. + * + * Maybe we ought to set a timer and re-enable the + * IRQ at a later time? 
+ */ +#define MAX_IRQ_CNT 100000 + unsigned int local_bh_count[NR_CPUS]; unsigned int local_irq_count[NR_CPUS]; spinlock_t irq_controller_lock; +int setup_arm_irq(int, struct irqaction *); extern int get_fiq_list(char *); extern void init_FIQ(void); @@ -60,17 +70,29 @@ struct irqdesc { unsigned int probing : 1; /* IRQ in use for a probe */ unsigned int probe_ok : 1; /* IRQ can be used for probe */ unsigned int valid : 1; /* IRQ claimable */ - unsigned int unused :26; + unsigned int noautoenable : 1; /* don't automatically enable IRQ */ + unsigned int unused :25; void (*mask_ack)(unsigned int irq); /* Mask and acknowledge IRQ */ void (*mask)(unsigned int irq); /* Mask IRQ */ void (*unmask)(unsigned int irq); /* Unmask IRQ */ struct irqaction *action; - unsigned int unused2[3]; + /* + * IRQ lock detection + */ + unsigned int lck_cnt; + unsigned int lck_pc; + unsigned int lck_jif; }; static struct irqdesc irq_desc[NR_IRQS]; /* + * Get architecture specific interrupt handlers + * and interrupt initialisation. + */ +#include <asm/arch/irq.h> + +/* * Dummy mask/unmask handler */ static void dummy_mask_unmask_irq(unsigned int irq) @@ -94,10 +116,12 @@ void enable_irq(unsigned int irq) spin_lock_irqsave(&irq_controller_lock, flags); cliIF(); - irq_desc[irq].enabled = 1; irq_desc[irq].probing = 0; irq_desc[irq].triggered = 0; - irq_desc[irq].unmask(irq); + if (!irq_desc[irq].noautoenable) { + irq_desc[irq].enabled = 1; + irq_desc[irq].unmask(irq); + } spin_unlock_irqrestore(&irq_controller_lock, flags); } @@ -119,21 +143,52 @@ int get_irq_list(char *buf) *p++ = '\n'; } -#ifdef CONFIG_ACORN +#ifdef CONFIG_ARCH_ACORN p += get_fiq_list(p); #endif return p - buf; } /* + * IRQ lock detection. + * + * Hopefully, this should get us out of a few locked situations. + * However, it may take a while for this to happen, since we need + * a large number if IRQs to appear in the same jiffie with the + * same instruction pointer (or within 2 instructions). 
+ */ +static void check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs) +{ + unsigned long instr_ptr = instruction_pointer(regs); + + if (desc->lck_jif == jiffies && + desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) { + desc->lck_cnt += 1; + + if (desc->lck_cnt > MAX_IRQ_CNT) { + printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq); + disable_irq(irq); + } + } else { + desc->lck_cnt = 0; + desc->lck_pc = instruction_pointer(regs); + desc->lck_jif = jiffies; + } +} + +/* * do_IRQ handles all normal device IRQ's */ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) { - struct irqdesc * desc = irq_desc + irq; + struct irqdesc * desc; struct irqaction * action; int status, cpu; + irq = fixup_irq(irq); + + desc = irq_desc + irq; + spin_lock(&irq_controller_lock); desc->mask_ack(irq); spin_unlock(&irq_controller_lock); @@ -174,6 +229,12 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) } } + /* + * Debug measure - hopefully we can continue if an + * IRQ lockup problem occurs... + */ + check_irq_lock(desc, irq, regs); + irq_exit(cpu, irq); /* @@ -181,15 +242,10 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs) * a return code from the irq handler to tell us * whether the handler wants us to do software bottom * half handling or not.. - * - * ** IMPORTANT NOTE: do_bottom_half() ENABLES IRQS!!! ** - * ** WE MUST DISABLE THEM AGAIN, ELSE IDE DISKS GO ** - * ** AWOL ** */ if (1) { if (bh_active & bh_mask) do_bottom_half(); - __cli(); } } @@ -227,11 +283,27 @@ int setup_arm_irq(int irq, struct irqaction * new) struct irqaction *old, **p; unsigned long flags; - if (new->flags & SA_SAMPLE_RANDOM) + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. 
+ * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ rand_initialize_irq(irq); + } + /* + * The following block of code has to be executed atomically + */ spin_lock_irqsave(&irq_controller_lock, flags); - p = &irq_desc[irq].action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ @@ -252,28 +324,24 @@ int setup_arm_irq(int irq, struct irqaction * new) if (!shared) { irq_desc[irq].nomask = (new->flags & SA_IRQNOMASK) ? 1 : 0; - irq_desc[irq].enabled = 1; irq_desc[irq].probing = 0; - irq_desc[irq].unmask(irq); + if (!irq_desc[irq].noautoenable) { + irq_desc[irq].enabled = 1; + irq_desc[irq].unmask(irq); + } } spin_unlock_irqrestore(&irq_controller_lock, flags); return 0; } -/* - * Using "struct sigaction" is slightly silly, but there - * are historical reasons and it works well, so.. - */ int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long irq_flags, const char * devname, void *dev_id) { unsigned long retval; struct irqaction *action; - if (!irq_desc[irq].valid) - return -EINVAL; - if (!handler) + if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler) return -EINVAL; action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL); @@ -299,28 +367,30 @@ void free_irq(unsigned int irq, void *dev_id) struct irqaction * action, **p; unsigned long flags; - if (!irq_desc[irq].valid) { + if (irq >= NR_IRQS || !irq_desc[irq].valid) { printk(KERN_ERR "Trying to free IRQ%d\n",irq); #ifdef CONFIG_DEBUG_ERRORS __backtrace(); #endif return; } + + spin_lock_irqsave(&irq_controller_lock, flags); for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { if (action->dev_id != dev_id) continue; /* Found it - now free it */ - save_flags_cli (flags); *p = action->next; - restore_flags (flags); kfree(action); - return; + goto 
out; } printk(KERN_ERR "Trying to free free IRQ%d\n",irq); #ifdef CONFIG_DEBUG_ERRORS __backtrace(); #endif +out: + spin_unlock_irqrestore(&irq_controller_lock, flags); } /* Start the interrupt probing. Unlike other architectures, @@ -346,7 +416,6 @@ unsigned long probe_irq_on(void) continue; irq_desc[i].probing = 1; - irq_desc[i].enabled = 1; irq_desc[i].triggered = 0; irq_desc[i].unmask(i); irqs += 1; @@ -364,7 +433,8 @@ unsigned long probe_irq_on(void) */ spin_lock_irq(&irq_controller_lock); for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && irq_desc[i].triggered) { + if (irq_desc[i].probing && + irq_desc[i].triggered) { irq_desc[i].probing = 0; irqs -= 1; } @@ -383,7 +453,7 @@ unsigned long probe_irq_on(void) int probe_irq_off(unsigned long irqs) { unsigned int i; - int irq_found = -1; + int irq_found = NO_IRQ; /* * look at the interrupts, and find exactly one @@ -393,7 +463,7 @@ int probe_irq_off(unsigned long irqs) for (i = 0; i < NR_IRQS; i++) { if (irq_desc[i].probing && irq_desc[i].triggered) { - if (irq_found != -1) { + if (irq_found != NO_IRQ) { irq_found = NO_IRQ; goto out; } @@ -405,21 +475,19 @@ int probe_irq_off(unsigned long irqs) irq_found = NO_IRQ; out: spin_unlock_irq(&irq_controller_lock); + return irq_found; } -/* - * Get architecture specific interrupt handlers - * and interrupt initialisation. 
- */ -#include <asm/arch/irq.h> - __initfunc(void init_IRQ(void)) { extern void init_dma(void); int irq; for (irq = 0; irq < NR_IRQS; irq++) { + irq_desc[irq].probe_ok = 0; + irq_desc[irq].valid = 0; + irq_desc[irq].noautoenable = 0; irq_desc[irq].mask_ack = dummy_mask_unmask_irq; irq_desc[irq].mask = dummy_mask_unmask_irq; irq_desc[irq].unmask = dummy_mask_unmask_irq; diff --git a/arch/arm/kernel/leds-ebsa110.c b/arch/arm/kernel/leds-ebsa110.c index cc2f7a91d..eb286347b 100644 --- a/arch/arm/kernel/leds-ebsa110.c +++ b/arch/arm/kernel/leds-ebsa110.c @@ -7,11 +7,13 @@ * * - Red - toggles state every 50 timer interrupts */ +#include <linux/module.h> + #include <asm/hardware.h> #include <asm/leds.h> #include <asm/system.h> -void leds_event(led_event_t ledevt) +void ebsa110_leds_event(led_event_t ledevt) { unsigned long flags; @@ -28,3 +30,7 @@ void leds_event(led_event_t ledevt) restore_flags(flags); } + +void (*leds_event)(led_event_t) = ebsa110_leds_event; + +EXPORT_SYMBOL(leds_event); diff --git a/arch/arm/kernel/leds-ebsa285.c b/arch/arm/kernel/leds-ebsa285.c deleted file mode 100644 index a8cf2e775..000000000 --- a/arch/arm/kernel/leds-ebsa285.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * arch/arm/kernel/leds-ebsa285.c - * - * Copyright (C) 1998 Russell King - * - * EBSA-285 LED control routines. 
We use the leds as follows: - * - * - Green - toggles state every 50 timer interrupts - * - Amber - On if system is not idle - * - Red - currently unused - */ -#include <asm/hardware.h> -#include <asm/leds.h> -#include <asm/system.h> - -static char led_state = XBUS_LED_RED | XBUS_LED_GREEN; - -void leds_event(led_event_t ledevt) -{ - unsigned long flags; - - save_flags_cli(flags); - - switch(ledevt) { - case led_idle_start: - led_state |= XBUS_LED_AMBER; - break; - - case led_idle_end: - led_state &= ~XBUS_LED_AMBER; - break; - - case led_timer: - led_state ^= XBUS_LED_GREEN; - break; - - default: - break; - } - - restore_flags(flags); - - *XBUS_LEDS = led_state; -} diff --git a/arch/arm/kernel/leds-footbridge.c b/arch/arm/kernel/leds-footbridge.c new file mode 100644 index 000000000..cb6c7f4b4 --- /dev/null +++ b/arch/arm/kernel/leds-footbridge.c @@ -0,0 +1,249 @@ +/* + * arch/arm/kernel/leds-footbridge.c + * + * Copyright (C) 1998-1999 Russell King + * + * EBSA-285 and NetWinder LED control routines. 
+ * + * The EBSA-285 uses the leds as follows: + * - Green - toggles state every 50 timer interrupts + * - Amber - On if system is not idle + * - Red - currently unused + * + * The Netwinder uses the leds as follows: + * - Green - toggles state every 50 timer interrupts + * - Red - On if the system is not idle + * + * Changelog: + * 02-05-1999 RMK Various cleanups + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <asm/hardware.h> +#include <asm/leds.h> +#include <asm/spinlock.h> +#include <asm/system.h> + +#define LED_STATE_ENABLED 1 +#define LED_STATE_CLAIMED 2 +static char led_state; +static char hw_led_state; + +static spinlock_t leds_lock = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_ARCH_EBSA285 + +static void __ebsa285_text ebsa285_leds_event(led_event_t evt) +{ + unsigned long flags; + + spin_lock_irqsave(&leds_lock, flags); + + switch (evt) { + case led_start: + hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN; +#ifndef CONFIG_LEDS_IDLE + hw_led_state |= XBUS_LED_AMBER; +#endif + led_state |= LED_STATE_ENABLED; + break; + + case led_stop: + led_state &= ~LED_STATE_ENABLED; + break; + + case led_claim: + led_state |= LED_STATE_CLAIMED; + hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN | XBUS_LED_AMBER; + break; + + case led_release: + led_state &= ~LED_STATE_CLAIMED; + hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN | XBUS_LED_AMBER; + break; + +#ifdef CONFIG_LEDS_TIMER + case led_timer: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state ^= XBUS_LED_GREEN; + break; +#endif + +#ifdef CONFIG_LEDS_CPU + case led_idle_start: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state |= XBUS_LED_RED; + break; + + case led_idle_end: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state &= ~XBUS_LED_RED; + break; +#endif + + case led_green_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~XBUS_LED_GREEN; + break; + + case led_green_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= 
XBUS_LED_GREEN; + break; + + case led_amber_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~XBUS_LED_AMBER; + break; + + case led_amber_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= XBUS_LED_AMBER; + break; + + case led_red_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~XBUS_LED_RED; + break; + + case led_red_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= XBUS_LED_RED; + break; + + default: + break; + } + + if (led_state & LED_STATE_ENABLED) + *XBUS_LEDS = hw_led_state; + + spin_unlock_irqrestore(&leds_lock, flags); +} + +#endif + +#ifdef CONFIG_ARCH_NETWINDER + +static void __netwinder_text netwinder_leds_event(led_event_t evt) +{ + unsigned long flags; + + spin_lock_irqsave(&leds_lock, flags); + + switch (evt) { + case led_start: + led_state |= LED_STATE_ENABLED; + hw_led_state = 0; + break; + + case led_stop: + led_state &= ~LED_STATE_ENABLED; + break; + + case led_claim: + led_state |= LED_STATE_CLAIMED; + hw_led_state = 0; + break; + + case led_release: + led_state &= ~LED_STATE_CLAIMED; + hw_led_state = 0; + break; + +#ifdef CONFIG_LEDS_TIMER + case led_timer: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state ^= GPIO_GREEN_LED; + break; +#endif + +#ifdef CONFIG_LEDS_CPU + case led_idle_start: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state &= ~GPIO_RED_LED; + break; + + case led_idle_end: + if (!(led_state & LED_STATE_CLAIMED)) + hw_led_state |= GPIO_RED_LED; + break; +#endif + + case led_green_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= GPIO_GREEN_LED; + break; + + case led_green_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~GPIO_GREEN_LED; + break; + + case led_amber_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= GPIO_GREEN_LED | GPIO_RED_LED; + break; + + case led_amber_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~(GPIO_GREEN_LED | GPIO_RED_LED); + break; + + case led_red_on: + if (led_state & LED_STATE_CLAIMED) + hw_led_state |= 
GPIO_RED_LED; + break; + + case led_red_off: + if (led_state & LED_STATE_CLAIMED) + hw_led_state &= ~GPIO_RED_LED; + break; + + default: + break; + } + + spin_unlock_irqrestore(&leds_lock, flags); + + if (led_state & LED_STATE_ENABLED) { + spin_lock_irqsave(&gpio_lock, flags); + gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state); + spin_unlock_irqrestore(&gpio_lock, flags); + } +} + +#endif + +static void dummy_leds_event(led_event_t evt) +{ +} + +__initfunc(void +init_leds_event(led_event_t evt)) +{ + switch (machine_arch_type) { +#ifdef CONFIG_ARCH_EBSA285 + case MACH_TYPE_EBSA285: + leds_event = ebsa285_leds_event; + break; +#endif +#ifdef CONFIG_ARCH_NETWINDER + case MACH_TYPE_NETWINDER: + leds_event = netwinder_leds_event; + break; +#endif + + default: + leds_event = dummy_leds_event; + } + + leds_event(evt); +} + +void (*leds_event)(led_event_t) = init_leds_event; + +EXPORT_SYMBOL(leds_event); diff --git a/arch/arm/kernel/oldlatches.c b/arch/arm/kernel/oldlatches.c index c4674cd35..a908241d2 100644 --- a/arch/arm/kernel/oldlatches.c +++ b/arch/arm/kernel/oldlatches.c @@ -4,6 +4,7 @@ * (c) David Alan Gilbert 1995/1996 */ #include <linux/kernel.h> +#include <linux/init.h> #include <asm/io.h> #include <asm/hardware.h> @@ -40,7 +41,7 @@ void oldlatch_bupdate(unsigned char mask,unsigned char newdata) } #endif -void oldlatch_init(void) +void __init oldlatch_init(void) { printk("oldlatch: init\n"); #ifdef LATCHAADDR diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 6ea02d891..68bf5aa1f 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -34,7 +34,6 @@ #include <linux/init.h> #include <asm/uaccess.h> -#include <asm/pgtable.h> #include <asm/system.h> #include <asm/io.h> @@ -55,46 +54,37 @@ void enable_hlt(void) } /* - * The idle loop on an arm.. + * The idle loop on an ARM... 
*/ asmlinkage int sys_idle(void) { - int ret = -EPERM; - - lock_kernel(); if (current->pid != 0) - goto out; + return -EPERM; + /* endless idle loop with no priority at all */ - current->priority = -100; - for (;;) - { + while (1) { + if (!current->need_resched && !hlt_counter) + proc_idle(); + current->policy = SCHED_YIELD; + schedule(); +#ifndef CONFIG_NO_PGT_CACHE check_pgt_cache(); -#if 0 //def ARCH_IDLE_OK - if (!hlt_counter && !current->need_resched) - proc_idle (); #endif - run_task_queue(&tq_scheduler); - schedule(); } - ret = 0; -out: - unlock_kernel(); - return ret; } +static char reboot_mode = 'h'; + __initfunc(void reboot_setup(char *str, int *ints)) { + reboot_mode = str[0]; } -/* - * This routine reboots the machine by resetting the expansion cards via - * their loaders, turning off the processor cache (if ARM3), copying the - * first instruction of the ROM to 0, and executing it there. - */ void machine_restart(char * __unused) { - proc_hard_reset (); - arch_hard_reset (); + arch_reset(reboot_mode); + panic("Reboot failed\n"); + while (1); } void machine_halt(void) @@ -150,6 +140,67 @@ void show_regs(struct pt_regs * regs) } /* + * Task structure and kernel stack allocation. + * + * Taken from the i386 version. 
+ */ +#ifdef CONFIG_CPU_32 +#define EXTRA_TASK_STRUCT 8 +static struct task_struct *task_struct_stack[EXTRA_TASK_STRUCT]; +static int task_struct_stack_ptr = -1; +#endif + +struct task_struct *alloc_task_struct(void) +{ + struct task_struct *tsk; + +#ifndef EXTRA_TASK_STRUCT + tsk = ll_alloc_task_struct(); +#else + int index; + + index = task_struct_stack_ptr; + if (index >= EXTRA_TASK_STRUCT/2) + goto use_cache; + + tsk = ll_alloc_task_struct(); + + if (!tsk) { + index = task_struct_stack_ptr; + + if (index >= 0) { +use_cache: tsk = task_struct_stack[index]; + task_struct_stack_ptr = index - 1; + } + } +#endif +#ifdef CONFIG_SYSRQ + /* You need this if you want SYSRQ-T to give sensible stack + * usage information + */ + if (tsk) { + char *p = (char *)tsk; + memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE); + } +#endif + + return tsk; +} + +void free_task_struct(struct task_struct *p) +{ +#ifdef EXTRA_TASK_STRUCT + int index = task_struct_stack_ptr + 1; + + if (index < EXTRA_TASK_STRUCT) { + task_struct_stack[index] = p; + task_struct_stack_ptr = index; + } else +#endif + ll_free_task_struct(p); +} + +/* * Free current thread data structures etc.. */ void exit_thread(void) @@ -179,9 +230,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, childregs = ((struct pt_regs *)((unsigned long)p + 8192)) - 1; *childregs = *regs; childregs->ARM_r0 = 0; + childregs->ARM_sp = esp; save = ((struct context_save_struct *)(childregs)) - 1; - copy_thread_css(save); + init_thread_css(save); p->tss.save = save; return 0; @@ -224,3 +276,29 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs = *regs; dump->u_fpvalid = dump_fpu (regs, &dump->u_fp); } + +/* + * This is the mechanism for creating a new kernel thread. + * + * NOTE! 
Only a kernel-only process(ie the swapper or direct descendants + * who haven't done an "execve()") should use this: it will work within + * a system call from a "real" process, but the process memory space will + * not be free'd until both the parent and the child have exited. + */ +pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + extern int sys_exit(int) __attribute__((noreturn)); + pid_t __ret; + + __asm__ __volatile__( + "mov r0, %1 @ kernel_thread sys_clone\n" +" mov r1, #0\n" + __syscall(clone)"\n" +" mov %0, r0" + : "=r" (__ret) + : "Ir" (flags | CLONE_VM) : "r0", "r1"); + if (__ret == 0) + sys_exit((fn)(arg)); + return __ret; +} + diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 5e3bdfe3b..fbc3a2187 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -34,11 +34,11 @@ */ static inline long get_stack_long(struct task_struct *task, int offset) { - unsigned char *stack; + struct pt_regs *regs; - stack = (unsigned char *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); - stack += offset << 2; - return *(unsigned long *)stack; + regs = (struct pt_regs *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); + + return regs->uregs[offset]; } /* @@ -50,11 +50,12 @@ static inline long get_stack_long(struct task_struct *task, int offset) static inline long put_stack_long(struct task_struct *task, int offset, unsigned long data) { - unsigned char *stack; + struct pt_regs *regs; + + regs = (struct pt_regs *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); + + regs->uregs[offset] = data; - stack = (unsigned char *)((unsigned long)task + 8192 - sizeof(struct pt_regs)); - stack += offset << 2; - *(unsigned long *) stack = data; return 0; } @@ -157,11 +158,16 @@ repeat: if (MAP_NR(page) < max_mapnr) { page += addr & ~PAGE_MASK; + + flush_cache_range(vma->vm_mm, addr, addr + sizeof(unsigned long)); + *(unsigned long *)page = data; - __flush_entry_to_ram(page); + + clean_cache_area(page, 
sizeof(unsigned long)); + + set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + flush_tlb_page(vma, addr & PAGE_MASK); } - set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); - flush_tlb(); } /* @@ -343,8 +349,7 @@ printk ("op2=r%02ldsh%dx%d", insn & 15, shift, type); printk ("=%08lX ", val); return val; } -#undef pc_pointer -#define pc_pointer(x) ((x) & 0x03fffffc) + int ptrace_set_bpt (struct task_struct *child) { unsigned long insn, pc, alt; @@ -651,7 +656,6 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) return 0; wake_up_process (child); child->exit_code = SIGKILL; - ptrace_cancel_bpt (child); /* make sure single-step breakpoint is gone. */ ptrace_cancel_bpt (child); ret = 0; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index cddc3fab3..0b0a70087 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -56,12 +56,17 @@ #define SUPPORT_CPU_SA110 #endif -#ifndef CONFIG_CMDLINE -#define CONFIG_CMDLINE "root=/dev/nfs rw" -#endif #define MEM_SIZE (16*1024*1024) #define COMMAND_LINE_SIZE 256 +#ifndef CONFIG_CMDLINE +#define CONFIG_CMDLINE "" +#endif + +extern void reboot_setup(char *str, int *ints); +extern void fpe_init(void); +extern void disable_hlt(void); + struct drive_info_struct { char dummy[32]; } drive_info; struct screen_info screen_info = { orig_video_lines: 30, @@ -87,20 +92,26 @@ const struct armversions armidlist[] = { /*-- Match -- --- Mask -- -- Manu -- Processor uname -m --- ELF STUFF --- --- processor asm funcs --- */ #if defined(CONFIG_CPU_26) + /* ARM2 fake ident */ { 0x41560200, 0xfffffff0, "ARM/VLSI", "arm2" , "armv1" , "v1", 0, &arm2_processor_functions }, + /* ARM250 fake ident */ { 0x41560250, 0xfffffff0, "ARM/VLSI", "arm250" , "armv2" , "v2", HWCAP_SWP, &arm250_processor_functions }, + /* ARM3 processors */ { 0x41560300, 0xfffffff0, "ARM/VLSI", "arm3" , "armv2" , "v2", HWCAP_SWP, &arm3_processor_functions }, #elif defined(CONFIG_CPU_32) #ifdef 
SUPPORT_CPU_ARM6 + /* ARM6 */ { 0x41560600, 0xfffffff0, "ARM/VLSI", "arm6" , "armv3" , "v3", HWCAP_SWP, &arm6_processor_functions }, + /* ARM610 */ { 0x41560610, 0xfffffff0, "ARM/VLSI", "arm610" , "armv3" , "v3", HWCAP_SWP, &arm6_processor_functions }, #endif #ifdef SUPPORT_CPU_ARM7 + /* ARM7's have a strange numbering */ { 0x41007000, 0xffffff00, "ARM/VLSI", "arm7" , "armv3" , "v3", HWCAP_SWP, &arm7_processor_functions }, /* ARM710 IDs are non-standard */ @@ -108,10 +119,16 @@ const struct armversions armidlist[] = { &arm7_processor_functions }, #endif #ifdef SUPPORT_CPU_SA110 - { 0x4401a100, 0xfffffff0, "DEC", "sa110" , "armv4" , "v3", HWCAP_SWP|HWCAP_HALF, +#ifdef CONFIG_ARCH_RPC + /* Acorn RiscPC's can't handle ARMv4 half-word instructions */ + { 0x4401a100, 0xfffffff0, "Intel", "sa110" , "armv4" , "v4", HWCAP_SWP, + &sa110_processor_functions }, +#else + { 0x4401a100, 0xfffffff0, "Intel", "sa110" , "armv4" , "v4", HWCAP_SWP|HWCAP_HALF, &sa110_processor_functions }, #endif #endif +#endif { 0x00000000, 0x00000000, "***", "unknown", "unknown", "**", 0, NULL } }; @@ -119,7 +136,7 @@ const struct armversions armidlist[] = { * From head-armv.S */ unsigned int processor_id; -unsigned int machine_type; +unsigned int __machine_arch_type; int armidindex; extern int root_mountflags; @@ -132,139 +149,10 @@ extern int _etext, _edata, _end; */ /* - * Risc-PC specific initialisation - */ -#ifdef CONFIG_ARCH_RPC - -#include <asm/arch/mmu.h> - -unsigned int vram_half_sam; - -static void -setup_rpc(struct param_struct *params) -{ - extern void init_dram_banks(const struct param_struct *params); - - init_dram_banks(params); - - switch (params->u1.s.pages_in_vram) { - case 256: - vram_half_sam = 1024; - break; - case 512: - default: - vram_half_sam = 2048; - } -} -#else -#define setup_rpc(x) -#endif - -#ifdef PARAMS_BASE - -#ifdef CONFIG_ARCH_ACORN -int memc_ctrl_reg; -int number_ide_drives; -int number_mfm_drives; -#endif - -static struct param_struct *params = (struct 
param_struct *)PARAMS_BASE; - -__initfunc(static char * -setup_params(unsigned long *mem_end_p)) -{ - ROOT_DEV = to_kdev_t(params->u1.s.rootdev); - ORIG_X = params->u1.s.video_x; - ORIG_Y = params->u1.s.video_y; - ORIG_VIDEO_COLS = params->u1.s.video_num_cols; - ORIG_VIDEO_LINES = params->u1.s.video_num_rows; - -#ifdef CONFIG_ARCH_ACORN -#ifndef CONFIG_FB - { - extern int bytes_per_char_h; - extern int bytes_per_char_v; - - bytes_per_char_h = params->u1.s.bytes_per_char_h; - bytes_per_char_v = params->u1.s.bytes_per_char_v; - } -#endif - memc_ctrl_reg = params->u1.s.memc_control_reg; - number_ide_drives = (params->u1.s.adfsdrives >> 6) & 3; - number_mfm_drives = (params->u1.s.adfsdrives >> 3) & 3; - - setup_rpc(params); - - if (!(params->u1.s.flags & FLAG_READONLY)) - root_mountflags &= ~MS_RDONLY; -#endif -#ifdef CONFIG_BLK_DEV_RAM - { - extern int rd_doload; - extern int rd_prompt; - extern int rd_image_start; - - rd_image_start = params->u1.s.rd_start; - rd_prompt = (params->u1.s.flags & FLAG_RDPROMPT) == 0; - rd_doload = (params->u1.s.flags & FLAG_RDLOAD) == 0; - } -#endif - -#ifdef CONFIG_ARCH_ACORN - *mem_end_p = GET_MEMORY_END(params); -#elif defined(CONFIG_ARCH_EBSA285) - *mem_end_p = PAGE_OFFSET + params->u1.s.page_size * params->u1.s.nr_pages; -#else - *mem_end_p = PAGE_OFFSET + MEM_SIZE; -#endif - - return params->commandline; -} - -#else - -static char default_command_line[] __initdata = CONFIG_CMDLINE; - -__initfunc(static char * -setup_params(unsigned long *mem_end_p)) -{ - ROOT_DEV = 0x00ff; - -#ifdef CONFIG_BLK_DEV_RAM - { - extern int rd_doload; - extern int rd_prompt; - extern int rd_image_start; - - rd_image_start = 0; - rd_prompt = 1; - rd_doload = 1; - } -#endif - - *mem_end_p = PAGE_OFFSET + MEM_SIZE; - - return default_command_line; -} -#endif - -/* * initial ram disk */ #ifdef CONFIG_BLK_DEV_INITRD __initfunc(static void -setup_initrd(const struct param_struct *params)) -{ - if (params->u1.s.initrd_start) { - initrd_start = 
params->u1.s.initrd_start; - initrd_end = initrd_start + params->u1.s.initrd_size; - } else { - initrd_start = 0; - initrd_end = 0; - } -} - -__initfunc(static void check_initrd(unsigned long mem_start, unsigned long mem_end)) { if (initrd_end > mem_end) { @@ -276,7 +164,6 @@ check_initrd(unsigned long mem_start, unsigned long mem_end)) } #else -#define setup_initrd(p) #define check_initrd(ms,me) #endif @@ -289,48 +176,47 @@ setup_processor(void)) armidlist[armidindex].mask) armidindex += 1; - if (armidlist[armidindex].id == 0) { -#ifdef CONFIG_ARCH_ACORN - int i; - - for (i = 0; i < 3200; i++) - ((unsigned long *)SCREEN2_BASE)[i] = 0x77113322; -#endif + if (armidlist[armidindex].id == 0) while (1); - } processor = *armidlist[armidindex].proc; processor._proc_init(); } +static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; static char command_line[COMMAND_LINE_SIZE] = { 0, }; char saved_command_line[COMMAND_LINE_SIZE]; __initfunc(static void -setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_end)) +setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_sz)) { - char c, *to = command_line; + char c = ' ', *to = command_line; int len = 0; *mem_start = (unsigned long)&_end; for (;;) { - if (cmd_line[0] == ' ' && - cmd_line[1] == 'm' && - cmd_line[2] == 'e' && - cmd_line[3] == 'm' && - cmd_line[4] == '=') { - *mem_end = simple_strtoul(cmd_line+5, &cmd_line, 0); - switch(*cmd_line) { - case 'M': - case 'm': - *mem_end <<= 10; - case 'K': - case 'k': - *mem_end <<= 10; + if (c == ' ') { + if (cmd_line[0] == 'm' && + cmd_line[1] == 'e' && + cmd_line[2] == 'm' && + cmd_line[3] == '=') { + *mem_sz = simple_strtoul(cmd_line+4, &cmd_line, 0); + switch(*cmd_line) { + case 'M': + case 'm': + *mem_sz <<= 10; + case 'K': + case 'k': + *mem_sz <<= 10; + cmd_line++; + } + } + /* if there are two spaces, remove one */ + if (*cmd_line == ' ') { cmd_line++; + continue; } - *mem_end = *mem_end + PAGE_OFFSET; } c = 
*cmd_line++; if (!c) @@ -341,42 +227,222 @@ setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_end)) } *to = '\0'; + + /* remove trailing spaces */ + while (*--to == ' ' && to != command_line) + *to = '\0'; +} + +__initfunc(static void +setup_ram(int doload, int prompt, int image_start)) +{ +#ifdef CONFIG_BLK_DEV_RAM + extern int rd_doload; + extern int rd_prompt; + extern int rd_image_start; + + rd_image_start = image_start; + rd_prompt = prompt; + rd_doload = doload; +#endif } +/* + * initial ram disk + */ +__initfunc(static void +setup_initrd(unsigned int start, unsigned int size)) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (start) { + initrd_start = start; + initrd_end = start + size; + } else { + initrd_start = 0; + initrd_end = 0; + } +#endif +} + +#ifdef CONFIG_ARCH_ACORN +int memc_ctrl_reg; +int number_mfm_drives; +unsigned int vram_size; +#endif + +#ifndef PARAMS_BASE +#define PARAMS_BASE NULL +#endif + +static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } }; +#define ENDIANNESS ((char)endian_test.l) + __initfunc(void setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p)) { + struct param_struct *params = (struct param_struct *)PARAMS_BASE; static unsigned char smptrap; - unsigned long memory_end; - char endian = 'l'; - char *from; + unsigned long memory_end = 0; + char *from = NULL; if (smptrap == 1) return; smptrap = 1; +#if defined(CONFIG_ARCH_ARC) + __machine_arch_type = MACH_TYPE_ARCHIMEDES; +#elif defined(CONFIG_ARCH_A5K) + __machine_arch_type = MACH_TYPE_A5K; +#endif + setup_processor(); - from = setup_params(&memory_end); - setup_initrd(params); + init_task.mm->start_code = TASK_SIZE; + init_task.mm->end_code = TASK_SIZE + (unsigned long) &_etext; + init_task.mm->end_data = TASK_SIZE + (unsigned long) &_edata; + init_task.mm->brk = TASK_SIZE + (unsigned long) &_end; + + /* + * Add your machine dependencies here + */ + switch (machine_arch_type) { + case 
MACH_TYPE_EBSA110: + /* EBSA110 locks if we execute 'wait for interrupt' */ + disable_hlt(); + params = NULL; + break; + + case MACH_TYPE_EBSA285: + if (params) { + ORIG_X = params->u1.s.video_x; + ORIG_Y = params->u1.s.video_y; + ORIG_VIDEO_COLS = params->u1.s.video_num_cols; + ORIG_VIDEO_LINES = params->u1.s.video_num_rows; + } + break; + + case MACH_TYPE_CO285: + { +#if 0 + extern unsigned long boot_memory_end; + extern char boot_command_line[]; + + from = boot_command_line; + memory_end = boot_memory_end; +#endif + params = NULL; + } + break; + + case MACH_TYPE_CATS: + /* CATS must use soft-reboot */ + reboot_setup("s", NULL); + break; + + case MACH_TYPE_NETWINDER: + /* + * to be fixed in a future NeTTrom + */ + if (params->u1.s.page_size == 4096) { + if (params->u1.s.nr_pages != 0x2000 && + params->u1.s.nr_pages != 0x4000) { + printk("Warning: bad NeTTrom parameters detected, using defaults\n"); + /* + * This stuff doesn't appear to be initialised + * properly by NeTTrom 2.0.6 and 2.0.7 + */ + params->u1.s.nr_pages = 0x2000; /* 32MB */ + params->u1.s.ramdisk_size = 0; + params->u1.s.flags = FLAG_READONLY; + params->u1.s.initrd_start = 0; + params->u1.s.initrd_size = 0; + params->u1.s.rd_start = 0; + params->u1.s.video_x = 0; + params->u1.s.video_y = 0; + params->u1.s.video_num_cols = 80; + params->u1.s.video_num_rows = 30; + } + } else { + printk("Warning: no NeTTrom parameter page detected, using " + "compiled-in settings\n"); + params = NULL; + } + break; + + default: + break; + } + + if (params) { + memory_end = params->u1.s.page_size * + params->u1.s.nr_pages; + + ROOT_DEV = to_kdev_t(params->u1.s.rootdev); + + setup_ram((params->u1.s.flags & FLAG_RDLOAD) == 0, + (params->u1.s.flags & FLAG_RDPROMPT) == 0, + params->u1.s.rd_start); + + setup_initrd(params->u1.s.initrd_start, + params->u1.s.initrd_size); + + if (!(params->u1.s.flags & FLAG_READONLY)) + root_mountflags &= ~MS_RDONLY; + +#ifdef CONFIG_ARCH_ACORN +#ifdef CONFIG_ARCH_RPC + { + extern void 
init_dram_banks(struct param_struct *); + init_dram_banks(params); + } +#endif + + memc_ctrl_reg = params->u1.s.memc_control_reg; + number_mfm_drives = (params->u1.s.adfsdrives >> 3) & 3; + vram_size = 0; + + switch (params->u1.s.pages_in_vram) { + case 512: + vram_size += PAGE_SIZE * 256; + case 256: + vram_size += PAGE_SIZE * 256; + default: + break; + } + + memory_end -= vram_size; +#endif + + from = params->commandline; + } else { + ROOT_DEV = 0x00ff; + + setup_ram(1, 1, 0); + setup_initrd(0, 0); + } + + if (!memory_end) + memory_end = MEM_SIZE; + + if (!from) + from = default_command_line; + +#ifdef CONFIG_NWFPE + fpe_init(); +#endif /* Save unparsed command line copy for /proc/cmdline */ memcpy(saved_command_line, from, COMMAND_LINE_SIZE); saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; setup_mem(from, memory_start_p, &memory_end); - check_initrd(*memory_start_p, memory_end); - init_task.mm->start_code = TASK_SIZE; - init_task.mm->end_code = TASK_SIZE + (unsigned long) &_etext; - init_task.mm->end_data = TASK_SIZE + (unsigned long) &_edata; - init_task.mm->brk = TASK_SIZE + (unsigned long) &_end; + memory_end += PAGE_OFFSET; - *cmdline_p = command_line; - *memory_end_p = memory_end; + check_initrd(*memory_start_p, memory_end); - sprintf(system_utsname.machine, "%s%c", armidlist[armidindex].arch_vsn, endian); - sprintf(elf_platform, "%s%c", armidlist[armidindex].elf_vsn, endian); + sprintf(system_utsname.machine, "%s%c", armidlist[armidindex].arch_vsn, ENDIANNESS); + sprintf(elf_platform, "%s%c", armidlist[armidindex].elf_vsn, ENDIANNESS); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -385,43 +451,26 @@ setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * mem conswitchp = &dummy_con; #endif #endif + + *cmdline_p = command_line; + *memory_end_p = memory_end; } -static const struct { - char *machine_name; - char *bus_name; -} machine_desc[] = { - { "DEC-EBSA110", "DEC" }, - { "Acorn-RiscPC", "Acorn" }, - { "Nexus-NexusPCI", "PCI" }, 
- { "DEC-EBSA285", "PCI" }, - { "Corel-Netwinder", "PCI/ISA" }, - { "Chalice-CATS", "PCI" }, - { "unknown-TBOX", "PCI" } +static const char *machine_desc[] = { + "EBSA110", + "Acorn-RiscPC", + "unknown", + "Nexus-FTV/PCI", + "EBSA285", + "Corel-NetWinder", + "Chalice-CATS", + "unknown-TBOX", + "co-EBSA285", + "CL-PS7110", + "Acorn-Archimedes", + "Acorn-A5000" }; -#if defined(CONFIG_ARCH_ARC) -#define HARDWARE "Acorn-Archimedes" -#define IO_BUS "Acorn" -#elif defined(CONFIG_ARCH_A5K) -#define HARDWARE "Acorn-A5000" -#define IO_BUS "Acorn" -#endif - -#if defined(CONFIG_CPU_ARM2) -#define OPTIMISATION "ARM2" -#elif defined(CONFIG_CPU_ARM3) -#define OPTIMISATION "ARM3" -#elif defined(CONFIG_CPU_ARM6) -#define OPTIMISATION "ARM6" -#elif defined(CONFIG_CPU_ARM7) -#define OPTIMISATION "ARM7" -#elif defined(CONFIG_CPU_SA110) -#define OPTIMISATION "StrongARM" -#else -#define OPTIMISATION "unknown" -#endif - int get_cpuinfo(char * buffer) { int len; @@ -429,25 +478,12 @@ int get_cpuinfo(char * buffer) len = sprintf(buffer, "Processor\t: %s %s rev %d\n" "BogoMips\t: %lu.%02lu\n" - "Hardware\t: %s\n" - "Optimisation\t: %s\n" - "IO Bus\t\t: %s\n", + "Hardware\t: %s\n", armidlist[armidindex].manu, armidlist[armidindex].name, (int)processor_id & 15, (loops_per_sec+2500) / 500000, ((loops_per_sec+2500) / 5000) % 100, -#ifdef HARDWARE - HARDWARE, -#else - machine_desc[machine_type].machine_name, -#endif - OPTIMISATION, -#ifdef IO_BUS - IO_BUS -#else - machine_desc[machine_type].bus_name -#endif - ); + machine_desc[machine_arch_type]); return len; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 51e6bcb17..5ec48f752 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -28,7 +28,7 @@ asmlinkage int sys_wait4(pid_t pid, unsigned long * stat_addr, int options, unsigned long *ru); -asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs); +asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs, int syscall); extern int 
ptrace_cancel_bpt (struct task_struct *); extern int ptrace_set_bpt (struct task_struct *); @@ -50,7 +50,7 @@ asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t m while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(&saveset, regs)) + if (do_signal(&saveset, regs, 0)) return regs->ARM_r0; } } @@ -78,7 +78,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs) while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(&saveset, regs)) + if (do_signal(&saveset, regs, 0)) return regs->ARM_r0; } } @@ -158,12 +158,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) #ifdef CONFIG_CPU_32 err |= __get_user(regs->ARM_cpsr, &sc->arm_cpsr); #endif - if (!valid_user_regs(regs)) - return 1; - /* send SIGTRAP if we're single-stepping */ - if (ptrace_cancel_bpt (current)) - send_sig (SIGTRAP, current, 1); + err |= !valid_user_regs(regs); return err; } @@ -173,6 +169,14 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) struct sigframe *frame; sigset_t set; + /* + * Since we stacked the signal on a word boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. + */ + if (regs->ARM_sp & 3) + goto badframe; + frame = (struct sigframe *)regs->ARM_sp; if (verify_area(VERIFY_READ, frame, sizeof (*frame))) @@ -192,6 +196,10 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->sc)) goto badframe; + /* Send SIGTRAP if we're single-stepping */ + if (ptrace_cancel_bpt (current)) + send_sig(SIGTRAP, current, 1); + return regs->ARM_r0; badframe: @@ -204,6 +212,14 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe *frame; sigset_t set; + /* + * Since we stacked the signal on a word boundary, + * then 'sp' should be word aligned here. If it's + * not, then the user is trying to mess with us. 
+ */ + if (regs->ARM_sp & 3) + goto badframe; + frame = (struct rt_sigframe *)regs->ARM_sp; if (verify_area(VERIFY_READ, frame, sizeof (*frame))) @@ -220,6 +236,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; + /* Send SIGTRAP if we're single-stepping */ + if (ptrace_cancel_bpt (current)) + send_sig(SIGTRAP, current, 1); + return regs->ARM_r0; badframe: @@ -260,6 +280,26 @@ setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/ return err; } +static inline void *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + unsigned long framesize) +{ + unsigned long sp = regs->ARM_sp; + + /* + * This is the X/Open sanctioned signal stack switching. + */ + if ((ka->sa.sa_flags & SA_ONSTACK) && ! on_sig_stack(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + + /* + * No matter what happens, 'sp' must be word + * aligned otherwise nasty things could happen + */ + sp &= ~3; + + return (void *)(sp - framesize); +} + static void setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) { @@ -267,9 +307,9 @@ static void setup_frame(int sig, struct k_sigaction *ka, unsigned long retcode; int err = 0; - frame = (struct sigframe *)regs->ARM_sp - 1; + frame = get_sigframe(ka, regs, sizeof(*frame)); - if (!access_ok(VERIFT_WRITE, frame, sizeof (*frame))) + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto segv_and_exit; err |= setup_sigcontext(&frame->sc, /*&frame->fpstate,*/ regs, set->sig[0]); @@ -286,7 +326,7 @@ static void setup_frame(int sig, struct k_sigaction *ka, } else { retcode = (unsigned long)&frame->retcode; err |= __put_user(SWI_SYS_SIGRETURN, &frame->retcode); - __flush_entry_to_ram (&frame->retcode); + flush_icache_range(retcode, retcode + 4); } if (err) @@ -299,6 +339,11 @@ static void setup_frame(int sig, struct k_sigaction *ka, regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = (unsigned 
long)ka->sa.sa_handler; +#if defined(CONFIG_CPU_32) + /* Maybe we need to deliver a 32-bit signal to a 26-bit task. */ + if (ka->sa.sa_flags & SA_THIRTYTWO) + regs->ARM_cpsr = USR_MODE; +#endif if (valid_user_regs(regs)) return; @@ -315,7 +360,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, unsigned long retcode; int err = 0; - frame = (struct rt_sigframe *)regs->ARM_sp - 1; + frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe)); + if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto segv_and_exit; @@ -337,7 +383,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } else { retcode = (unsigned long)&frame->retcode; err |= __put_user(SWI_SYS_RT_SIGRETURN, &frame->retcode); - __flush_entry_to_ram (&frame->retcode); + flush_icache_range(retcode, retcode + 4); } if (err) @@ -350,6 +396,11 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = (unsigned long)ka->sa.sa_handler; +#if defined(CONFIG_CPU_32) + /* Maybe we need to deliver a 32-bit signal to a 26-bit task. */ + if (ka->sa.sa_flags & SA_THIRTYTWO) + regs->ARM_cpsr = USR_MODE; +#endif if (valid_user_regs(regs)) return; @@ -393,18 +444,25 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) +asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) { - unsigned long instr, *pc = (unsigned long *)(instruction_pointer(regs)-4); struct k_sigaction *ka; siginfo_t info; - int single_stepping, swi_instr; + int single_stepping; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. 
+ */ + if (!user_mode(regs)) + return 0; if (!oldset) oldset = &current->blocked; single_stepping = ptrace_cancel_bpt (current); - swi_instr = (!get_user (instr, pc) && (instr & 0x0f000000) == 0x0f000000); for (;;) { unsigned long signr; @@ -503,7 +561,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) } /* Are we from a system call? */ - if (swi_instr) { + if (syscall) { switch (regs->ARM_r0) { case -ERESTARTNOHAND: regs->ARM_r0 = -EINTR; @@ -527,7 +585,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) return 1; } - if (swi_instr && + if (syscall && (regs->ARM_r0 == -ERESTARTNOHAND || regs->ARM_r0 == -ERESTARTSYS || regs->ARM_r0 == -ERESTARTNOINTR)) { diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index d50b90f8d..9da64aad0 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -223,13 +223,7 @@ out: */ asmlinkage int sys_fork(struct pt_regs *regs) { - int ret; - - lock_kernel(); - ret = do_fork(SIGCHLD, regs->ARM_sp, regs); - unlock_kernel(); - - return ret; + return do_fork(SIGCHLD, regs->ARM_sp, regs); } /* Clone a task - this clones the calling program thread. @@ -237,14 +231,14 @@ asmlinkage int sys_fork(struct pt_regs *regs) */ asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs) { - int ret; - - lock_kernel(); if (!newsp) newsp = regs->ARM_sp; - ret = do_fork(clone_flags, newsp, regs); - unlock_kernel(); - return ret; + return do_fork(clone_flags, newsp, regs); +} + +asmlinkage int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs); } /* sys_execve() executes a new program. 
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index b6448e942..c874a1ba8 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -129,27 +129,12 @@ void do_settimeofday(struct timeval *tv) time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; - sti (); + sti(); } -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick. - */ -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - if (reset_timer ()) - do_timer(regs); - - update_rtc (); -} - -static struct irqaction irqtimer = { timer_interrupt, 0, 0, "timer", NULL, NULL}; - __initfunc(void time_init(void)) { - xtime.tv_sec = setup_timer(); xtime.tv_usec = 0; - setup_arm_irq(IRQ_TIMER, &irqtimer); + setup_timer(); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5d04f325b..9267fec09 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -24,7 +24,6 @@ #include <asm/atomic.h> #include <asm/pgtable.h> -extern void die_if_kernel(char *str, struct pt_regs *regs, int err, int ret); extern void c_backtrace (unsigned long fp, int pmode); extern int ptrace_cancel_bpt (struct task_struct *); @@ -45,16 +44,17 @@ static inline void console_verbose(void) int kstack_depth_to_print = 200; -static int verify_stack_pointer (unsigned long stackptr, int size) +/* + * Stack pointers should always be within the kernels view of + * physical memory. If it is not there, then we can't dump + * out any information relating to the stack. 
+ */ +static int verify_stack(unsigned long sp) { -#ifdef CONFIG_CPU_26 - if (stackptr < 0x02048000 || stackptr + size > 0x03000000) - return -EFAULT; -#else - if (stackptr < PAGE_OFFSET || stackptr + size > (unsigned long)high_memory) + if (sp < PAGE_OFFSET || sp > (unsigned long)high_memory) return -EFAULT; -#endif - return 0; + + return 0; } /* @@ -90,22 +90,26 @@ void dump_mem(unsigned long bottom, unsigned long top) static void dump_instr(unsigned long pc, int user) { - unsigned long module_start, module_end; int pmin = -2, pmax = 3, ok = 0; extern char start_kernel, _etext; if (!user) { + unsigned long module_start, module_end; + unsigned long kernel_start, kernel_end; + module_start = VMALLOC_START; module_end = module_start + MODULE_RANGE; - if ((pc >= (unsigned long) &start_kernel) && - (pc <= (unsigned long) &_etext)) { - if (pc + pmin < (unsigned long) &start_kernel) - pmin = ((unsigned long) &start_kernel) - pc; - if (pc + pmax > (unsigned long) &_etext) - pmax = ((unsigned long) &_etext) - pc; + kernel_start = (unsigned long)&start_kernel; + kernel_end = (unsigned long)&_etext; + + if (pc >= kernel_start && pc < kernel_end) { + if (pc + pmin < kernel_start) + pmin = kernel_start - pc; + if (pc + pmax > kernel_end) + pmax = kernel_end - pc; ok = 1; - } else if (pc >= module_start && pc <= module_end) { + } else if (pc >= module_start && pc < module_end) { if (pc + pmin < module_start) pmin = module_start - pc; if (pc + pmax > module_end) @@ -125,119 +129,138 @@ static void dump_instr(unsigned long pc, int user) printk ("pc not in code space\n"); } -static void dump_state(char *str, struct pt_regs *regs, int err) +spinlock_t die_lock; + +/* + * This function is protected against re-entrancy. 
+ */ +void die(const char *str, struct pt_regs *regs, int err) { + struct task_struct *tsk = current; + + spin_lock_irq(&die_lock); + console_verbose(); printk("Internal error: %s: %x\n", str, err); printk("CPU: %d\n", smp_processor_id()); show_regs(regs); printk("Process %s (pid: %d, stackpage=%08lx)\n", - current->comm, current->pid, 4096+(unsigned long)current); + current->comm, current->pid, 4096+(unsigned long)tsk); + + if (!user_mode(regs)) { + unsigned long sp = (unsigned long)(regs + 1); + unsigned long fp; + int dump_info = 1; + + printk("Stack: "); + if (verify_stack(sp)) { + printk("invalid kernel stack pointer %08lx", sp); + dump_info = 0; + } else if (sp < 4096+(unsigned long)tsk) + printk("kernel stack pointer underflow"); + printk("\n"); + + if (dump_info) + dump_mem(sp - 16, 8192+(unsigned long)tsk); + + dump_info = 1; + + printk("Backtrace: "); + fp = regs->ARM_fp; + if (!fp) { + printk("no frame pointer"); + dump_info = 0; + } else if (verify_stack(fp)) { + printk("invalid frame pointer %08lx", fp); + dump_info = 0; + } else if (fp < 4096+(unsigned long)tsk) + printk("frame pointer underflow"); + printk("\n"); + + if (dump_info) + c_backtrace(fp, processor_mode(regs)); + + dump_instr(instruction_pointer(regs), 0); + } + + spin_unlock_irq(&die_lock); } -/* - * This function is protected against kernel-mode re-entrancy. If it - * is re-entered it will hang the system since we can't guarantee in - * this case that any of the functions that it calls are safe any more. - * Even the panic function could be a problem, but we'll give it a go. - */ -void die_if_kernel(char *str, struct pt_regs *regs, int err, int ret) +static void die_if_kernel(const char *str, struct pt_regs *regs, int err) { - static int died = 0; - unsigned long cstack, sstack, frameptr; - if (user_mode(regs)) return; - switch (died) { - case 2: - while (1); - case 1: - died ++; - panic ("die_if_kernel re-entered. Major kernel corruption. 
Please reboot me!"); - break; - case 0: - died ++; - break; - } - - dump_state(str, regs, err); - - cstack = (unsigned long)(regs + 1); - sstack = 4096+(unsigned long)current; - - printk("Stack: "); - if (verify_stack_pointer(cstack, 4)) - printk("invalid kernel stack pointer %08lx", cstack); - else if(cstack > sstack + 4096) - printk("(sp overflow)"); - else if(cstack < sstack) - printk("(sp underflow)"); - printk("\n"); - - dump_mem(cstack - 16, sstack + 4096); - - frameptr = regs->ARM_fp; - if (frameptr) { - if (verify_stack_pointer (frameptr, 4)) - printk ("Backtrace: invalid frame pointer\n"); - else { - printk("Backtrace: \n"); - c_backtrace (frameptr, processor_mode(regs)); - } - } - - dump_instr(instruction_pointer(regs), 0); - died = 0; - if (ret != -1) - do_exit (ret); - else { - cli (); - while (1); - } + die(str, regs, err); } -void bad_user_access_alignment (const void *ptr) +void bad_user_access_alignment(const void *ptr) { - void *pc; - __asm__("mov %0, lr\n": "=r" (pc)); - printk (KERN_ERR "bad_user_access_alignment called: ptr = %p, pc = %p\n", ptr, pc); + printk(KERN_ERR "bad user access alignment: ptr = %p, pc = %p\n", ptr, + __builtin_return_address(0)); current->tss.error_code = 0; current->tss.trap_no = 11; - force_sig (SIGBUS, current); -/* die_if_kernel("Oops - bad user access alignment", regs, mode, SIGBUS);*/ + force_sig(SIGBUS, current); +/* die_if_kernel("Oops - bad user access alignment", regs, mode);*/ } -asmlinkage void do_undefinstr (int address, struct pt_regs *regs, int mode) +asmlinkage void do_undefinstr(int address, struct pt_regs *regs, int mode) { +#ifdef CONFIG_DEBUG_USER + printk(KERN_INFO "%s (%d): undefined instruction: pc=%08lx\n", + current->comm, current->pid, instruction_pointer(regs)); +#endif current->tss.error_code = 0; current->tss.trap_no = 6; - force_sig (SIGILL, current); - die_if_kernel("Oops - undefined instruction", regs, mode, SIGILL); + force_sig(SIGILL, current); + die_if_kernel("Oops - undefined 
instruction", regs, mode); } -asmlinkage void do_excpt (int address, struct pt_regs *regs, int mode) +asmlinkage void do_excpt(int address, struct pt_regs *regs, int mode) { +#ifdef CONFIG_DEBUG_USER + printk(KERN_INFO "%s (%d): address exception: pc=%08lx\n", + current->comm, current->pid, instruction_pointer(regs)); +#endif current->tss.error_code = 0; current->tss.trap_no = 11; - force_sig (SIGBUS, current); - die_if_kernel("Oops - address exception", regs, mode, SIGBUS); + force_sig(SIGBUS, current); + die_if_kernel("Oops - address exception", regs, mode); } asmlinkage void do_unexp_fiq (struct pt_regs *regs) { #ifndef CONFIG_IGNORE_FIQ - printk ("Hmm. Unexpected FIQ received, but trying to continue\n"); - printk ("You may have a hardware problem...\n"); + printk("Hmm. Unexpected FIQ received, but trying to continue\n"); + printk("You may have a hardware problem...\n"); #endif } +/* + * bad_mode handles the impossible case in the vectors. + * If you see one of these, then it's extremely serious, + * and could mean you have buggy hardware. It never + * returns, and never tries to sync. We hope that we + * can dump out some state information... 
+ */ asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode) { - printk (KERN_CRIT "Bad mode in %s handler detected: mode %s\n", - handler[reason], - processor_modes[proc_mode]); - die_if_kernel ("Oops", regs, 0, -1); + console_verbose(); + + printk(KERN_CRIT "Bad mode in %s handler detected: mode %s\n", + handler[reason], processor_modes[proc_mode]); + + /* + * Dump out the vectors and stub routines + */ + printk(KERN_CRIT "Vectors:\n"); + dump_mem(0, 0x40); + printk(KERN_CRIT "Stubs:\n"); + dump_mem(0x200, 0x4b8); + + die("Oops", regs, 0); + cli(); + while(1); } /* @@ -249,54 +272,85 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode) */ asmlinkage void math_state_restore (void) { - current->used_math = 1; + current->used_math = 1; } -asmlinkage void arm_syscall (int no, struct pt_regs *regs) +asmlinkage int arm_syscall (int no, struct pt_regs *regs) { switch (no) { case 0: /* branch through 0 */ force_sig(SIGSEGV, current); -// if (user_mode(regs)) { -// dump_state("branch through zero", regs, 0); -// if (regs->ARM_fp) -// c_backtrace (regs->ARM_fp, processor_mode(regs)); -// } - die_if_kernel ("branch through zero", regs, 0, SIGSEGV); + die_if_kernel("branch through zero", regs, 0); break; case 1: /* SWI_BREAK_POINT */ regs->ARM_pc -= 4; /* Decrement PC by one instruction */ - ptrace_cancel_bpt (current); - force_sig (SIGTRAP, current); + ptrace_cancel_bpt(current); + force_sig(SIGTRAP, current); + return regs->ARM_r0; + + case 2: /* sys_cacheflush */ +#ifdef CONFIG_CPU_32 + /* r0 = start, r1 = length, r2 = flags */ + processor.u.armv3v4._flush_cache_area(regs->ARM_r0, + regs->ARM_r1, + 1); +#endif break; default: - printk ("[%d] %s: arm syscall %d\n", current->pid, current->comm, no); - force_sig (SIGILL, current); + /* Calls 9f00xx..9f07ff are defined to return -ENOSYS + if not implemented, rather than raising SIGILL. This + way the calling program can gracefully determine whether + a feature is supported. 
*/ + if (no <= 0x7ff) + return -ENOSYS; +#ifdef CONFIG_DEBUG_USER + /* experiance shows that these seem to indicate that + * something catastrophic has happened + */ + printk("[%d] %s: arm syscall %d\n", current->pid, current->comm, no); if (user_mode(regs)) { - show_regs (regs); - c_backtrace (regs->ARM_fp, processor_mode(regs)); + show_regs(regs); + c_backtrace(regs->ARM_fp, processor_mode(regs)); } - die_if_kernel ("Oops", regs, no, SIGILL); +#endif + force_sig(SIGILL, current); + die_if_kernel("Oops", regs, no); break; } + return 0; } asmlinkage void deferred(int n, struct pt_regs *regs) { - dump_state("old system call", regs, n); - force_sig (SIGILL, current); + /* You might think just testing `handler' would be enough, but PER_LINUX + * points it to no_lcall7 to catch undercover SVr4 binaries. Gutted. + */ + if (current->personality != PER_LINUX && current->exec_domain->handler) { + /* Hand it off to iBCS. The extra parameter and consequent type + * forcing is necessary because of the weird ARM calling convention. 
+ */ + void (*handler)(int nr, struct pt_regs *regs) = (void *)current->exec_domain->handler; + (*handler)(n, regs); + return; + } + +#ifdef CONFIG_DEBUG_USER + printk(KERN_ERR "[%d] %s: old system call.\n", current->pid, + current->comm); +#endif + force_sig(SIGILL, current); } asmlinkage void arm_malalignedptr(const char *str, void *pc, volatile void *ptr) { - printk ("Mal-aligned pointer in %s: %p (PC=%p)\n", str, ptr, pc); + printk("Mal-aligned pointer in %s: %p (PC=%p)\n", str, ptr, pc); } -asmlinkage void arm_invalidptr (const char *function, int size) +asmlinkage void arm_invalidptr(const char *function, int size) { - printk ("Invalid pointer size in %s (PC=%p) size %d\n", + printk("Invalid pointer size in %s (pc=%p) size %d\n", function, __builtin_return_address(0), size); } diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 684db2a47..0b241d333 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,14 +6,14 @@ L_TARGET := lib.a L_OBJS := backtrace.o bitops.o checksum.o delay.o io.o memcpy.o \ - system.o string.o uaccess.o + semaphore.o string.o system.o uaccess.o ifeq ($(PROCESSOR),armo) L_OBJS += uaccess-armo.o endif ifdef CONFIG_ARCH_ACORN - L_OBJS += loaders.o ll_char_wr.o io-acorn.o + L_OBJS += loaders.o io-acorn.o ifdef CONFIG_ARCH_A5K L_OBJS += floppydma.o endif @@ -26,12 +26,8 @@ ifeq ($(MACHINE),ebsa110) L_OBJS += io-ebsa110.o endif -ifeq ($(MACHINE),vnc) - L_OBJS += io-ebsa285.o -endif - -ifeq ($(MACHINE),ebsa285) - L_OBJS += io-ebsa285.o +ifeq ($(MACHINE),footbridge) + L_OBJS += io-footbridge.o endif include $(TOPDIR)/Rules.make @@ -45,10 +41,4 @@ getconsdata.o: getconsdata.c checksum.o: constants.h %.o: %.S -ifneq ($(CONFIG_BINUTILS_NEW),y) - $(CC) $(CFLAGS) -D__ASSEMBLY__ -E $< | tr ';$$' '\n#' > ..tmp.$<.s - $(CC) $(CFLAGS:-pipe=) -c -o $@ ..tmp.$<.s - $(RM) ..tmp.$<.s -else $(CC) $(CFLAGS) -D__ASSEMBLY__ -c -o $@ $< -endif diff --git a/arch/arm/lib/checksum.S b/arch/arm/lib/checksum.S index bd5c78d34..daf49fc94 
100644 --- a/arch/arm/lib/checksum.S +++ b/arch/arm/lib/checksum.S @@ -520,13 +520,13 @@ Ldst_aligned: tst r0, #3 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) ldr r4, [r0], #4 tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 mov r4, r4, lsr #8 strb r4, [r1], #1 mov r4, r4, lsr #8 - b Lexit + b Lexit_r4 Ltoo_small: teq r2, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) @@ -538,10 +538,12 @@ Ltoo_small: teq r2, #0 adds r3, r3, ip strb ip, [r1], #1 strb r8, [r1], #1 -Lexit: tst r2, #1 -Ltoo_small1: ldrneb ip, [r0], #1 - strneb ip, [r1], #1 - adcnes r3, r3, ip + tst r2, #1 +Ltoo_small1: ldrneb r4, [r0], #1 +Lexit_r4: tst r2, #1 + strneb r4, [r1], #1 + andne r4, r4, #255 + adcnes r3, r3, r4 adcs r0, r3, #0 LOADREGS(ea,fp,{r4 - r8, fp, sp, pc}) @@ -598,13 +600,13 @@ Lsrc_not_aligned: adceq r0, r3, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 mov r4, r4, lsr #8 strb r4, [r1], #1 mov r4, r4, lsr #8 - b Lexit + b Lexit_r4 Lsrc2_aligned: mov r4, r4, lsr #16 adds r3, r3, #0 @@ -650,13 +652,13 @@ Lsrc2_aligned: mov r4, r4, lsr #16 adceq r0, r3, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 mov r4, r4, lsr #8 strb r4, [r1], #1 ldrb r4, [r0], #1 - b Lexit + b Lexit_r4 Lsrc3_aligned: mov r4, r4, lsr #24 adds r3, r3, #0 @@ -702,14 +704,14 @@ Lsrc3_aligned: mov r4, r4, lsr #24 adceq r0, r3, #0 LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc}) tst r2, #2 - beq Lexit + beq Lexit_r4 adcs r3, r3, r4, lsl #16 strb r4, [r1], #1 ldr r4, [r0], #4 strb r4, [r1], #1 adcs r3, r3, r4, lsl #24 mov r4, r4, lsr #8 - b Lexit + b Lexit_r4 ENTRY(__csum_ipv6_magic) stmfd sp!, {lr} diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S index 08fdccb27..778d3e574 100644 --- a/arch/arm/lib/floppydma.S +++ b/arch/arm/lib/floppydma.S @@ -26,32 +26,3 @@ ENTRY(floppy_fiqout_start) strb r12, [r11, #-4] subs pc, lr, #4 
SYMBOL_NAME(floppy_fiqout_end): - -@ Params: -@ r0 = length -@ r1 = address -@ r2 = floppy port -@ Puts these into R9_fiq, R10_fiq, R11_fiq -ENTRY(floppy_fiqsetup) - mov ip, sp - stmfd sp!, {fp, ip, lr, pc} - sub fp, ip, #4 - MODE(r3,ip,I_BIT|F_BIT|DEFAULT_FIQ) @ disable FIQs, IRQs, FIQ mode - mov r0, r0 - mov r9, r0 - mov r10, r1 - mov r11, r2 - RESTOREMODE(r3) @ back to normal - mov r0, r0 - LOADREGS(ea,fp,{fp, sp, pc}) - -ENTRY(floppy_fiqresidual) - mov ip, sp - stmfd sp!, {fp, ip, lr, pc} - sub fp, ip, #4 - MODE(r3,ip,I_BIT|F_BIT|DEFAULT_FIQ) @ disable FIQs, IRQs, FIQ mode - mov r0, r0 - mov r0, r9 - RESTOREMODE(r3) - mov r0, r0 - LOADREGS(ea,fp,{fp, sp, pc}) diff --git a/arch/arm/lib/getconsdata.c b/arch/arm/lib/getconsdata.c index ba145eaff..27f4ca2ef 100644 --- a/arch/arm/lib/getconsdata.c +++ b/arch/arm/lib/getconsdata.c @@ -67,6 +67,23 @@ unsigned long PAGE_OLD = _PAGE_OLD; unsigned long PAGE_CLEAN = _PAGE_CLEAN; #endif +#ifdef PTE_TYPE_SMALL +unsigned long HPTE_TYPE_SMALL = PTE_TYPE_SMALL; +unsigned long HPTE_AP_READ = PTE_AP_READ; +unsigned long HPTE_AP_WRITE = PTE_AP_WRITE; +#endif + +#ifdef L_PTE_PRESENT +unsigned long LPTE_PRESENT = L_PTE_PRESENT; +unsigned long LPTE_YOUNG = L_PTE_YOUNG; +unsigned long LPTE_BUFFERABLE = L_PTE_BUFFERABLE; +unsigned long LPTE_CACHEABLE = L_PTE_CACHEABLE; +unsigned long LPTE_USER = L_PTE_USER; +unsigned long LPTE_WRITE = L_PTE_WRITE; +unsigned long LPTE_EXEC = L_PTE_EXEC; +unsigned long LPTE_DIRTY = L_PTE_DIRTY; +#endif + unsigned long KSWI_BASE = 0x900000; unsigned long KSWI_SYS_BASE = 0x9f0000; unsigned long SYS_ERROR0 = 0x9f0000; diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S index 6baa4cd50..bf2dd6333 100644 --- a/arch/arm/lib/io-acorn.S +++ b/arch/arm/lib/io-acorn.S @@ -11,50 +11,514 @@ .text .align -#define OUT(reg) \ - mov r8, reg, lsl $16 ;\ - orr r8, r8, r8, lsr $16 ;\ - str r8, [r3, r0, lsl $2] ;\ - mov r8, reg, lsr $16 ;\ - orr r8, r8, r8, lsl $16 ;\ - str r8, [r3, r0, lsl $2] - -#define 
IN(reg) \ - ldr reg, [r0] ;\ - and reg, reg, ip ;\ - ldr lr, [r0] ;\ - orr reg, reg, lr, lsl $16 - - .equ pcio_base_high, PCIO_BASE & 0xff000000 - .equ pcio_base_low, PCIO_BASE & 0x00ff0000 - .equ io_base_high, IO_BASE & 0xff000000 - .equ io_base_low, IO_BASE & 0x00ff0000 - - .equ addr_io_diff_hi, pcio_base_high - io_base_high - .equ addr_io_diff_lo, pcio_base_low - io_base_low - - .macro addr reg, off - tst \off, #0x80000000 - .if addr_io_diff_hi - movne \reg, #IO_BASE - moveq \reg, #pcio_base_high - .if pcio_base_low - addeq \reg, \reg, #pcio_base_low - .endif - .else - mov \reg, #IO_BASE - addeq \reg, \reg, #addr_io_diff_lo - .endif + .equ diff_pcio_base, PCIO_BASE - IO_BASE + + .macro outw2 rd + mov r8, \rd, lsl #16 + orr r8, r8, r8, lsr #16 + str r8, [r3, r0, lsl #2] + mov r8, \rd, lsr #16 + orr r8, r8, r8, lsl #16 + str r8, [r3, r0, lsl #2] + .endm + + .macro inw2 rd, mask, temp + ldr \rd, [r0] + and \rd, \rd, \mask + ldr \temp, [r0] + orr \rd, \rd, \temp, lsl #16 .endm -@ Purpose: read a block of data from a hardware register to memory. -@ Proto : insw(int from_port, void *to, int len_in_words); -@ Proto : inswb(int from_port, void *to, int len_in_bytes); -@ Notes : increment to + .macro addr rd + tst \rd, #0x80000000 + mov \rd, \rd, lsl #2 + add \rd, \rd, #IO_BASE + addeq \rd, \rd, #diff_pcio_base + .endm + +.iosw_bad_align_msg: + .ascii "insw: bad buffer alignment (%p), called from %08lX\n\0" +.iosl_warning: + .ascii "<4>insl/outsl not implemented, called from %08lX\0" + .align + +/* + * These make no sense on Acorn machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) + adr r0, .iosl_warning + mov r1, lr + b SYMBOL_NAME(printk) + +.iosw_bad_alignment: + adr r0, .iosw_bad_align_msg + mov r2, lr + b SYMBOL_NAME(panic) + + +/* Purpose: read a block of data from a hardware register to memory. 
+ * Proto : void insw(int from_port, void *to, int len_in_words); + * Notes : increment to, 'to' must be 16-bit aligned + */ + +.insw_align: tst r1, #1 + bne .iosw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + bne .insw_aligned ENTRY(insw) + teq r2, #0 + RETINSTR(moveq,pc,lr) + addr r0 + tst r1, #3 + bne .insw_align + +.insw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_insw_8 + +.insw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + subs r2, r2, #8 + bpl .insw_8_lp + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_insw_8: tst r2, #4 + beq .no_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.no_insw_4: tst r2, #2 + beq .no_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.no_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + LOADREGS(fd, sp!, {r4, r5, r6, pc}) + +@ Purpose: write a block of data from memory to a hardware register. 
+@ Proto : outsw(int to_reg, void *from, int len_in_words); +@ Notes : increments from + +.outsw_align: tst r1, #1 + bne .iosw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + bne .outsw_aligned + +ENTRY(outsw) + teq r2, #0 + RETINSTR(moveq,pc,lr) + addr r0 + tst r1, #3 + bne .outsw_align + +.outsw_aligned: stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .no_outsw_8 +.outsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .outsw_8_lp + tst r2, #7 + LOADREGS(eqfd, sp!, {r4, r5, r6, pc}) + +.no_outsw_8: tst r2, #4 + beq .no_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_4: tst r2, #2 + beq .no_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.no_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + LOADREGS(fd, sp!, {r4, r5, r6, pc}) + +.insb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1], #1 + subs r2, 
r2, ip + bne .insb_aligned + +ENTRY(insb) + teq r2, #0 + moveq pc, lr + addr r0 + ands ip, r1, #3 + bne .insb_align + +.insb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .insb_no_16 + +.insb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + ldrb r4, [r0] + ldrb r5, [r0] + orr r4, r4, r5, lsl #8 + ldrb r5, [r0] + orr r4, r4, r5, lsl #16 + ldrb r5, [r0] + orr r4, r4, r5, lsl #24 + ldrb r5, [r0] + ldrb r6, [r0] + orr r5, r5, r6, lsl #8 + ldrb r6, [r0] + orr r5, r5, r6, lsl #16 + ldrb r6, [r0] + orr r5, r5, r6, lsl #24 + ldrb r6, [r0] + ldrb ip, [r0] + orr r6, r6, ip, lsl #8 + ldrb ip, [r0] + orr r6, r6, ip, lsl #16 + ldrb ip, [r0] + orr r6, r6, ip, lsl #24 + stmia r1!, {r3 - r6} + subs r2, r2, #16 + bpl .insb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.insb_no_16: tst r2, #8 + beq .insb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + ldrb r4, [r0] + ldrb r5, [r0] + orr r4, r4, r5, lsl #8 + ldrb r5, [r0] + orr r4, r4, r5, lsl #16 + ldrb r5, [r0] + orr r4, r4, r5, lsl #24 + stmia r1!, {r3, r4} + +.insb_no_8: tst r2, #4 + bne .insb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + orr r3, r3, r4, lsl #8 + ldrb r4, [r0] + orr r3, r3, r4, lsl #16 + ldrb r4, [r0] + orr r3, r3, r4, lsl #24 + str r3, [r1], #4 + +.insb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1] + LOADREGS(fd, sp!, {r4 - r6, pc}) + + + +.outsb_align: rsb ip, ip, #4 + cmp ip, r2 + mov ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1], #1 + strgtb r3, [r0] + subs r2, r2, ip + bne .outsb_aligned + +ENTRY(outsb) + teq r2, #0 + moveq pc, lr + addr r0 + ands ip, r1, #3 + bne .outsb_align + 
+.outsb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .outsb_no_16 + +.outsb_16_lp: ldmia r1!, {r3 - r6} + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + mov r5, r5, lsr #8 + strb r5, [r0] + + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + mov r6, r6, lsr #8 + strb r6, [r0] + subs r2, r2, #16 + bpl .outsb_16_lp + + tst r2, #15 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + +.outsb_no_16: tst r2, #8 + beq .outsb_no_8 + + ldmia r1, {r3, r4} + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + mov r4, r4, lsr #8 + strb r4, [r0] + +.outsb_no_8: tst r2, #4 + bne .outsb_no_4 + + ldr r3, [r1], #4 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + mov r3, r3, lsr #8 + strb r3, [r0] + +.outsb_no_4: ands r2, r2, #3 + LOADREGS(eqfd, sp!, {r4 - r6, pc}) + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1] + strgtb r3, [r0] + LOADREGS(fd, sp!, {r4 - r6, pc}) + + + + +@ Purpose: write a memc register +@ Proto : void memc_write(int register, int value); +@ Returns: nothing + +#if defined(CONFIG_CPU_26) +ENTRY(memc_write) + cmp r0, #7 + RETINSTR(movgt,pc,lr) + mov r0, r0, lsl #17 + mov r1, r1, lsl #15 + mov r1, r1, lsr #17 + orr r0, r0, r1, lsl #2 + add r0, r0, #0x03600000 + strb r0, [r0] + RETINSTR(mov,pc,lr) +#define CPSR2SPSR(rt) +#else +#define CPSR2SPSR(rt) \ + mrs rt, cpsr; \ + msr spsr, rt +#endif + +@ Purpose: call an expansion card loader to read bytes. 
+@ Proto : char read_loader(int offset, char *card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_read) + stmfd sp!, {r4 - r12, lr} + mov r11, r1 + mov r1, r0 + CPSR2SPSR(r0) + mov lr, pc + mov pc, r2 + LOADREGS(fd, sp!, {r4 - r12, pc}) + +@ Purpose: call an expansion card loader to reset the card +@ Proto : void read_loader(int card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_reset) + stmfd sp!, {r4 - r12, lr} + mov r11, r0 + CPSR2SPSR(r0) + mov lr, pc + add pc, r1, #8 + LOADREGS(fd, sp!, {r4 - r12, pc}) + + +#if 0 mov r2, r2, lsl#1 -ENTRY(inswb) mov ip, sp stmfd sp!, {r4 - r10, fp, ip, lr, pc} sub fp, ip, #4 @@ -122,14 +586,9 @@ Linsw_notaligned: bgt Linsw_notaligned LOADREGS(ea, fp, {r4 - r10, fp, sp, pc}) -@ Purpose: write a block of data from memory to a hardware register. -@ Proto : outsw(int to_reg, void *from, int len_in_words); -@ Proto : outswb(int to_reg, void *from, int len_in_bytes); -@ Notes : increments from ENTRY(outsw) - mov r2, r2, LSL#1 -ENTRY(outswb) + mov r2, r2, lsl#1 mov ip, sp stmfd sp!, {r4 - r8, fp, ip, lr, pc} sub fp, ip, #4 @@ -166,56 +625,5 @@ ENTRY(outswb) bgt 3b LOADREGS(ea, fp, {r4 - r8, fp, sp, pc}) -/* - * These make no sense on Acorn machines atm. - */ -ENTRY(insl) -ENTRY(outsl) - RETINSTR(mov,pc,lr) - -@ Purpose: write a memc register -@ Proto : void memc_write(int register, int value); -@ Returns: nothing - -#if defined(CONFIG_CPU_26) -ENTRY(memc_write) - cmp r0, #7 - RETINSTR(movgt,pc,lr) - mov r0, r0, lsl #17 - mov r1, r1, lsl #15 - mov r1, r1, lsr #17 - orr r0, r0, r1, lsl #2 - add r0, r0, #0x03600000 - strb r0, [r0] - RETINSTR(mov,pc,lr) -#define CPSR2SPSR(rt) -#else -#define CPSR2SPSR(rt) \ - mrs rt, cpsr; \ - msr spsr, rt #endif -@ Purpose: call an expansion card loader to read bytes. 
-@ Proto : char read_loader(int offset, char *card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_read) - stmfd sp!, {r4 - r12, lr} - mov r11, r1 - mov r1, r0 - CPSR2SPSR(r0) - mov lr, pc - mov pc, r2 - LOADREGS(fd, sp!, {r4 - r12, pc}) - -@ Purpose: call an expansion card loader to reset the card -@ Proto : void read_loader(int card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_reset) - stmfd sp!, {r4 - r12, lr} - mov r11, r0 - CPSR2SPSR(r0) - mov lr, pc - add pc, r1, #8 - LOADREGS(fd, sp!, {r4 - r12, pc}) diff --git a/arch/arm/lib/io-ebsa110.S b/arch/arm/lib/io-ebsa110.S index e0b8229a4..b29276ff7 100644 --- a/arch/arm/lib/io-ebsa110.S +++ b/arch/arm/lib/io-ebsa110.S @@ -22,6 +22,22 @@ ldr lr, [r0] ;\ orr reg, reg, lr, lsl $16 +/* + * These make no sense on these machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) +ENTRY(insb) +ENTRY(outsb) + adr r0, io_long_warning + mov r1, lr + b SYMBOL_NAME(printk) + +io_long_warning: + .ascii "<4>ins?/outs? not implemented on this architecture\0" + .align + @ Purpose: read a block of data from a hardware register to memory. 
@ Proto : insw(int from_port, void *to, int len_in_words); @ Proto : inswb(int from_port, void *to, int len_in_bytes); diff --git a/arch/arm/lib/io-ebsa285.S b/arch/arm/lib/io-footbridge.S index a86983d43..0734c6042 100644 --- a/arch/arm/lib/io-ebsa285.S +++ b/arch/arm/lib/io-footbridge.S @@ -1,8 +1,16 @@ #include <linux/linkage.h> +#include <asm/hardware.h> + + .equ pcio_high, PCIO_BASE & 0xff000000 + .equ pcio_low, PCIO_BASE & 0x00ffffff + + .macro ioaddr, rd,rn + add \rd, \rn, #pcio_high + add \rd, \rd, #pcio_low + .endm ENTRY(insl) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 ands ip, r1, #3 bne 2f @@ -14,49 +22,48 @@ ENTRY(insl) 2: cmp ip, #2 ldr ip, [r0] - blt 3f - bgt 4f + blt 4f + bgt 6f strh ip, [r1], #2 mov ip, ip, lsr #16 -1: subs r2, r2, #1 +3: subs r2, r2, #1 ldrne r3, [r0] orrne ip, ip, r3, lsl #16 strne ip, [r1], #4 movne ip, r3, lsr #16 - bne 1b + bne 3b strh ip, [r1], #2 mov pc, lr -3: strb ip, [r1], #1 +4: strb ip, [r1], #1 mov ip, ip, lsr #8 strh ip, [r1], #2 mov ip, ip, lsr #16 -1: subs r2, r2, #1 +5: subs r2, r2, #1 ldrne r3, [r0] orrne ip, ip, r3, lsl #8 strne ip, [r1], #4 movne ip, r3, lsr #24 - bne 1b + bne 5b strb ip, [r1], #1 mov pc, lr -4: strb ip, [r1], #1 +6: strb ip, [r1], #1 mov ip, ip, lsr #8 -1: subs r2, r2, #1 +7: subs r2, r2, #1 ldrne r3, [r0] orrne ip, ip, r3, lsl #24 strne ip, [r1], #4 movne ip, r3, lsr #8 - bne 1b + bne 7b strb ip, [r1], #1 mov ip, ip, lsr #8 strh ip, [r1], #2 mov pc, lr ENTRY(outsl) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 ands ip, r1, #3 bne 2f @@ -70,31 +77,31 @@ ENTRY(outsl) cmp ip, #2 ldr ip, [r1], #4 mov ip, ip, lsr #16 - blt 3f - bgt 4f + blt 4f + bgt 5f -1: ldr r3, [r1], #4 +3: ldr r3, [r1], #4 orr ip, ip, r3, lsl #16 str ip, [r0] mov ip, r3, lsr #16 subs r2, r2, #1 - bne 1b + bne 3b mov pc, lr -3: ldr r3, [r1], #4 +4: ldr r3, [r1], #4 orr ip, ip, r3, lsl #8 str ip, [r0] mov ip, r3, lsr #24 subs r2, r2, #1 - bne 3b + bne 4b mov pc, lr -4: ldr r3, [r1], 
#4 +5: ldr r3, [r1], #4 orr ip, ip, r3, lsl #24 str ip, [r0] mov ip, r3, lsr #8 subs r2, r2, #1 - bne 4b + bne 5b mov pc, lr /* Nobody could say these are optimal, but not to worry. */ @@ -102,8 +109,7 @@ ENTRY(outsl) ENTRY(outswb) mov r2, r2, lsr #1 ENTRY(outsw) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 1: subs r2, r2, #1 ldrgeh r3, [r1], #2 strgeh r3, [r0] @@ -114,8 +120,7 @@ ENTRY(inswb) mov r2, r2, lsr #1 ENTRY(insw) stmfd sp!, {r4, r5, lr} - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 @ + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 subs ip, r2, #8 blo too_little @@ -176,8 +181,7 @@ too_little: subs r2, r2, #1 ENTRY(insb) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 1: teq r2, #0 ldrneb r3, [r0] strneb r3, [r1], #1 @@ -187,8 +191,7 @@ ENTRY(insb) ENTRY(outsb) - add r0, r0, #0xff000000 - add r0, r0, #0x00e00000 + ioaddr r0, r0 1: teq r2, #0 ldrneb r3, [r1], #1 strneb r3, [r0] diff --git a/arch/arm/lib/io.c b/arch/arm/lib/io.c index d01877934..c94a2ba07 100644 --- a/arch/arm/lib/io.c +++ b/arch/arm/lib/io.c @@ -18,7 +18,7 @@ void _memcpy_fromio(void * to, unsigned long from, unsigned long count) * Copy data from "real" memory space to IO memory space. * This needs to be optimized. */ -void _memcpy_toio(unsigned long to, void * from, unsigned long count) +void _memcpy_toio(unsigned long to, const void * from, unsigned long count) { while (count) { count--; diff --git a/arch/arm/lib/semaphore.S b/arch/arm/lib/semaphore.S new file mode 100644 index 000000000..778fafc1c --- /dev/null +++ b/arch/arm/lib/semaphore.S @@ -0,0 +1,34 @@ +/* + * linux/arch/arm/lib/semaphore.S + * + * Idea from i386 code, Copyright Linus Torvalds. + * Converted for ARM by Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * The semaphore operations have a special calling sequence + * that allows us to keep the distruption of the main code + * path to a minimum. 
These routines save and restore the + * registers that will be touched by __down etc. + */ +ENTRY(__down_failed) + stmfd sp!, {r0 - r3, ip, lr} + bl SYMBOL_NAME(__down) + LOADREGS(fd, sp!, {r0 - r3, ip, pc}) + +ENTRY(__down_interruptible_failed) + stmfd sp!, {r1 - r3, ip, lr} + bl SYMBOL_NAME(__down_interruptible) + LOADREGS(fd, sp!, {r1 - r3, ip, pc}) + +ENTRY(__down_trylock_failed) + stmfd sp!, {r1 - r3, ip, lr} + bl SYMBOL_NAME(__down_trylock) + LOADREGS(fd, sp!, {r1 - r3, ip, pc}) + +ENTRY(__up_wakeup) + stmfd sp!, {r0 - r3, ip, lr} + bl SYMBOL_NAME(__up) + LOADREGS(fd, sp!, {r0 - r3, ip, pc}) diff --git a/arch/arm/mm/fault-common.c b/arch/arm/mm/fault-common.c index 810dea699..1251525da 100644 --- a/arch/arm/mm/fault-common.c +++ b/arch/arm/mm/fault-common.c @@ -26,25 +26,14 @@ void __bad_pmd_kernel(pmd_t *pmd) set_pmd(pmd, mk_kernel_pmd(BAD_PAGETABLE)); } -static void -kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs, - struct task_struct *tsk, struct mm_struct *mm) +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. + */ +void show_pte(struct mm_struct *mm, unsigned long addr) { - char *reason; - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ pgd_t *pgd; - if (addr < PAGE_SIZE) - reason = "NULL pointer dereference"; - else - reason = "paging request"; - - printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n", - reason, addr); - printk(KERN_ALERT "memmap = %08lX, pgd = %p\n", tsk->tss.memmap, mm->pgd); pgd = pgd_offset(mm, addr); printk(KERN_ALERT "*pgd = %08lx", pgd_val(*pgd)); @@ -77,6 +66,27 @@ kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs, } while(0); printk("\n"); +} + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. 
+ */ +static void +kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs, + struct task_struct *tsk, struct mm_struct *mm) +{ + char *reason; + + if (addr < PAGE_SIZE) + reason = "NULL pointer dereference"; + else + reason = "paging request"; + + printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n", + reason, addr); + printk(KERN_ALERT "memmap = %08lX, pgd = %p\n", tsk->tss.memmap, mm->pgd); + show_pte(mm, addr); die("Oops", regs, mode); do_exit(SIGKILL); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 70d7c77b9..48e34214e 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -115,19 +115,19 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag { void * addr; struct vm_struct * area; - unsigned long offset; + unsigned long offset, last_addr; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; /* * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - size = PAGE_ALIGN(size + offset); - - /* - * Don't allow mappings that wrap.. - */ - if (!size || size > phys_addr + size) - return NULL; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - phys_addr; /* * Ok, go for it.. diff --git a/arch/arm/mm/proc-arm2,3.S b/arch/arm/mm/proc-arm2,3.S index 263d79708..7e4871fe2 100644 --- a/arch/arm/mm/proc-arm2,3.S +++ b/arch/arm/mm/proc-arm2,3.S @@ -202,15 +202,11 @@ _arm2_3_check_bugs: LC0: .word SYMBOL_NAME(page_nr) /* * Function: arm2_switch_to (struct task_struct *prev, struct task_struct *next) - * * Params : prev Old task structure * : next New task structure for process to run - * * Returns : prev - * * Purpose : Perform a task switch, saving the old processes state, and restoring * the new. - * * Notes : We don't fiddle with the FP registers here - we postpone this until * the new task actually uses FP. This way, we don't swap FP for tasks * that do not require it. 
@@ -316,15 +312,11 @@ _arm2_proc_init: _arm2_proc_fin: movs pc, lr /* * Function: arm3_switch_to (struct task_struct *prev, struct task_struct *next) - * * Params : prev Old task structure * : next New task structure for process to run - * * Returns : prev - * * Purpose : Perform a task switch, saving the old processes state, and restoring * the new. - * * Notes : We don't fiddle with the FP registers here - we postpone this until * the new task actually uses FP. This way, we don't swap FP for tasks * that do not require it. diff --git a/arch/arm/mm/proc-arm6,7.S b/arch/arm/mm/proc-arm6,7.S index b817ae2b4..d1f31e35d 100644 --- a/arch/arm/mm/proc-arm6,7.S +++ b/arch/arm/mm/proc-arm6,7.S @@ -74,14 +74,14 @@ _arm6_7_switch_to: str sp, [r0, #TSS_SAVE] @ Save sp_SVC ldr sp, [r1, #TSS_SAVE] @ Get saved sp_SVC ldr r2, [r1, #TSK_ADDR_LIMIT] + ldr r3, [r1, #TSS_MEMMAP] @ Page table pointer teq r2, #0 moveq r2, #DOM_KERNELDOMAIN movne r2, #DOM_USERDOMAIN mcr p15, 0, r2, c3, c0 @ Set domain reg - ldr r2, [r1, #TSS_MEMMAP] @ Page table pointer mov r1, #0 mcr p15, 0, r1, c7, c0, 0 @ flush cache - mcr p15, 0, r2, c2, c0, 0 @ update page table ptr + mcr p15, 0, r3, c2, c0, 0 @ update page table ptr mcr p15, 0, r1, c5, c0, 0 @ flush TLBs ldmfd sp!, {ip} msr spsr, ip @ Save tasks CPSR into SPSR for this return diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index ff55c8ffa..be9fad45e 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -29,11 +29,11 @@ _sa110_flush_cache_all: @ preserves r0 mov r2, #1 _sa110_flush_cache_all_r2: ldr r3, =Lclean_switch + ldr ip, =FLUSH_BASE ldr r1, [r3] ands r1, r1, #1 eor r1, r1, #1 str r1, [r3] - ldr ip, =FLUSH_BASE addne ip, ip, #32768 add r1, ip, #16384 @ only necessary for 16k 1: ldr r3, [ip], #32 @@ -226,12 +226,12 @@ _sa110_switch_to: ldr r2, [r0, #TSS_MEMMAP] @ Get old page tables str sp, [r0, #TSS_SAVE] @ Save sp_SVC ldr sp, [r1, #TSS_SAVE] @ Get saved sp_SVC - ldr r4, [r1, #TSK_ADDR_LIMIT] - teq r4, #0 - 
moveq r4, #DOM_KERNELDOMAIN - movne r4, #DOM_USERDOMAIN - mcr p15, 0, r4, c3, c0 @ Set segment + ldr r5, [r1, #TSK_ADDR_LIMIT] ldr r4, [r1, #TSS_MEMMAP] @ Page table pointer + teq r5, #0 + moveq r5, #DOM_KERNELDOMAIN + movne r5, #DOM_USERDOMAIN + mcr p15, 0, r5, c3, c0 @ Set segment /* * Flushing the cache is nightmarishly slow, so we take any excuse * to get out of it. If the old page table is the same as the new, @@ -288,7 +288,8 @@ _sa110_data_abort: */ .align 5 _sa110_set_pmd: str r1, [r0] - mcr p15, 0, r0, c7, c10, 1 @ clean D entry (drain is done by TLB fns) + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB (TLB bypasses WB) mov pc, lr /* @@ -318,6 +319,7 @@ _sa110_set_pte: str r1, [r0], #-1024 @ linux version str r2, [r0] @ hardware version mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry (drain is done by TLB fns) + mcr p15, 0, r0, c7, c10, 4 @ drain WB (TLB bypasses WB) mov pc, lr /* diff --git a/arch/arm/nwfpe/ARM-gcc.h b/arch/arm/nwfpe/ARM-gcc.h new file mode 100644 index 000000000..d726aa452 --- /dev/null +++ b/arch/arm/nwfpe/ARM-gcc.h @@ -0,0 +1,128 @@ + +/* +------------------------------------------------------------------------------- +One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. +------------------------------------------------------------------------------- +*/ +#define LITTLEENDIAN + +/* +------------------------------------------------------------------------------- +The macro `BITS64' can be defined to indicate that 64-bit integer types are +supported by the compiler. +------------------------------------------------------------------------------- +*/ +#define BITS64 + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines the most convenient type that holds +integers of at least as many bits as specified. 
For example, `uint8' should +be the most convenient type that can hold unsigned integers of as many as +8 bits. The `flag' type must be able to hold either a 0 or 1. For most +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +to the same as `int'. +------------------------------------------------------------------------------- +*/ +typedef char flag; +typedef unsigned char uint8; +typedef signed char int8; +typedef int uint16; +typedef int int16; +typedef unsigned int uint32; +typedef signed int int32; +#ifdef BITS64 +typedef unsigned long long int bits64; +typedef signed long long int sbits64; +#endif + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines a type that holds integers +of _exactly_ the number of bits specified. For instance, for most +implementations of C, `bits16' and `sbits16' should be `typedef'ed to +`unsigned short int' and `signed short int' (or `short int'), respectively. +------------------------------------------------------------------------------- +*/ +typedef unsigned char bits8; +typedef signed char sbits8; +typedef unsigned short int bits16; +typedef signed short int sbits16; +typedef unsigned int bits32; +typedef signed int sbits32; +#ifdef BITS64 +typedef unsigned long long int uint64; +typedef signed long long int int64; +#endif + +#ifdef BITS64 +/* +------------------------------------------------------------------------------- +The `LIT64' macro takes as its argument a textual integer literal and if +necessary ``marks'' the literal as having a 64-bit integer type. For +example, the GNU C Compiler (`gcc') requires that 64-bit literals be +appended with the letters `LL' standing for `long long', which is `gcc's +name for the 64-bit integer type. Some compilers may allow `LIT64' to be +defined as the identity macro: `#define LIT64( a ) a'.
+------------------------------------------------------------------------------- +*/ +#define LIT64( a ) a##LL +#endif + +/* +------------------------------------------------------------------------------- +The macro `INLINE' can be used before functions that should be inlined. If +a compiler does not support explicit inlining, this macro should be defined +to be `static'. +------------------------------------------------------------------------------- +*/ +#define INLINE extern __inline__ + + +/* For use as a GCC soft-float library we need some special function names. */ + +#ifdef __LIBFLOAT__ + +/* Some 32-bit ops can be mapped straight across by just changing the name. */ +#define float32_add __addsf3 +#define float32_sub __subsf3 +#define float32_mul __mulsf3 +#define float32_div __divsf3 +#define int32_to_float32 __floatsisf +#define float32_to_int32_round_to_zero __fixsfsi +#define float32_to_uint32_round_to_zero __fixunssfsi + +/* These ones go through the glue code. To avoid namespace pollution + we rename the internal functions too. */ +#define float32_eq ___float32_eq +#define float32_le ___float32_le +#define float32_lt ___float32_lt + +/* All the 64-bit ops have to go through the glue, so we pull the same + trick. 
*/ +#define float64_add ___float64_add +#define float64_sub ___float64_sub +#define float64_mul ___float64_mul +#define float64_div ___float64_div +#define int32_to_float64 ___int32_to_float64 +#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero +#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero +#define float64_to_float32 ___float64_to_float32 +#define float32_to_float64 ___float32_to_float64 +#define float64_eq ___float64_eq +#define float64_le ___float64_le +#define float64_lt ___float64_lt + +#if 0 +#define float64_add __adddf3 +#define float64_sub __subdf3 +#define float64_mul __muldf3 +#define float64_div __divdf3 +#define int32_to_float64 __floatsidf +#define float64_to_int32_round_to_zero __fixdfsi +#define float64_to_uint32_round_to_zero __fixunsdfsi +#define float64_to_float32 __truncdfsf2 +#define float32_to_float64 __extendsfdf2 +#endif + +#endif diff --git a/arch/arm/nwfpe/ChangeLog b/arch/arm/nwfpe/ChangeLog new file mode 100644 index 000000000..e160d36c3 --- /dev/null +++ b/arch/arm/nwfpe/ChangeLog @@ -0,0 +1,20 @@ +1998-11-23 Scott Bambrough <scottb@corelcomputer.com> + + * README.FPE - fix typo in description of lfm/sfm instructions + * NOTES - Added file to describe known bugs/problems + * fpmodule.c - Changed version number to 0.94 + +1998-11-20 Scott Bambrough <scottb@corelcomputer.com> + + * README.FPE - fix description of URD, NRM instructions + * TODO - remove URD, NRM instructions from TODO list + * single_cpdo.c - implement URD, NRM + * double_cpdo.c - implement URD, NRM + * extended_cpdo.c - implement URD, NRM + +1998-11-19 Scott Bambrough <scottb@corelcomputer.com> + + * ChangeLog - Added this file to track changes made. 
+ * fpa11.c - added code to initialize register types to typeNone + * fpa11_cpdt.c - fixed bug in storeExtended (typeExtended changed to + typeDouble in switch statement) diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile new file mode 100644 index 000000000..5db79c6d4 --- /dev/null +++ b/arch/arm/nwfpe/Makefile @@ -0,0 +1,31 @@ +# +# linux/arch/arm/nwfpe/Makefile +# +# Copyright (C) 1998, 1999 Philip Blundell +# + +NWFPE_OBJS := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \ + fpmodule.o fpopcode.o softfloat.o \ + single_cpdo.o double_cpdo.o extended_cpdo.o + +ifeq ($(CONFIG_CPU_26),y) +NWFPE_OBJS += entry26.o +else +NWFPE_OBJS += entry.o +endif + +L_TARGET := math-emu.a + +ifeq ($(CONFIG_NWFPE),y) +L_OBJS = $(NWFPE_OBJS) +else + ifeq ($(CONFIG_NWFPE),m) + M_OBJS = nwfpe.o + MI_OBJS = $(NWFPE_OBJS) + endif +endif + +include $(TOPDIR)/Rules.make + +nwfpe.o: $(MI_OBJS) $(MIX_OBJS) + $(LD) $(LD_RFLAG) -r -o $@ $(MI_OBJS) $(MIX_OBJS) diff --git a/arch/arm/nwfpe/config.h b/arch/arm/nwfpe/config.h new file mode 100644 index 000000000..35f9d6336 --- /dev/null +++ b/arch/arm/nwfpe/config.h @@ -0,0 +1,31 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ + +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#if 1 +#define C_SYMBOL_NAME(foo) foo +#else +#define C_SYMBOL_NAME(foo) _##foo +#endif + +#endif diff --git a/arch/arm/nwfpe/double_cpdo.c b/arch/arm/nwfpe/double_cpdo.c new file mode 100644 index 000000000..e746c7a29 --- /dev/null +++ b/arch/arm/nwfpe/double_cpdo.c @@ -0,0 +1,293 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ + +#include "config.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" + +extern FPA11 *fpa11; + +float64 getDoubleConstant(unsigned int); + +float64 float64_exp(float64 Fm); +float64 float64_ln(float64 Fm); +float64 float64_sin(float64 rFm); +float64 float64_cos(float64 rFm); +float64 float64_arcsin(float64 rFm); +float64 float64_arctan(float64 rFm); +float64 float64_log(float64 rFm); +float64 float64_tan(float64 rFm); +float64 float64_arccos(float64 rFm); +float64 float64_pow(float64 rFn,float64 rFm); +float64 float64_pol(float64 rFn,float64 rFm); + +unsigned int DoubleCPDO(const unsigned int opcode) +{ + float64 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + //fp_printk("DoubleCPDO(0x%08x)\n",opcode); + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getDoubleConstant(Fm); + } + else + { + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + rFm = float32_to_float64(fpa11->fpreg[Fm].fValue.fSingle); + break; + + case typeDouble: + rFm = fpa11->fpreg[Fm].fValue.fDouble; + break; + + case typeExtended: + // !! patb + //fp_printk("not implemented! why not?\n"); + //!! ScottB + // should never get here, if extended involved + // then other operand should be promoted then + // ExtendedCPDO called. + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + rFn = float32_to_float64(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeDouble: + rFn = fpa11->fpreg[Fn].fValue.fDouble; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + /* !! 
this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */ + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fValue.fDouble = rFm; + break; + + case MNF_CODE: + { + unsigned int *p = (unsigned int*)&rFm; + p[1] ^= 0x80000000; + fpa11->fpreg[Fd].fValue.fDouble = rFm; + } + break; + + case ABS_CODE: + { + unsigned int *p = (unsigned int*)&rFm; + p[1] &= 0x7fffffff; + fpa11->fpreg[Fd].fValue.fDouble = rFm; + } + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fValue.fDouble = + int32_to_float64(float64_to_int32(rFm)); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_sin(rFm); 
+ break; + + case COS_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fValue.fDouble = float64_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fpreg[Fd].fType = typeDouble; + return nRc; +} + +#if 0 +float64 float64_exp(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_ln(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_sin(float64 rFm) +{ + return rFm; +//series +} + +float64 float64_cos(float64 rFm) +{ + return rFm; + //series +} + +#if 0 +float64 float64_arcsin(float64 rFm) +{ +//series +} + +float64 float64_arctan(float64 rFm) +{ + //series +} +#endif + +float64 float64_log(float64 rFm) +{ + return float64_div(float64_ln(rFm),getDoubleConstant(7)); +} + +float64 float64_tan(float64 rFm) +{ + return float64_div(float64_sin(rFm),float64_cos(rFm)); +} + +float64 float64_arccos(float64 rFm) +{ +return rFm; + //return float64_sub(halfPi,float64_arcsin(rFm)); +} + +float64 float64_pow(float64 rFn,float64 rFm) +{ + return float64_exp(float64_mul(rFm,float64_ln(rFn))); +} + +float64 float64_pol(float64 rFn,float64 rFm) +{ + return float64_arctan(float64_div(rFn,rFm)); +} +#endif diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S new file mode 100644 index 000000000..6f0077fbe --- /dev/null +++ b/arch/arm/nwfpe/entry.S @@ -0,0 +1,126 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published 
by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* This is the kernel's entry point into the floating point emulator. +It is called from the kernel with code similar to this: + + adrsvc al, r9, ret_from_exception @ r9 = normal FP return + adrsvc al, lr, fpundefinstr @ lr = undefined instr return + + get_current_task r10 + mov r8, #1 + strb r8, [r10, #TSK_USED_MATH] @ set current->used_math + add r10, r10, #TSS_FPESAVE @ r10 = workspace + ldr r4, .LC2 + ldr pc, [r4] @ Call FP emulator entry point + +The kernel expects the emulator to return via one of two possible +points of return it passes to the emulator. The emulator, if +successful in its emulation, jumps to ret_from_exception (passed in +r9) and the kernel takes care of returning control from the trap to +the user code. If the emulator is unable to emulate the instruction, +it returns via _fpundefinstr (passed via lr) and the kernel halts the +user program with a core dump. + +On entry to the emulator r10 points to an area of private FP workspace +reserved in the thread structure for this process. This is where the +emulator saves its registers across calls. The first word of this area +is used as a flag to detect the first time a process uses floating point, +so that the emulator startup cost can be avoided for tasks that don't +want it. + +This routine does three things: + +1) It saves SP into a variable called userRegisters. 
The kernel has +created a struct pt_regs on the stack and saved the user registers +into it. See /usr/include/asm/proc/ptrace.h for details. The +emulator code uses userRegisters as the base of an array of words from +which the contents of the registers can be extracted. + +2) It calls EmulateAll to emulate a floating point instruction. +EmulateAll returns 1 if the emulation was successful, or 0 if not. + +3) If an instruction has been emulated successfully, it looks ahead at +the next instruction. If it is a floating point instruction, it +executes the instruction, without returning to user space. In this +way it repeatedly looks ahead and executes floating point instructions +until it encounters a non floating point instruction, at which time it +returns via the address passed in r9 (ret_from_exception). + +This is done to reduce the effect of the trap overhead on each +floating point instruction. GCC attempts to group floating point +instructions to allow the emulator to spread the cost of the trap over +several floating point instructions. */ + + .globl nwfpe_enter +nwfpe_enter: + /* ?? Could put userRegisters and fpa11 into fixed regs during + emulation. This would reduce load/store overhead at the expense + of stealing two regs from the register allocator. Not sure if + it's worth it.
*/ + ldr r4, =userRegisters + str sp, [r4] @ save pointer to user regs + ldr r4, =fpa11 + str r10, [r4] @ store pointer to our state + mov r4, sp @ use r4 for local pointer + mov r10, lr @ save the failure-return addresses + + ldr r5, [r4, #60] @ get contents of PC; + ldr r0, [r5, #-4] @ get actual instruction into r0 +emulate: + bl EmulateAll @ emulate the instruction + cmp r0, #0 @ was emulation successful + moveq pc, r10 @ no, return failure + +next: +__x1: ldrt r6, [r5], #4 @ get the next instruction and + @ increment PC + + and r2, r6, #0x0F000000 @ test for FP insns + teq r2, #0x0C000000 + teqne r2, #0x0D000000 + teqne r2, #0x0E000000 + movne pc, r9 @ return ok if not a fp insn + + str r5, [r4, #60] @ update PC copy in regs + + mov r0, r6 @ save a copy + ldr r1, [r4, #64] @ fetch the condition codes + bl checkCondition @ check the condition + cmp r0, #0 @ r0 = 0 ==> condition failed + + @ if condition code failed to match, next insn + beq next @ get the next instruction; + + mov r0, r6 @ prepare for EmulateAll() + b emulate @ if r0 != 0, goto EmulateAll + + @ We need to be prepared for the instruction at __x1 to fault. + @ Emit the appropriate exception gunk to fix things up. + .section .fixup,"ax" + .align +__f1: mov pc, r9 + .previous + .section __ex_table,"a" + .align 3 + .long __x1, __f1 + .previous diff --git a/arch/arm/nwfpe/entry26.S b/arch/arm/nwfpe/entry26.S new file mode 100644 index 000000000..6b1ec3354 --- /dev/null +++ b/arch/arm/nwfpe/entry26.S @@ -0,0 +1,112 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + (c) Philip Blundell 1998-1999 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "../lib/constants.h" + +/* This is the kernel's entry point into the floating point emulator. +It is called from the kernel with code similar to this: + + mov fp, #0 + teqp pc, #I_BIT | MODE_SVC + ldr r4, .LC2 + ldr pc, [r4] @ Call FP module USR entry point + +The kernel expects the emulator to return via one of two possible +points of return it passes to the emulator. The emulator, if +successful in its emulation, jumps to ret_from_exception and the +kernel takes care of returning control from the trap to the user code. +If the emulator is unable to emulate the instruction, it returns to +fpundefinstr and the kernel halts the user program with a core dump. + +This routine does four things: + +1) It saves SP into a variable called userRegisters. The kernel has +created a struct pt_regs on the stack and saved the user registers +into it. See /usr/include/asm/proc/ptrace.h for details. The +emulator code uses userRegisters as the base of an array of words from +which the contents of the registers can be extracted. + +2) It locates the FP emulator work area within the TSS structure and +points `fpa11' to it. + +3) It calls EmulateAll to emulate a floating point instruction. +EmulateAll returns 1 if the emulation was successful, or 0 if not. + +4) If an instruction has been emulated successfully, it looks ahead at +the next instruction. If it is a floating point instruction, it +executes the instruction, without returning to user space. 
In this +way it repeatedly looks ahead and executes floating point instructions +until it encounters a non floating point instruction, at which time it +returns via ret_from_exception. + +This is done to reduce the effect of the trap overhead on each +floating point instruction. GCC attempts to group floating point +instructions to allow the emulator to spread the cost of the trap over +several floating point instructions. */ + + .globl nwfpe_enter +nwfpe_enter: + ldr r4, =userRegisters + str sp, [r4] @ save pointer to user regs + + mov r10, sp, lsr #13 @ find workspace + mov r10, r10, lsl #13 + add r10, r10, #TSS_FPESAVE + + ldr r4, =fpa11 + str r10, [r4] @ store pointer to our state + mov r4, sp @ use r4 for local pointer + + ldr r5, [r4, #60] @ get contents of PC + bic r5, r5, #0xfc000003 + ldr r0, [r5, #-4] @ get actual instruction into r0 + bl EmulateAll @ emulate the instruction +1: cmp r0, #0 @ was emulation successful + beq fpundefinstr @ no, return failure + +next: + ldrt r6, [r5], #4 @ get the next instruction and + @ increment PC + + and r2, r6, #0x0F000000 @ test for FP insns + teq r2, #0x0C000000 + teqne r2, #0x0D000000 + teqne r2, #0x0E000000 + bne ret_from_exception @ return ok if not a fp insn + + ldr r9, [r4, #60] @ get new condition codes + and r9, r9, #0xfc000003 + orr r7, r5, r9 + str r7, [r4, #60] @ update PC copy in regs + + mov r0, r6 @ save a copy + mov r1, r9 @ fetch the condition codes + bl checkCondition @ check the condition + cmp r0, #0 @ r0 = 0 ==> condition failed + + @ if condition code failed to match, next insn + beq next @ get the next instruction; + + mov r0, r6 @ prepare for EmulateAll() + adr lr, 1b + orr lr, lr, #3 + b EmulateAll @ if r0 != 0, goto EmulateAll diff --git a/arch/arm/nwfpe/extended_cpdo.c b/arch/arm/nwfpe/extended_cpdo.c new file mode 100644 index 000000000..1c5c66180 --- /dev/null +++ b/arch/arm/nwfpe/extended_cpdo.c @@ -0,0 +1,276 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + 
Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" + +floatx80 getExtendedConstant(unsigned int); + +floatx80 floatx80_exp(floatx80 Fm); +floatx80 floatx80_ln(floatx80 Fm); +floatx80 floatx80_sin(floatx80 rFm); +floatx80 floatx80_cos(floatx80 rFm); +floatx80 floatx80_arcsin(floatx80 rFm); +floatx80 floatx80_arctan(floatx80 rFm); +floatx80 floatx80_log(floatx80 rFm); +floatx80 floatx80_tan(floatx80 rFm); +floatx80 floatx80_arccos(floatx80 rFm); +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm); +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm); + +unsigned int ExtendedCPDO(const unsigned int opcode) +{ + floatx80 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + //fp_printk("ExtendedCPDO(0x%08x)\n",opcode); + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getExtendedConstant(Fm); + } + else + { + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fValue.fSingle); + break; + + case typeDouble: + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fValue.fDouble); + break; + + case typeExtended: + rFm = fpa11->fpreg[Fm].fValue.fExtended; + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = 
getFn(opcode); + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle); + break; + + case typeDouble: + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble); + break; + + case typeExtended: + rFn = fpa11->fpreg[Fn].fValue.fExtended; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_div(rFm,rFn); + break; + +#if 0 + case POW_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fValue.fExtended = rFm; + break; + + case MNF_CODE: + rFm.high ^= 0x8000; + fpa11->fpreg[Fd].fValue.fExtended = rFm; + break; + + case ABS_CODE: + rFm.high &= 0x7fff; + fpa11->fpreg[Fd].fValue.fExtended = rFm; + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fValue.fExtended = + int32_to_floatx80(floatx80_to_int32(rFm)); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_log(rFm); + break; + + case 
LGN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fValue.fExtended = floatx80_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fpreg[Fd].fType = typeExtended; + return nRc; +} + +#if 0 +floatx80 floatx80_exp(floatx80 Fm) +{ +//series +} + +floatx80 floatx80_ln(floatx80 Fm) +{ +//series +} + +floatx80 floatx80_sin(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_cos(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_arcsin(floatx80 rFm) +{ +//series +} + +floatx80 floatx80_arctan(floatx80 rFm) +{ + //series +} + +floatx80 floatx80_log(floatx80 rFm) +{ + return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7)); +} + +floatx80 floatx80_tan(floatx80 rFm) +{ + return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm)); +} + +floatx80 floatx80_arccos(floatx80 rFm) +{ + //return floatx80_sub(halfPi,floatx80_arcsin(rFm)); +} + +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm) +{ + return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn))); +} + +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm) +{ + return floatx80_arctan(floatx80_div(rFn,rFm)); +} +#endif diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c new file mode 100644 index 000000000..506821ca8 --- /dev/null +++ b/arch/arm/nwfpe/fpa11.c @@ -0,0 +1,206 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to 
Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "fpa11.h" +#include "milieu.h" +#include "fpopcode.h" + +#include "fpmodule.h" +#include "fpmodule.inl" + +/* forward declarations */ +unsigned int EmulateCPDO(const unsigned int); +unsigned int EmulateCPDT(const unsigned int); +unsigned int EmulateCPRT(const unsigned int); + +/* Emulator registers */ +FPA11 *fpa11; + +/* Reset the FPA11 chip. Called to initialize and reset the emulator. 
*/
+/* Put the emulated FPA11 (reached through the global fpa11 pointer) into
+   its reset state: every register slot is tagged typeNone and the status
+   register holds only the emulator's system id. */
+void resetFPA11(void)
+{
+   int nReg;
+
+   /* initialize the registers: none of the eight slots holds a value */
+   for (nReg = 0; nReg < 8; nReg++)
+      fpa11->fpreg[nReg].fType = typeNone;
+
+   /* FPSR: set system id to FP_EMULATOR, clear all other bits */
+   fpa11->fpsr = FP_EMULATOR;
+
+   /* FPCR: set SB, AB and DA bits, clear all others
+      (only kept up to date when MAINTAIN_FPCR is enabled) */
+#if MAINTAIN_FPCR
+   fpa11->fpcr = MASK_RESET;
+#endif
+}
+
+/* Select the SoftFloat rounding mode from the rounding-mode field of
+   opcode.  Any field value that is not one of the three directed modes
+   is treated as round-to-nearest.  When MAINTAIN_FPCR is enabled the
+   rounding-mode field of the FPCR is rewritten to match. */
+void SetRoundingMode(const unsigned int opcode)
+{
+   unsigned int nMode = opcode & MASK_ROUNDING_MODE;
+
+   switch (nMode)
+   {
+      case ROUND_TO_PLUS_INFINITY:
+         float_rounding_mode = float_round_up;
+      break;
+
+      case ROUND_TO_MINUS_INFINITY:
+         float_rounding_mode = float_round_down;
+      break;
+
+      case ROUND_TO_ZERO:
+         float_rounding_mode = float_round_to_zero;
+      break;
+
+      case ROUND_TO_NEAREST:
+      default:
+         float_rounding_mode = float_round_nearest_even;
+         nMode = ROUND_TO_NEAREST;   /* value recorded in the FPCR */
+      break;
+   }
+
+#if MAINTAIN_FPCR
+   /* replace the old rounding-mode bits with the mode just selected */
+   fpa11->fpcr = (fpa11->fpcr & ~MASK_ROUNDING_MODE) | nMode;
+#endif
+}
+
+/* Select the precision used for extended (floatx80) results from the
+   rounding-precision field of opcode: 32, 64 or 80 bits.  An
+   unrecognised field value falls back to 80 bits and, when the FPCR is
+   maintained, leaves its precision field cleared. */
+void SetRoundingPrecision(const unsigned int opcode)
+{
+   unsigned int nPrecision = opcode & MASK_ROUNDING_PRECISION;
+
+   switch (nPrecision)
+   {
+      case ROUND_SINGLE:
+         floatx80_rounding_precision = 32;
+      break;
+
+      case ROUND_DOUBLE:
+         floatx80_rounding_precision = 64;
+      break;
+
+      case ROUND_EXTENDED:
+         floatx80_rounding_precision = 80;
+      break;
+
+      default:
+         floatx80_rounding_precision = 80;
+         nPrecision = 0;             /* nothing is recorded in the FPCR */
+      break;
+   }
+
+#if MAINTAIN_FPCR
+   /* replace the old precision bits with the precision just selected */
+   fpa11->fpcr = (fpa11->fpcr & ~MASK_ROUNDING_PRECISION) | nPrecision;
+#endif
+}
+
+/* Emulate the instruction in the opcode.
*/
+/* Entry point of the emulator proper.  On the first call for a thread
+   (initflag still 0) the FPA11 state is reset and the default rounding
+   mode and precision are selected.  The opcode is then handed to the
+   CPRT, CPDO or CPDT emulation routine, tested in that order.
+   Returns the handler's result, or 0 when the opcode matches none of
+   the three classes. */
+unsigned int EmulateAll(unsigned int opcode)
+{
+   if (fpa11->initflag == 0)   /* good place for __builtin_expect */
+   {
+      resetFPA11();
+      SetRoundingMode(ROUND_TO_NEAREST);
+      SetRoundingPrecision(ROUND_EXTENDED);
+      fpa11->initflag = 1;
+   }
+
+   /* Conversion, register transfer and comparison opcodes. */
+   if (TEST_OPCODE(opcode,MASK_CPRT))
+      return EmulateCPRT(opcode);
+
+   /* Monadic and dyadic arithmetic opcodes. */
+   if (TEST_OPCODE(opcode,MASK_CPDO))
+      return EmulateCPDO(opcode);
+
+   /* Load/store and load/store multiple opcodes. */
+   if (TEST_OPCODE(opcode,MASK_CPDT))
+      return EmulateCPDT(opcode);
+
+   /* Invalid instruction detected.  Return FALSE. */
+   return 0;
+}
+
+#if 0
+/* Disabled alternative decoder that switches directly on opcode bits. */
+unsigned int EmulateAll1(unsigned int opcode)
+{
+   switch ((opcode >> 24) & 0xf)
+   {
+      case 0xc:
+      case 0xd:
+         if ((opcode >> 20) & 0x1)
+         {
+            switch ((opcode >> 8) & 0xf)
+            {
+               case 0x1: return PerformLDF(opcode); break;
+               case 0x2: return PerformLFM(opcode); break;
+               default: return 0;
+            }
+         }
+         else
+         {
+            switch ((opcode >> 8) & 0xf)
+            {
+               case 0x1: return PerformSTF(opcode); break;
+               case 0x2: return PerformSFM(opcode); break;
+               default: return 0;
+            }
+         }
+      break;
+
+      case 0xe:
+         if (opcode & 0x10)
+            return EmulateCPDO(opcode);
+         else
+            return EmulateCPRT(opcode);
+      break;
+
+      default: return 0;
+   }
+}
+#endif
+
diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h
new file mode 100644
index 000000000..4a47a29f4
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11.h
@@ -0,0 +1,61 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free
Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPA11_H__
+#define __FPA11_H__
+
+/* includes */
+#include "fpsr.h"          /* FP control and status register definitions */
+#include "softfloat.h"
+
+/* Tags stored in FPREG.fType: which member of fValue is current.
+   The numeric order is relied upon where operand sizes are compared
+   (a larger tag means a wider format). */
+#define typeNone      0x00
+#define typeSingle    0x01
+#define typeDouble    0x02
+#define typeExtended  0x03
+
+/* One emulated floating point register: a type tag plus the value in
+   the format named by that tag. */
+typedef struct tagFPREG {
+   unsigned int fType;       /* one of the type* tags above */
+   union {
+      float32  fSingle;      /* valid when fType == typeSingle */
+      float64  fDouble;      /* valid when fType == typeDouble */
+      floatx80 fExtended;    /* valid when fType == typeExtended */
+   } fValue;
+} FPREG;
+
+/* FPA11 device model */
+typedef struct tagFPA11 {
+   int initflag;        /* this is special.  The kernel guarantees
+                           to set it to 0 when a thread is launched,
+                           so we can use it to detect whether this
+                           instance of the emulator needs to be
+                           initialised. */
+   FPREG fpreg[8];      /* 8 floating point registers */
+   FPSR fpsr;           /* floating point status register */
+   FPCR fpcr;           /* floating point control register */
+} FPA11;
+
+/* Implemented in fpa11.c */
+extern void resetFPA11(void);
+extern void SetRoundingMode(const unsigned int);
+extern void SetRoundingPrecision(const unsigned int);
+
+/* Emulator state that all of the routines above operate on */
+extern FPA11 *fpa11;
+
+#endif
diff --git a/arch/arm/nwfpe/fpa11.inl b/arch/arm/nwfpe/fpa11.inl
new file mode 100644
index 000000000..321ab7c1c
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11.inl
@@ -0,0 +1,47 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+
+/* Read and write floating point status register */
+
+/* Returns the whole FPSR, system id included. */
+extern __inline__ unsigned int readFPSR(void)
+{
+   return fpa11->fpsr;
+}
+
+/* Replaces every FPSR bit outside MASK_SYSID with the matching bit of
+   reg; the bits inside MASK_SYSID keep their current value. */
+extern __inline__ void writeFPSR(FPSR reg)
+{
+   /* the sysid byte in the status register is readonly */
+   const FPSR sysid    = fpa11->fpsr & MASK_SYSID;
+   const FPSR writable = reg & ~MASK_SYSID;
+
+   fpa11->fpsr = sysid | writable;
+}
+
+/* Read and write floating point control register */
+
+/* Returns the FPCR with the bits in MASK_RFC forced to zero. */
+extern __inline__ FPCR readFPCR(void)
+{
+   /* clear SB, AB and DA bits before returning FPCR */
+   return fpa11->fpcr & ~MASK_RFC;
+}
+
+/* Copies only the bits selected by MASK_WFC from reg into the FPCR;
+   all other FPCR bits are left as they were. */
+extern __inline__ void writeFPCR(FPCR reg)
+{
+   /* clear the SB, AB and DA bits, then write the new ones */
+   fpa11->fpcr = (fpa11->fpcr & ~MASK_WFC) | (reg & MASK_WFC);
+}
diff --git a/arch/arm/nwfpe/fpa11_cpdo.c b/arch/arm/nwfpe/fpa11_cpdo.c
new file mode 100644
index 000000000..c337c553a
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11_cpdo.c
@@ -0,0 +1,117 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "fpa11.h"
+#include "fpopcode.h"
+
+unsigned int SingleCPDO(const unsigned int opcode);
+unsigned int DoubleCPDO(const unsigned int opcode);
+unsigned int ExtendedCPDO(const unsigned int opcode);
+
+/* Emulate one CPDO (arithmetic) instruction.
+
+   The operation is carried out in the widest format found among the
+   operands, by SingleCPDO, DoubleCPDO or ExtendedCPDO, and the result
+   left in Fd is then converted to the destination size named by the
+   opcode if the two differ.
+
+   Returns 0 when the destination size is invalid or the selected
+   routine rejects the opcode, so that Linux can raise an invalid
+   instruction trap; otherwise the routine's non-zero result. */
+unsigned int EmulateCPDO(const unsigned int opcode)
+{
+   unsigned int Fd, nType, nDest, nRc = 1;
+
+   //fp_printk("EmulateCPDO(0x%08x)\n",opcode);
+
+   /* Get the destination size.  If not valid let Linux perform
+      an invalid instruction trap. */
+   nDest = getDestinationSize(opcode);
+   if (typeNone == nDest) return 0;
+
+   SetRoundingMode(opcode);
+
+   /* Compare the size of the operands in Fn and Fm.
+      Choose the largest size and perform operations in that size,
+      in order to make use of all the precision of the operands.
+      If Fm is a constant, we just grab a constant of a size
+      matching the size of the operand in Fn.  A monadic instruction
+      has no Fn operand, so the destination size stands in for it. */
+   nType = MONADIC_INSTRUCTION(opcode)
+              ? nDest
+              : fpa11->fpreg[getFn(opcode)].fType;
+
+   if (!CONSTANT_FM(opcode))
+   {
+      const unsigned int nFmType = fpa11->fpreg[getFm(opcode)].fType;
+
+      if (nType < nFmType) nType = nFmType;
+   }
+
+   if (typeSingle == nType)
+      nRc = SingleCPDO(opcode);
+   else if (typeDouble == nType)
+      nRc = DoubleCPDO(opcode);
+   else if (typeExtended == nType)
+      nRc = ExtendedCPDO(opcode);
+   else
+      nRc = 0;
+
+   /* Nothing to fix up when the operation failed. */
+   if (0 == nRc) return nRc;
+
+   /* The operation succeeded: check to see if the result in the
+      destination register is the correct size.  If not force it
+      to be. */
+   Fd = getFd(opcode);
+   nType = fpa11->fpreg[Fd].fType;
+   if (nDest == nType) return nRc;
+
+   switch (nDest)
+   {
+      case typeSingle:
+         if (typeDouble == nType)
+            fpa11->fpreg[Fd].fValue.fSingle =
+               float64_to_float32(fpa11->fpreg[Fd].fValue.fDouble);
+         else
+            fpa11->fpreg[Fd].fValue.fSingle =
+               floatx80_to_float32(fpa11->fpreg[Fd].fValue.fExtended);
+      break;
+
+      case typeDouble:
+         if (typeSingle == nType)
+            fpa11->fpreg[Fd].fValue.fDouble =
+               float32_to_float64(fpa11->fpreg[Fd].fValue.fSingle);
+         else
+            fpa11->fpreg[Fd].fValue.fDouble =
+               floatx80_to_float64(fpa11->fpreg[Fd].fValue.fExtended);
+      break;
+
+      case typeExtended:
+         if (typeSingle == nType)
+            fpa11->fpreg[Fd].fValue.fExtended =
+               float32_to_floatx80(fpa11->fpreg[Fd].fValue.fSingle);
+         else
+            fpa11->fpreg[Fd].fValue.fExtended =
+               float64_to_floatx80(fpa11->fpreg[Fd].fValue.fDouble);
+      break;
+   }
+
+   /* the register now holds a value of the requested size */
+   fpa11->fpreg[Fd].fType = nDest;
+
+   return nRc;
+}
diff --git a/arch/arm/nwfpe/fpa11_cpdt.c b/arch/arm/nwfpe/fpa11_cpdt.c
new file mode 100644
index 000000000..9617a79a3
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11_cpdt.c
@@ -0,0 +1,330 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+#include <asm/uaccess.h>
+
+/* NOTE(review): none of the helpers below look at the value returned by
+   get_user()/put_user(), so a faulting user address goes unreported. */
+
+/* Load a single precision value from user memory into register Fn. */
+extern __inline__
+void loadSingle(const unsigned int Fn,const unsigned int *pMem)
+{
+   fpa11->fpreg[Fn].fType = typeSingle;
+   get_user(fpa11->fpreg[Fn].fValue.fSingle, pMem);
+}
+
+/* Load a double precision value from user memory into register Fn.
+   The two words are swapped on the way in: pMem[0] carries the sign
+   and exponent and lands in the second word of the register value. */
+extern __inline__
+void loadDouble(const unsigned int Fn,const unsigned int *pMem)
+{
+   unsigned int *pWords = (unsigned int*)&fpa11->fpreg[Fn].fValue.fDouble;
+
+   fpa11->fpreg[Fn].fType = typeDouble;
+   get_user(pWords[0], &pMem[1]);
+   get_user(pWords[1], &pMem[0]); /* sign & exponent */
+}
+
+/* Load an extended precision value (three words) from user memory into
+   register Fn, swapping the two mantissa words on the way in. */
+extern __inline__
+void loadExtended(const unsigned int Fn,const unsigned int *pMem)
+{
+   unsigned int *pWords = (unsigned int*)&fpa11->fpreg[Fn].fValue.fExtended;
+
+   fpa11->fpreg[Fn].fType = typeExtended;
+   get_user(pWords[0], &pMem[0]); /* sign & exponent */
+   get_user(pWords[1], &pMem[2]); /* ls bits */
+   get_user(pWords[2], &pMem[1]); /* ms bits */
+}
+
+/* Load one register from the three-word image written by storeMultiple.
+   Bits 14-15 of the first word carry the register's type tag; a tag
+   that is neither single, double nor extended loads no value. */
+extern __inline__
+void loadMultiple(const unsigned int Fn,const unsigned int *pMem)
+{
+   register unsigned int *pWords;
+   unsigned long x;
+
+   pWords = (unsigned int*)&(fpa11->fpreg[Fn].fValue);
+   get_user(x, &pMem[0]);
+   fpa11->fpreg[Fn].fType = (x >> 14) & 0x00000003;
+
+   switch (fpa11->fpreg[Fn].fType)
+   {
+      case typeSingle:
+      case typeDouble:
+         get_user(pWords[0], &pMem[2]);  /* Single */
+         get_user(pWords[1], &pMem[1]);  /* double msw */
+         pWords[2] = 0;                  /* empty */
+      break;
+
+      case typeExtended:
+         get_user(pWords[1], &pMem[2]);
+         get_user(pWords[2], &pMem[1]);  /* msw */
+         pWords[0] = (x & 0x80003fff);   /* drop the type tag bits */
+      break;
+   }
+}
+
+/* Store register Fn to user memory as a single precision value,
+   converting from double or extended when necessary. */
+extern __inline__
+void storeSingle(const unsigned int Fn,unsigned int *pMem)
+{
+   float32 val;
+   register unsigned int *pWords = (unsigned int*)&val;
+   const unsigned int nType = fpa11->fpreg[Fn].fType;
+
+   if (typeDouble == nType)
+      val = float64_to_float32(fpa11->fpreg[Fn].fValue.fDouble);
+   else if (typeExtended == nType)
+      val = floatx80_to_float32(fpa11->fpreg[Fn].fValue.fExtended);
+   else
+      val = fpa11->fpreg[Fn].fValue.fSingle;
+
+   put_user(pWords[0], pMem);
+}
+
+/* Store register Fn to user memory as a double precision value,
+   converting from single or extended when necessary. */
+extern __inline__
+void storeDouble(const unsigned int Fn,unsigned int *pMem)
+{
+   float64 val;
+   register unsigned int *pWords = (unsigned int*)&val;
+   const unsigned int nType = fpa11->fpreg[Fn].fType;
+
+   if (typeSingle == nType)
+      val = float32_to_float64(fpa11->fpreg[Fn].fValue.fSingle);
+   else if (typeExtended == nType)
+      val = floatx80_to_float64(fpa11->fpreg[Fn].fValue.fExtended);
+   else
+      val = fpa11->fpreg[Fn].fValue.fDouble;
+
+   put_user(pWords[1], &pMem[0]); /* msw */
+   put_user(pWords[0], &pMem[1]); /* lsw */
+}
+
+/* Store register Fn to user memory as an extended precision value,
+   converting from single or double when necessary. */
+extern __inline__
+void storeExtended(const unsigned int Fn,unsigned int *pMem)
+{
+   floatx80 val;
+   register unsigned int *pWords = (unsigned int*)&val;
+   const unsigned int nType = fpa11->fpreg[Fn].fType;
+
+   if (typeSingle == nType)
+      val = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle);
+   else if (typeDouble == nType)
+      val = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble);
+   else
+      val = fpa11->fpreg[Fn].fValue.fExtended;
+
+   put_user(pWords[0], &pMem[0]); /* sign & exp */
+   put_user(pWords[1], &pMem[2]);
+   put_user(pWords[2], &pMem[1]); /* msw */
+}
+
+/* Store register Fn as a three-word image without converting it.  The
+   register's type tag is written into bits 14-15 of the first word so
+   that loadMultiple can restore the register exactly.  A register
+   tagged typeNone stores nothing. */
+extern __inline__
+void storeMultiple(const unsigned int Fn,unsigned int *pMem)
+{
+   register unsigned int nType, *pWords;
+
+   pWords = (unsigned int*)&(fpa11->fpreg[Fn].fValue);
+   nType = fpa11->fpreg[Fn].fType;
+
+   switch (nType)
+   {
+      case typeSingle:
+      case typeDouble:
+         put_user(pWords[0], &pMem[2]);  /* single */
+         put_user(pWords[1], &pMem[1]);  /* double msw */
+         put_user(nType << 14, &pMem[0]);
+      break;
+
+      case typeExtended:
+         put_user(pWords[2], &pMem[1]);  /* msw */
+         put_user(pWords[1], &pMem[2]);
+         put_user((pWords[0] & 0x80003fff) | (nType << 14), &pMem[0]);
+      break;
+   }
+}
+
+/* LDF: load one register from memory.  The address is formed from Rn
+   and the opcode's offset; pre-indexed forms use the adjusted address,
+   post-indexed forms the unadjusted base.  Returns 0 for an unknown
+   transfer length, 1 otherwise. */
+unsigned int PerformLDF(const unsigned int opcode)
+{
+   unsigned int *pBase, *pAddress, *pFinal, nRc = 1;
+
+   //fp_printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode)) pBase += 2;
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+      pFinal += getOffset(opcode);
+   else
+      pFinal -= getOffset(opcode);
+
+   pAddress = PREINDEXED(opcode) ? pFinal : pBase;
+
+   switch (opcode & MASK_TRANSFER_LENGTH)
+   {
+      case TRANSFER_SINGLE  : loadSingle(getFd(opcode),pAddress);   break;
+      case TRANSFER_DOUBLE  : loadDouble(getFd(opcode),pAddress);   break;
+      case TRANSFER_EXTENDED: loadExtended(getFd(opcode),pAddress); break;
+      default: nRc = 0;
+   }
+
+   if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return nRc;
+}
+
+/* STF: store one register to memory, rounding to nearest when a
+   conversion is needed.  Addressing is the same as for PerformLDF.
+   Returns 0 for an unknown transfer length, 1 otherwise. */
+unsigned int PerformSTF(const unsigned int opcode)
+{
+   unsigned int *pBase, *pAddress, *pFinal, nRc = 1;
+
+   //fp_printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+   SetRoundingMode(ROUND_TO_NEAREST);
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode)) pBase += 2;
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+      pFinal += getOffset(opcode);
+   else
+      pFinal -= getOffset(opcode);
+
+   pAddress = PREINDEXED(opcode) ? pFinal : pBase;
+
+   switch (opcode & MASK_TRANSFER_LENGTH)
+   {
+      case TRANSFER_SINGLE  : storeSingle(getFd(opcode),pAddress);   break;
+      case TRANSFER_DOUBLE  : storeDouble(getFd(opcode),pAddress);   break;
+      case TRANSFER_EXTENDED: storeExtended(getFd(opcode),pAddress); break;
+      default: nRc = 0;
+   }
+
+   if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return nRc;
+}
+
+/* LFM: load getRegisterCount(opcode) consecutive registers, starting at
+   Fd and wrapping from register 7 back to 0; each register occupies
+   three words in memory.  Always returns 1. */
+unsigned int PerformLFM(const unsigned int opcode)
+{
+   unsigned int nLeft, Fd, *pBase, *pAddress, *pFinal;
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode)) pBase += 2;
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+      pFinal += getOffset(opcode);
+   else
+      pFinal -= getOffset(opcode);
+
+   pAddress = PREINDEXED(opcode) ? pFinal : pBase;
+
+   Fd = getFd(opcode);
+   for (nLeft = getRegisterCount(opcode); nLeft > 0; nLeft--)
+   {
+      loadMultiple(Fd,pAddress);
+      pAddress += 3;
+      Fd++;
+      if (Fd == 8) Fd = 0;
+   }
+
+   if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return 1;
+}
+
+/* SFM: store getRegisterCount(opcode) consecutive registers, starting
+   at Fd and wrapping from register 7 back to 0; each register occupies
+   three words in memory.  Always returns 1. */
+unsigned int PerformSFM(const unsigned int opcode)
+{
+   unsigned int nLeft, Fd, *pBase, *pAddress, *pFinal;
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode)) pBase += 2;
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+      pFinal += getOffset(opcode);
+   else
+      pFinal -= getOffset(opcode);
+
+   pAddress = PREINDEXED(opcode) ? pFinal : pBase;
+
+   Fd = getFd(opcode);
+   for (nLeft = getRegisterCount(opcode); nLeft > 0; nLeft--)
+   {
+      storeMultiple(Fd,pAddress);
+      pAddress += 3;
+      Fd++;
+      if (Fd == 8) Fd = 0;
+   }
+
+   if (WRITE_BACK(opcode)) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return 1;
+}
+
+#if 1
+/* Emulate one CPDT (data transfer) instruction by handing it to the
+   matching LDF/LFM/STF/SFM routine, tested in that order.  Returns the
+   routine's result, or 0 when the opcode is none of the four. */
+unsigned int EmulateCPDT(const unsigned int opcode)
+{
+   //fp_printk("EmulateCPDT(0x%08x)\n",opcode);
+
+   if (LDF_OP(opcode)) return PerformLDF(opcode);
+   if (LFM_OP(opcode)) return PerformLFM(opcode);
+   if (STF_OP(opcode)) return PerformSTF(opcode);
+   if (SFM_OP(opcode)) return PerformSFM(opcode);
+
+   return 0;
+}
+#endif
diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c
new file mode 100644
index 000000000..bfe13ba1f
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11_cprt.c
@@ -0,0 +1,313 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell, 1999
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "milieu.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+#include "fpa11.inl"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+extern flag floatx80_is_nan(floatx80);
+extern flag float64_is_nan( float64);
+extern flag float32_is_nan( float32);
+
+void SetRoundingMode(const unsigned int opcode);
+
+unsigned int PerformFLT(const unsigned int opcode);
+unsigned int PerformFIX(const unsigned int opcode);
+
+static unsigned int
+PerformComparison(const unsigned int opcode);
+
+/* Emulate one CPRT (register transfer) instruction: comparisons,
+   FLT/FIX conversions and FPSR reads and writes.  Returns 0 for an
+   opcode that is not handled (so the caller can report an invalid
+   instruction) and non-zero otherwise. */
+unsigned int EmulateCPRT(const unsigned int opcode)
+{
+   unsigned int nRc = 1;
+
+   //fp_printk("EmulateCPRT(0x%08x)\n",opcode);
+
+   if (opcode & 0x800000)
+   {
+      /* This is some variant of a comparison (PerformComparison will
+         sort out which one).  Since most of the other CPRT
+         instructions are oddball cases of some sort or other it makes
+         sense to pull this out into a fast path. */
+      return PerformComparison(opcode);
+   }
+
+   /* Hint to GCC that we'd like a jump table rather than a load of CMPs */
+   switch ((opcode & 0x700000) >> 20)
+   {
+      case FLT_CODE >> 20: nRc = PerformFLT(opcode); break;
+      case FIX_CODE >> 20: nRc = PerformFIX(opcode); break;
+
+      case WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break;
+      case RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break;
+
+#if 0
+      /* ?? Not at all sure about the mode checks here.  Linux never
+         calls the emulator from a non-USR fault but we always run in SVC
+         mode.  Is there even any point trying to emulate the way FPA11
+         behaves in this respect?
+
+         No - and I quote: 'The FPCR may only be present in some
+         implementations: it is there to control the hardware in an
+         implementation-specific manner, ...  The user mode of the
+         ARM is not permitted to use this register, and the WFC and
+         RFC instructions will trap if tried from user mode.'
+         Therefore, we do not provide the RFC and WFC instructions.
+         (rmk, 3/05/1999)
+      */
+      case WFC_CODE >> 20:
+      {
+         int mode = 0;
+         __asm__ volatile ("mrs %0, cpsr; and %0, %0, #0x1f;" : : "g" (mode));
+         nRc = (0x13 == mode) ? 1 : 0; /* in SVC processor mode? */
+         if (nRc) writeFPCR(readRegister(getRd(opcode)));
+      }
+      break;
+
+      case RFC_CODE >> 20:
+      {
+         int mode = 0;
+         __asm__ volatile ("mrs %0, cpsr; and %0, %0, #0x1f;" : : "g" (mode));
+         nRc = (0x13 == mode) ? 1 : 0; /* in SVC processor mode? */
+         if (nRc) writeRegister(getRd(opcode),readFPCR()); break;
+      }
+      break;
+#endif
+
+      default: nRc = 0;
+   }
+
+   return nRc;
+}
+
+/* FLT: convert the integer in ARM register Rd to floating point and
+   place it in FP register Fn, in the precision named by the opcode.
+   Returns 0 when the precision field is not recognised, 1 otherwise. */
+unsigned int PerformFLT(const unsigned int opcode)
+{
+   unsigned int nRc = 1;
+   SetRoundingMode(opcode);
+   SetRoundingPrecision(opcode);
+
+   switch (opcode & MASK_ROUNDING_PRECISION)
+   {
+      case ROUND_SINGLE:
+      {
+         fpa11->fpreg[getFn(opcode)].fType = typeSingle;
+         fpa11->fpreg[getFn(opcode)].fValue.fSingle =
+            int32_to_float32(readRegister(getRd(opcode)));
+      }
+      break;
+
+      case ROUND_DOUBLE:
+      {
+         fpa11->fpreg[getFn(opcode)].fType = typeDouble;
+         fpa11->fpreg[getFn(opcode)].fValue.fDouble =
+            int32_to_float64(readRegister(getRd(opcode)));
+      }
+      break;
+
+      case ROUND_EXTENDED:
+      {
+         fpa11->fpreg[getFn(opcode)].fType = typeExtended;
+         fpa11->fpreg[getFn(opcode)].fValue.fExtended =
+            int32_to_floatx80(readRegister(getRd(opcode)));
+      }
+      break;
+
+      default: nRc = 0;
+   }
+
+   return nRc;
+}
+
+/* FIX: convert the value in an FP register to a 32-bit integer and
+   write it to ARM register Rd.  Returns 0 when the source register
+   holds no value (its type is not single, double or extended). */
+unsigned int PerformFIX(const unsigned int opcode)
+{
+   unsigned int nRc = 1;
+   /* despite its name, this is the register selected by the Fm field */
+   unsigned int Fn = getFm(opcode);
+
+   SetRoundingMode(opcode);
+
+   switch (fpa11->fpreg[Fn].fType)
+   {
+      case typeSingle:
+      {
+         writeRegister(getRd(opcode),
+                       float32_to_int32(fpa11->fpreg[Fn].fValue.fSingle));
+      }
+      break;
+
+      case typeDouble:
+      {
+         writeRegister(getRd(opcode),
+                       float64_to_int32(fpa11->fpreg[Fn].fValue.fDouble));
+      }
+      break;
+
+      case typeExtended:
+      {
+         writeRegister(getRd(opcode),
+                       floatx80_to_int32(fpa11->fpreg[Fn].fValue.fExtended));
+      }
+      break;
+
+      default: nRc = 0;
+   }
+
+   return nRc;
+}
+
+
+/* Compare two ordered extended values and write the condition codes:
+      Fn <  Fm : CC_NEGATIVE
+      Fn == Fm : CC_ZERO and CC_CARRY
+      Fn >  Fm : CC_CARRY
+   so that CC_CARRY means "greater than or equal".  The caller filters
+   out NaN operands before calling.  Always returns 1. */
+static unsigned int __inline__
+PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
+{
+   unsigned int flags = 0;
+
+   /* test for less than condition */
+   if (floatx80_lt(Fn,Fm))
+   {
+      flags |= CC_NEGATIVE;
+   }
+
+   /* test for equal condition; equality is also part of "greater than
+      or equal", so carry has to be set together with zero (it used to
+      be left clear for equal operands) */
+   if (floatx80_eq(Fn,Fm))
+   {
+      flags |= CC_ZERO | CC_CARRY;
+   }
+
+   /* test for (strictly) greater than condition */
+   if (floatx80_lt(Fm,Fn))
+   {
+      flags |= CC_CARRY;
+   }
+
+   writeConditionCodes(flags);
+   return 1;
+}
+
+/* This instruction sets the flags N, Z, C, V in the FPSR. */
+
+static unsigned int PerformComparison(const unsigned int opcode)
+{
+   unsigned int Fn, Fm;
+   floatx80 rFn, rFm;
+   int e_flag = opcode & 0x400000; /* 1 if CxFE */
+   int n_flag = opcode & 0x200000; /* 1 if CNxx */
+   unsigned int flags = 0;
+
+   //fp_printk("PerformComparison(0x%08x)\n",opcode);
+
+   Fn = getFn(opcode);
+   Fm = getFm(opcode);
+
+   /* Check for unordered condition and convert all operands to 80-bit
+      format.
+      ?? Might be some mileage in avoiding this conversion if possible.
+      Eg, if both operands are 32-bit, detect this and do a 32-bit
+      comparison (cheaper than an 80-bit one). */
+   switch (fpa11->fpreg[Fn].fType)
+   {
+      case typeSingle:
+         //fp_printk("single.\n");
+         if (float32_is_nan(fpa11->fpreg[Fn].fValue.fSingle))
+            goto unordered;
+         rFn = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle);
+      break;
+
+      case typeDouble:
+         //fp_printk("double.\n");
+         if (float64_is_nan(fpa11->fpreg[Fn].fValue.fDouble))
+            goto unordered;
+         rFn = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble);
+      break;
+
+      case typeExtended:
+         //fp_printk("extended.\n");
+         if (floatx80_is_nan(fpa11->fpreg[Fn].fValue.fExtended))
+            goto unordered;
+         rFn = fpa11->fpreg[Fn].fValue.fExtended;
+      break;
+
+      default: return 0;
+   }
+
+   if (CONSTANT_FM(opcode))
+   {
+      //fp_printk("Fm is a constant: #%d.\n",Fm);
+      rFm = getExtendedConstant(Fm);
+      if (floatx80_is_nan(rFm))
+         goto unordered;
+   }
+   else
+   {
+      //fp_printk("Fm = r%d which contains a ",Fm);
+      switch (fpa11->fpreg[Fm].fType)
+      {
+         case typeSingle:
+            //fp_printk("single.\n");
+            if (float32_is_nan(fpa11->fpreg[Fm].fValue.fSingle))
+               goto unordered;
+            rFm = float32_to_floatx80(fpa11->fpreg[Fm].fValue.fSingle);
+         break;
+
+         case typeDouble:
+            //fp_printk("double.\n");
+            if (float64_is_nan(fpa11->fpreg[Fm].fValue.fDouble))
+               goto unordered;
+            rFm = float64_to_floatx80(fpa11->fpreg[Fm].fValue.fDouble);
+         break;
+
+         case typeExtended:
+            //fp_printk("extended.\n");
+            if (floatx80_is_nan(fpa11->fpreg[Fm].fValue.fExtended))
+               goto unordered;
+            rFm = fpa11->fpreg[Fm].fValue.fExtended;
+         break;
+
+         default: return 0;
+      }
+   }
+
+   /* the CNxx forms compare against the negated second operand */
+   if (n_flag)
+   {
+      rFm.high ^= 0x8000;
+   }
+
+   return PerformComparisonOperation(rFn,rFm);
+
+ unordered:
+   /* ?? The FPA data sheet is pretty vague about this, in particular
+      about whether the non-E comparisons can ever raise exceptions.
+      This implementation is based on a combination of what it says in
+      the data sheet, observation of how the Acorn emulator actually
+      behaves (and how programs expect it to) and guesswork. */
+   flags |= CC_OVERFLOW;
+
+   if (BIT_AC & readFPSR()) flags |= CC_CARRY;
+
+   if (e_flag) float_raise(float_flag_invalid);
+
+   writeConditionCodes(flags);
+   return 1;
+}
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
new file mode 100644
index 000000000..fb05fc6fb
--- /dev/null
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -0,0 +1,167 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell, 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/ + +#include "config.h" + +#ifdef MODULE +#include <linux/module.h> +#include <linux/version.h> +#else +#define MOD_INC_USE_COUNT +#define MOD_DEC_USE_COUNT +#endif + +/* XXX */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/spinlock.h> +#include <asm/atomic.h> +#include <asm/pgtable.h> +/* XXX */ + +#include "softfloat.h" +#include "fpopcode.h" +#include "fpmodule.h" +#include "fpa11.h" +#include "fpa11.inl" + +/* external data */ +extern FPA11 *fpa11; + +/* kernel symbols required for signal handling */ +typedef struct task_struct* PTASK; + +#ifdef MODULE +int fp_printk(const char *,...); +void fp_send_sig(unsigned long sig, PTASK p, int priv); +#if LINUX_VERSION_CODE > 0x20115 +MODULE_AUTHOR("Scott Bambrough <scottb@corelcomputer.com>"); +MODULE_DESCRIPTION("NWFPE floating point emulator"); +#endif + +#else +#define fp_printk printk +#define fp_send_sig send_sig +#define kern_fp_enter fp_enter +#endif + +/* kernel function prototypes required */ +void C_SYMBOL_NAME(fp_setup)(void); + +/* external declarations for saved kernel symbols */ +extern unsigned int C_SYMBOL_NAME(kern_fp_enter); + +/* forward declarations */ +extern void nwfpe_enter(void); + +/* Original value of fp_enter from kernel before patched by fpe_init. */ +static unsigned int orig_fp_enter; + +/* Address of user registers on the kernel stack. 
*/
+unsigned int *userRegisters;
+
+/* Print the emulator's title, version and copyright notice, one
+   fp_printk call per piece. */
+void __init C_SYMBOL_NAME(fpe_version)(void)
+{
+   static const char szTitle[] = "<4>NetWinder Floating Point Emulator ";
+   static const char szVersion[] = "V0.94.1 ";
+   static const char szCopyright[] = "(c) 1998 Corel Computer Corp.\n";
+
+   C_SYMBOL_NAME(fp_printk)(szTitle);
+   C_SYMBOL_NAME(fp_printk)(szVersion);
+   C_SYMBOL_NAME(fp_printk)(szCopyright);
+}
+
+/* Install the emulator: announce it, remember the FP entry point that
+   was installed before, and replace it with nwfpe_enter.
+   Always returns 0. */
+int __init fpe_init(void)
+{
+   /* Display title, version and copyright information. */
+   C_SYMBOL_NAME(fpe_version)();
+
+   /* Save pointer to the old FP handler and then patch ourselves in */
+   orig_fp_enter = C_SYMBOL_NAME(kern_fp_enter);
+   C_SYMBOL_NAME(kern_fp_enter) = (unsigned int)C_SYMBOL_NAME(nwfpe_enter);
+
+   return 0;
+}
+
+#ifdef MODULE
+/* Module load hook: simply runs fpe_init. */
+int init_module(void)
+{
+   return fpe_init();
+}
+
+/* Module unload hook: puts back the FP entry point saved by fpe_init. */
+void cleanup_module(void)
+{
+   /* Restore the values we saved earlier. */
+   C_SYMBOL_NAME(kern_fp_enter) = orig_fp_enter;
+}
+#endif
+
+#define _ARM_pc 60
+#define _ARM_cpsr 64
+
+/*
+ScottB: November 4, 1998
+
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+
+[1/1/99: Not quite true any more unfortunately. There is Linux-specific
+code to access data in user space in some other source files at the
+moment. --philb]
+
+float_exception_flags is a global variable in SoftFloat.
+
+This function is called by the SoftFloat routines to raise a floating
+point exception. We check the trap enable byte in the FPSR, and raise
+a SIGFPE exception if necessary. If not the relevant bits in the
+cumulative exceptions flag byte are set and we return.
+*/
+
+void float_raise(signed char flags)
+{
+#if 0
+   printk(KERN_DEBUG "NWFPE: exception %08x at %08x from %08x\n", flags,
+          __builtin_return_address(0), userRegisters[15]);
+#endif
+
+   /* Keep SoftFloat's own global exception state up to date. */
+   float_exception_flags |= flags;
+
+   /* Shifting the flags left by 16 lines them up with the bits tested
+      here as trap enables. */
+   if (readFPSR() & (flags << 16))
+   {
+      /* raise exception */
+      C_SYMBOL_NAME(fp_send_sig)(SIGFPE,C_SYMBOL_NAME(current),1);
+   }
+   else
+   {
+      /* Set the cumulative exception flags on top of the current FPSR
+         contents instead of handing writeFPSR() the new flags alone.
+         NOTE(review): writeFPSR() is defined outside this file; if it
+         stores its argument, passing only `flags' would clear the trap
+         enable bits and every flag accumulated earlier.  OR-ing with
+         readFPSR() is correct under either behaviour -- confirm. */
+      writeFPSR(readFPSR() | flags);
+   }
+}
diff --git a/arch/arm/nwfpe/fpmodule.h b/arch/arm/nwfpe/fpmodule.h
new file mode 100644
index 000000000..39c762935
--- /dev/null
+++ b/arch/arm/nwfpe/fpmodule.h
@@ -0,0 +1,53 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPMODULE_H__
+#define __FPMODULE_H__
+
+#include <linux/config.h>
+
+/* Indices into the saved user register block (userRegisters[]).
+   Without CONFIG_CPU_32, REG_CPSR is the same slot as REG_PC. */
+#ifdef CONFIG_CPU_32
+#define REG_ORIG_R0     17
+#define REG_CPSR        16
+#else
+#define REG_ORIG_R0     16
+#define REG_CPSR        15
+#endif
+
+#define REG_PC          15
+#define REG_LR          14
+#define REG_SP          13
+#define REG_IP          12
+#define REG_FP          11
+#define REG_R10         10
+#define REG_R9          9
+#define REG_R8          8
+#define REG_R7          7
+#define REG_R6          6
+#define REG_R5          5
+#define REG_R4          4
+#define REG_R3          3
+#define REG_R2          2
+#define REG_R1          1
+#define REG_R0          0
+
+#endif
diff --git a/arch/arm/nwfpe/fpmodule.inl b/arch/arm/nwfpe/fpmodule.inl
new file mode 100644
index 000000000..c76b7fd55
--- /dev/null
+++ b/arch/arm/nwfpe/fpmodule.inl
@@ -0,0 +1,88 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Address of user registers on the kernel stack. */
+extern unsigned int *userRegisters;
+
+/* Read user register nReg; a read of REG_PC is corrected by -4. */
+extern __inline__
+unsigned int readRegister(const unsigned int nReg)
+{
+  /* Note: The CPU thinks it has dealt with the current instruction.  As
+           a result the program counter has been advanced to the next
+           instruction, and points 4 bytes beyond the actual instruction
+           that caused the invalid instruction trap to occur.  We adjust
+           for this in this routine.  LDF/STF instructions with Rn = PC
+           depend on the PC being correct, as they use PC+8 in their
+           address calculations. */
+  unsigned int val = userRegisters[nReg];
+
+  if (REG_PC == nReg)
+    val -= 4;
+
+  return val;
+}
+
+/* Store val into user register nReg, unmodified. */
+extern __inline__
+void writeRegister(const unsigned int nReg, const unsigned int val)
+{
+  userRegisters[nReg] = val;
+}
+
+/* Return the saved status word. */
+extern __inline__
+unsigned int readCPSR(void)
+{
+  /*
+   * Read userRegisters directly, as writeConditionCodes() does:
+   * REG_CPSR may be the same slot as REG_PC, and going through
+   * readRegister() would then apply the PC-only "-4" correction
+   * to the status word.
+   */
+  return userRegisters[REG_CPSR];
+}
+
+/* Replace the saved status word with val. */
+extern __inline__
+void writeCPSR(const unsigned int val)
+{
+  writeRegister(REG_CPSR, val);
+}
+
+/* Return the CC_MASK bits of the status word (0 under __FPEM_TEST__). */
+extern __inline__
+unsigned int readConditionCodes(void)
+{
+#ifdef __FPEM_TEST__
+  return (0);
+#else
+  return (readCPSR() & CC_MASK);
+#endif
+}
+
+/* Replace only the CC_MASK bits of the status word with those of val. */
+extern __inline__
+void writeConditionCodes(const unsigned int val)
+{
+  unsigned int rval;
+
+  /*
+   * Operate directly on userRegisters since
+   * the CPSR may be the PC register itself.
+   */
+  rval = userRegisters[REG_CPSR] & ~CC_MASK;
+  userRegisters[REG_CPSR] = rval | (val & CC_MASK);
+}
+
+/* Fetch one word from pMem with a plain dereference. */
+extern __inline__
+unsigned int readMemoryInt(unsigned int *pMem)
+{
+  return *pMem;
+}
diff --git a/arch/arm/nwfpe/fpopcode.c b/arch/arm/nwfpe/fpopcode.c
new file mode 100644
index 000000000..aa91e1e95
--- /dev/null
+++ b/arch/arm/nwfpe/fpopcode.c
@@ -0,0 +1,164 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpsr.h"
+#include "fpa11.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+/* The eight constants selectable through the getXxxConstant() index,
+   one table per precision; all three tables use the same ordering. */
+static floatx80 floatx80Constant[] = {
+  { 0x0000, 0x0000000000000000ULL},    /* extended 0.0 */
+  { 0x3fff, 0x8000000000000000ULL},    /* extended 1.0 */
+  { 0x4000, 0x8000000000000000ULL},    /* extended 2.0 */
+  { 0x4000, 0xc000000000000000ULL},    /* extended 3.0 */
+  { 0x4001, 0x8000000000000000ULL},    /* extended 4.0 */
+  { 0x4001, 0xa000000000000000ULL},    /* extended 5.0 */
+  { 0x3ffe, 0x8000000000000000ULL},    /* extended 0.5 */
+  { 0x4002, 0xa000000000000000ULL}     /* extended 10.0 */
+};
+
+static float64 float64Constant[] = {
+  0x0000000000000000ULL,               /* double 0.0 */
+  0x3ff0000000000000ULL,               /* double 1.0 */
+  0x4000000000000000ULL,               /* double 2.0 */
+  0x4008000000000000ULL,               /* double 3.0 */
+  0x4010000000000000ULL,               /* double 4.0 */
+  0x4014000000000000ULL,               /* double 5.0 */
+  0x3fe0000000000000ULL,               /* double 0.5 */
+  0x4024000000000000ULL                /* double 10.0 */
+};
+
+static float32 float32Constant[] = {
+  0x00000000,                          /* single 0.0 */
+  0x3f800000,                          /* single 1.0 */
+  0x40000000,                          /* single 2.0 */
+  0x40400000,                          /* single 3.0 */
+  0x40800000,                          /* single 4.0 */
+  0x40a00000,                          /* single 5.0 */
+  0x3f000000,                          /* single 0.5 */
+  0x41200000                           /* single 10.0 */
+};
+
+/* Table lookups; nIndex is used as-is, with no range check. */
+floatx80 getExtendedConstant(const unsigned int nIndex)
+{
+   floatx80 constant = floatx80Constant[nIndex];
+   return constant;
+}
+
+float64 getDoubleConstant(const unsigned int nIndex)
+{
+   float64 constant = float64Constant[nIndex];
+   return constant;
+}
+
+float32 getSingleConstant(const unsigned int nIndex)
+{
+   float32 constant = float32Constant[nIndex];
+   return constant;
+}
+
+/* Decode the transfer length field: 1, 2 or 3 for single, double or
+   extended precision, 0 for any other bit pattern. */
+unsigned int getTransferLength(const unsigned int opcode)
+{
+   const unsigned int lengthBits = opcode & MASK_TRANSFER_LENGTH;
+
+   if (lengthBits == 0x00000000) return 1;   /* single precision */
+   if (lengthBits == 0x00008000) return 2;   /* double precision */
+   if (lengthBits == 0x00400000) return 3;   /* extended precision */
+
+   return 0;
+}
+
+/* Decode the register count field; an all-zero field means 4. */
+unsigned int getRegisterCount(const unsigned int opcode)
+{
+   const unsigned int countBits = opcode & MASK_REGISTER_COUNT;
+
+   if (countBits == 0x00000000) return 4;
+   if (countBits == 0x00008000) return 1;
+   if (countBits == 0x00400000) return 2;
+   if (countBits == 0x00408000) return 3;
+
+   return 0;
+}
+
+/* Decode the rounding precision field into 1, 2 or 3; 0 for the
+   remaining bit pattern. */
+unsigned int getRoundingPrecision(const unsigned int opcode)
+{
+   const unsigned int precisionBits = opcode & MASK_ROUNDING_PRECISION;
+
+   if (precisionBits == 0x00000000) return 1;
+   if (precisionBits == 0x00000080) return 2;
+   if (precisionBits == 0x00080000) return 3;
+
+   return 0;
+}
+
+/* Decode the destination size field into a typeXxx value; typeNone for
+   the remaining bit pattern. */
+unsigned int getDestinationSize(const unsigned int opcode)
+{
+   const unsigned int sizeBits = opcode & MASK_DESTINATION_SIZE;
+
+   if (sizeBits == 0x00000000) return typeSingle;
+   if (sizeBits == 0x00000080) return typeDouble;
+   if (sizeBits == 0x00080000) return typeExtended;
+
+   return typeNone;
+}
+
+/* condition code lookup table
+   index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+   bit position in short is condition code: NZCV */
+unsigned short aCC[16] = {
+   0xF0F0,   /* EQ == Z set */
+   0x0F0F,   /* NE */
+   0xCCCC,   /* CS == C set */
+   0x3333,   /* CC */
+   0xFF00,   /* MI == N set */
+   0x00FF,   /* PL */
+   0xAAAA,   /* VS == V set */
+   0x5555,   /* VC */
+   0x0C0C,   /* HI == C set && Z clear */
+   0xF3F3,   /* LS == C clear || Z set */
+   0xAA55,   /* GE == (N==V) */
+   0x55AA,   /* LT == (N!=V) */
+   0x0A05,   /* GT == (!Z && (N==V)) */
+   0xF5FA,   /* LE == (Z || (N!=V)) */
+   0xFFFF,   /* AL always */
+   0         /* NV */
+};
+
+/* Return 1 when the condition in the top four bits of opcode holds for
+   the flags in the top four bits of ccodes, else 0. */
+unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes)
+{
+   const unsigned int condition = opcode >> 28;
+   const unsigned int nzcv = ccodes >> 28;
+
+   return (aCC[condition] >> nzcv) & 1;
+}
diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h
new file mode 100644
index 000000000..d6d7aa11a
--- /dev/null
+++ b/arch/arm/nwfpe/fpopcode.h
@@ -0,0 +1,376 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/ + +#ifndef __FPOPCODE_H__ +#define __FPOPCODE_H__ + +/* +ARM Floating Point Instruction Classes +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +|c o n d|1 1 0 P|U|u|W|L| Rn |v| Fd |0|0|0|1| o f f s e t | CPDT +|c o n d|1 1 0 P|U|w|W|L| Rn |x| Fd |0|0|0|1| o f f s e t | CPDT +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +|c o n d|1 1 1 0|a|b|c|d|e| Fn |j| Fd |0|0|0|1|f|g|h|0|i| Fm | CPDO +|c o n d|1 1 1 0|a|b|c|L|e| Fn | Rd |0|0|0|1|f|g|h|1|i| Fm | CPRT +|c o n d|1 1 1 0|a|b|c|1|e| Fn |1|1|1|1|0|0|0|1|f|g|h|1|i| Fm | comparisons +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + +CPDT data transfer instructions + LDF, STF, LFM, SFM + +CPDO dyadic arithmetic instructions + ADF, MUF, SUF, RSF, DVF, RDF, + POW, RPW, RMF, FML, FDV, FRD, POL + +CPDO monadic arithmetic instructions + MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP, + SIN, COS, TAN, ASN, ACS, ATN, URD, NRM + +CPRT joint arithmetic/data transfer instructions + FIX (arithmetic followed by load/store) + FLT (load/store followed by arithmetic) + CMF, CNF CMFE, CNFE (comparisons) + WFS, RFS (write/read floating point status register) + WFC, RFC (write/read floating point control register) + +cond condition codes +P pre/post index bit: 0 = postindex, 1 = preindex +U up/down bit: 0 = stack grows down, 1 = stack grows up +W write back bit: 1 = update base register (Rn) +L load/store bit: 0 = store, 1 = load +Rn base register +Rd destination/source register +Fd floating point destination register +Fn floating point source register +Fm floating point source register or floating point constant + +uv transfer length (TABLE 1) +wx register count (TABLE 2) +abcd arithmetic opcode (TABLES 3 & 4) +ef destination size (rounding precision) (TABLE 5) +gh rounding mode (TABLE 6) +j dyadic/monadic bit: 0 = dyadic, 1 = monadic +i constant bit: 1 = constant (TABLE 6) +*/ + +/* +TABLE 1 ++-------------------------+---+---+---------+---------+ +| Precision | u | v | FPSR.EP | 
length  |
++-------------------------+---+---+---------+---------+
+| Single                  | 0 | 0 |    x    | 1 word  |
+| Double                  | 0 | 1 |    x    | 2 words |
+| Extended                | 1 | 0 |    x    | 3 words |
+| Packed decimal          | 1 | 1 |    0    | 3 words |
+| Expanded packed decimal | 1 | 1 |    1    | 4 words |
++-------------------------+---+---+---------+---------+
+Note: x = don't care
+*/
+
+/*
+TABLE 2
++---+---+---------------------------------+
+| w | x | Number of registers to transfer |
++---+---+---------------------------------+
+| 0 | 1 |                1                |
+| 1 | 0 |                2                |
+| 1 | 1 |                3                |
+| 0 | 0 |                4                |
++---+---+---------------------------------+
+*/
+
+/*
+TABLE 3: Dyadic Floating Point Opcodes
++---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
++---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | ADF      | Add                   | Fd := Fn + Fm         |
+| 0 | 0 | 0 | 1 | MUF      | Multiply              | Fd := Fn * Fm         |
+| 0 | 0 | 1 | 0 | SUF      | Subtract              | Fd := Fn - Fm         |
+| 0 | 0 | 1 | 1 | RSF      | Reverse subtract      | Fd := Fm - Fn         |
+| 0 | 1 | 0 | 0 | DVF      | Divide                | Fd := Fn / Fm         |
+| 0 | 1 | 0 | 1 | RDF      | Reverse divide        | Fd := Fm / Fn         |
+| 0 | 1 | 1 | 0 | POW      | Power                 | Fd := Fn ^ Fm         |
+| 0 | 1 | 1 | 1 | RPW      | Reverse power         | Fd := Fm ^ Fn         |
+| 1 | 0 | 0 | 0 | RMF      | Remainder             | Fd := IEEE rem(Fn/Fm) |
+| 1 | 0 | 0 | 1 | FML      | Fast Multiply         | Fd := Fn * Fm         |
+| 1 | 0 | 1 | 0 | FDV      | Fast Divide           | Fd := Fn / Fm         |
+| 1 | 0 | 1 | 1 | FRD      | Fast reverse divide   | Fd := Fm / Fn         |
+| 1 | 1 | 0 | 0 | POL      | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm)  |
+| 1 | 1 | 0 | 1 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 0 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 1 |          | undefined instruction | trap                  |
++---+---+---+---+----------+-----------------------+-----------------------+
+Note: POW, RPW, POL are deprecated, and are available for backwards
+      compatibility only.
+*/ + +/* +TABLE 4: Monadic Floating Point Opcodes ++---+---+---+---+----------+-----------------------+-----------------------+ +| a | b | c | d | Mnemonic | Description | Operation | ++---+---+---+---+----------+-----------------------+-----------------------+ +| 0 | 0 | 0 | 0 | MVF | Move | Fd := Fm | +| 0 | 0 | 0 | 1 | MNF | Move negated | Fd := - Fm | +| 0 | 0 | 1 | 0 | ABS | Absolute value | Fd := abs(Fm) | +| 0 | 0 | 1 | 1 | RND | Round to integer | Fd := int(Fm) | +| 0 | 1 | 0 | 0 | SQT | Square root | Fd := sqrt(Fm) | +| 0 | 1 | 0 | 1 | LOG | Log base 10 | Fd := log10(Fm) | +| 0 | 1 | 1 | 0 | LGN | Log base e | Fd := ln(Fm) | +| 0 | 1 | 1 | 1 | EXP | Exponent | Fd := e ^ Fm | +| 1 | 0 | 0 | 0 | SIN | Sine | Fd := sin(Fm) | +| 1 | 0 | 0 | 1 | COS | Cosine | Fd := cos(Fm) | +| 1 | 0 | 1 | 0 | TAN | Tangent | Fd := tan(Fm) | +| 1 | 0 | 1 | 1 | ASN | Arc Sine | Fd := arcsin(Fm) | +| 1 | 1 | 0 | 0 | ACS | Arc Cosine | Fd := arccos(Fm) | +| 1 | 1 | 0 | 1 | ATN | Arc Tangent | Fd := arctan(Fm) | +| 1 | 1 | 1 | 0 | URD | Unnormalized round | Fd := int(Fm) | +| 1 | 1 | 1 | 1 | NRM | Normalize | Fd := norm(Fm) | ++---+---+---+---+----------+-----------------------+-----------------------+ +Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are + available for backwards compatibility only. 
+*/
+
+/*
+TABLE 5
++-------------------------+---+---+
+| Rounding Precision      | e | f |
++-------------------------+---+---+
+| IEEE Single precision   | 0 | 0 |
+| IEEE Double precision   | 0 | 1 |
+| IEEE Extended precision | 1 | 0 |
+| undefined (trap)        | 1 | 1 |
++-------------------------+---+---+
+*/
+
+/*
+TABLE 6
++---------------------------------+---+---+
+| Rounding Mode                   | g | h |
++---------------------------------+---+---+
+| Round to nearest (default)      | 0 | 0 |
+| Round toward plus infinity      | 0 | 1 |
+| Round toward negative infinity  | 1 | 0 |
+| Round toward zero               | 1 | 1 |
++---------------------------------+---+---+
+*/
+
+/*
+===
+=== Definitions for load and store instructions
+===
+*/
+
+/* bit masks */
+#define BIT_PREINDEX            0x01000000
+#define BIT_UP                  0x00800000
+#define BIT_WRITE_BACK          0x00200000
+#define BIT_LOAD                0x00100000
+
+/* masks for load/store */
+#define MASK_CPDT               0x0c000000  /* data transfer opcode */
+#define MASK_OFFSET             0x000000ff
+#define MASK_TRANSFER_LENGTH    0x00408000
+#define MASK_REGISTER_COUNT     MASK_TRANSFER_LENGTH
+#define MASK_COPROCESSOR        0x00000f00
+
+/* Tests for transfer length */
+#define TRANSFER_SINGLE         0x00000000
+#define TRANSFER_DOUBLE         0x00008000
+#define TRANSFER_EXTENDED       0x00400000
+#define TRANSFER_PACKED         MASK_TRANSFER_LENGTH
+
+/* All function-like macros below parenthesise their argument so that an
+   expression such as (a | b) can be passed safely. */
+
+/* Get the coprocessor number from the opcode. */
+#define getCoprocessorNumber(opcode)    (((opcode) & MASK_COPROCESSOR) >> 8)
+
+/* Get the offset from the opcode. */
+#define getOffset(opcode)               ((opcode) & MASK_OFFSET)
+
+/* Tests for specific data transfer load/store opcodes. */
+#define TEST_OPCODE(opcode,mask)        (((opcode) & (mask)) == (mask))
+
+#define LOAD_OP(opcode)   TEST_OPCODE((opcode),MASK_CPDT | BIT_LOAD)
+#define STORE_OP(opcode)  (((opcode) & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT)
+
+#define LDF_OP(opcode)    (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define LFM_OP(opcode)    (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+#define STF_OP(opcode)    (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define SFM_OP(opcode)    (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+
+#define PREINDEXED(opcode)              (((opcode) & BIT_PREINDEX) != 0)
+#define POSTINDEXED(opcode)             (((opcode) & BIT_PREINDEX) == 0)
+#define BIT_UP_SET(opcode)              (((opcode) & BIT_UP) != 0)
+/* There is no BIT_DOWN definition; "down" is simply BIT_UP being clear. */
+#define BIT_UP_CLEAR(opcode)            (((opcode) & BIT_UP) == 0)
+#define WRITE_BACK(opcode)              (((opcode) & BIT_WRITE_BACK) != 0)
+#define LOAD(opcode)                    (((opcode) & BIT_LOAD) != 0)
+#define STORE(opcode)                   (((opcode) & BIT_LOAD) == 0)
+
+/*
+===
+=== Definitions for arithmetic instructions
+===
+*/
+/* bit masks */
+#define BIT_MONADIC     0x00008000
+#define BIT_CONSTANT    0x00000008
+
+#define CONSTANT_FM(opcode)             (((opcode) & BIT_CONSTANT) != 0)
+#define MONADIC_INSTRUCTION(opcode)     (((opcode) & BIT_MONADIC) != 0)
+
+/* instruction identification masks */
+#define MASK_CPDO               0x0e000000  /* arithmetic opcode */
+#define MASK_ARITHMETIC_OPCODE  0x00f08000
+#define MASK_DESTINATION_SIZE   0x00080080
+
+/* dyadic arithmetic opcodes.
*/ +#define ADF_CODE 0x00000000 +#define MUF_CODE 0x00100000 +#define SUF_CODE 0x00200000 +#define RSF_CODE 0x00300000 +#define DVF_CODE 0x00400000 +#define RDF_CODE 0x00500000 +#define POW_CODE 0x00600000 +#define RPW_CODE 0x00700000 +#define RMF_CODE 0x00800000 +#define FML_CODE 0x00900000 +#define FDV_CODE 0x00a00000 +#define FRD_CODE 0x00b00000 +#define POL_CODE 0x00c00000 +/* 0x00d00000 is an invalid dyadic arithmetic opcode */ +/* 0x00e00000 is an invalid dyadic arithmetic opcode */ +/* 0x00f00000 is an invalid dyadic arithmetic opcode */ + +/* monadic arithmetic opcodes. */ +#define MVF_CODE 0x00008000 +#define MNF_CODE 0x00108000 +#define ABS_CODE 0x00208000 +#define RND_CODE 0x00308000 +#define SQT_CODE 0x00408000 +#define LOG_CODE 0x00508000 +#define LGN_CODE 0x00608000 +#define EXP_CODE 0x00708000 +#define SIN_CODE 0x00808000 +#define COS_CODE 0x00908000 +#define TAN_CODE 0x00a08000 +#define ASN_CODE 0x00b08000 +#define ACS_CODE 0x00c08000 +#define ATN_CODE 0x00d08000 +#define URD_CODE 0x00e08000 +#define NRM_CODE 0x00f08000 + +/* +=== +=== Definitions for register transfer and comparison instructions +=== +*/ + +#define MASK_CPRT 0x0e000010 /* register transfer opcode */ +#define MASK_CPRT_CODE 0x00f00000 +#define FLT_CODE 0x00000000 +#define FIX_CODE 0x00100000 +#define WFS_CODE 0x00200000 +#define RFS_CODE 0x00300000 +#define WFC_CODE 0x00400000 +#define RFC_CODE 0x00500000 +#define CMF_CODE 0x00900000 +#define CNF_CODE 0x00b00000 +#define CMFE_CODE 0x00d00000 +#define CNFE_CODE 0x00f00000 + +/* +=== +=== Common definitions +=== +*/ + +/* register masks */ +#define MASK_Rd 0x0000f000 +#define MASK_Rn 0x000f0000 +#define MASK_Fd 0x00007000 +#define MASK_Fm 0x00000007 +#define MASK_Fn 0x00070000 + +/* condition code masks */ +#define CC_MASK 0xf0000000 +#define CC_NEGATIVE 0x80000000 +#define CC_ZERO 0x40000000 +#define CC_CARRY 0x20000000 +#define CC_OVERFLOW 0x10000000 +#define CC_EQ 0x00000000 +#define CC_NE 0x10000000 +#define CC_CS 0x20000000 
+#define CC_HS           CC_CS
+#define CC_CC           0x30000000
+#define CC_LO           CC_CC
+#define CC_MI           0x40000000
+#define CC_PL           0x50000000
+#define CC_VS           0x60000000
+#define CC_VC           0x70000000
+#define CC_HI           0x80000000
+#define CC_LS           0x90000000
+#define CC_GE           0xa0000000
+#define CC_LT           0xb0000000
+#define CC_GT           0xc0000000
+#define CC_LE           0xd0000000
+#define CC_AL           0xe0000000
+#define CC_NV           0xf0000000
+
+/* rounding masks/values */
+#define MASK_ROUNDING_MODE      0x00000060
+#define ROUND_TO_NEAREST        0x00000000
+#define ROUND_TO_PLUS_INFINITY  0x00000020
+#define ROUND_TO_MINUS_INFINITY 0x00000040
+#define ROUND_TO_ZERO           0x00000060
+
+#define MASK_ROUNDING_PRECISION 0x00080080
+#define ROUND_SINGLE            0x00000000
+#define ROUND_DOUBLE            0x00000080
+#define ROUND_EXTENDED          0x00080000
+
+/* The field extraction macros below parenthesise their argument so that
+   an expression such as (a | b) can be passed safely. */
+
+/* Get the condition code from the opcode. */
+#define getCondition(opcode)            ((opcode) >> 28)
+
+/* Get the source register from the opcode. */
+#define getRn(opcode)                   (((opcode) & MASK_Rn) >> 16)
+
+/* Get the destination floating point register from the opcode. */
+#define getFd(opcode)                   (((opcode) & MASK_Fd) >> 12)
+
+/* Get the first source floating point register from the opcode. */
+#define getFn(opcode)                   (((opcode) & MASK_Fn) >> 16)
+
+/* Get the second source floating point register from the opcode. */
+#define getFm(opcode)                   ((opcode) & MASK_Fm)
+
+/* Get the destination register from the opcode. */
+#define getRd(opcode)                   (((opcode) & MASK_Rd) >> 12)
+
+/* Get the rounding mode from the opcode. */
+#define getRoundingMode(opcode)         (((opcode) & MASK_ROUNDING_MODE) >> 5)
+
+/* Constant table lookups implemented in fpopcode.c. */
+float32 getSingleConstant(const unsigned int nIndex);
+float64 getDoubleConstant(const unsigned int nIndex);
+floatx80 getExtendedConstant(const unsigned int nIndex);
+
+/* Opcode field decoders implemented in fpopcode.c. */
+unsigned int getTransferLength(const unsigned int opcode);
+unsigned int getRegisterCount(const unsigned int opcode);
+unsigned int getRoundingPrecision(const unsigned int opcode);
+unsigned int getDestinationSize(const unsigned int opcode);
+
+/* Condition test implemented in fpopcode.c: 1 if it holds, else 0. */
+unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes);
+
+#endif
diff --git a/arch/arm/nwfpe/fpsr.h b/arch/arm/nwfpe/fpsr.h
new file mode 100644
index 000000000..f58994ac2
--- /dev/null
+++ b/arch/arm/nwfpe/fpsr.h
@@ -0,0 +1,108 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPSR_H__
+#define __FPSR_H__
+
+/*
+The FPSR is a 32 bit register consisting of 4 parts, each exactly
+one byte.
+
+        SYSTEM ID
+        EXCEPTION TRAP ENABLE BYTE
+        SYSTEM CONTROL BYTE
+        CUMULATIVE EXCEPTION FLAGS BYTE
+
+The FPCR is a 32 bit register consisting of bit flags.
+*/
+
+/* SYSTEM ID
+------------
+Note: the system id byte is read only  */
+
+typedef unsigned int FPSR;  /* type for floating point status register */
+typedef unsigned int FPCR;  /* type for floating point control register */
+
+#define MASK_SYSID              0xff000000
+#define BIT_HARDWARE            0x80000000
+#define FP_EMULATOR             0x01000000  /* System ID for emulator */
+#define FP_ACCELERATOR          0x81000000  /* System ID for FPA11 */
+
+/* EXCEPTION TRAP ENABLE BYTE
+----------------------------- */
+
+#define MASK_TRAP_ENABLE        0x00ff0000
+#define MASK_TRAP_ENABLE_STRICT 0x001f0000
+#define BIT_IXE         0x00100000   /* inexact exception enable */
+#define BIT_UFE         0x00080000   /* underflow exception enable */
+#define BIT_OFE         0x00040000   /* overflow exception enable */
+#define BIT_DZE         0x00020000   /* divide by zero exception enable */
+#define BIT_IOE         0x00010000   /* invalid operation exception enable */
+
+/* SYSTEM CONTROL BYTE
+---------------------- */
+
+#define MASK_SYSTEM_CONTROL     0x0000ff00
+#define MASK_TRAP_STRICT        0x00001f00
+
+/* These five bits live inside MASK_TRAP_STRICT (bits 8-12); they must not
+   share values with the trap enable bits defined above. */
+#define BIT_AC          0x00001000   /* use alternative C-flag definition
+                                        for compares */
+#define BIT_EP          0x00000800   /* use expanded packed decimal format */
+#define BIT_SO          0x00000400   /* select synchronous operation of FPA */
+#define BIT_NE          0x00000200   /* NaN exception bit */
+#define BIT_ND          0x00000100   /* no denormalized numbers bit */
+
+/* CUMULATIVE EXCEPTION FLAGS BYTE
+---------------------------------- */
+
+#define MASK_EXCEPTION_FLAGS            0x000000ff
+#define MASK_EXCEPTION_FLAGS_STRICT     0x0000001f
+
+#define BIT_IXC         0x00000010   /* inexact exception flag */
+#define BIT_UFC         0x00000008   /* underflow exception flag */
+#define BIT_OFC         0x00000004   /* overflow exception flag */
+#define BIT_DZC         0x00000002   /* divide by zero exception flag */
+#define BIT_IOC         0x00000001   /* invalid operation exception flag */
+
+/* Floating Point Control Register
+----------------------------------*/
+
+#define BIT_RU          0x80000000   /* rounded up bit */
+#define BIT_IE          0x10000000   /* inexact bit */
+#define BIT_MO 0x08000000 /* mantissa overflow bit */ +#define BIT_EO 0x04000000 /* exponent overflow bit */ +#define BIT_SB 0x00000800 /* store bounce */ +#define BIT_AB 0x00000400 /* arithmetic bounce */ +#define BIT_RE 0x00000200 /* rounding exception */ +#define BIT_DA 0x00000100 /* disable FPA */ + +#define MASK_OP 0x00f08010 /* AU operation code */ +#define MASK_PR 0x00080080 /* AU precision */ +#define MASK_S1 0x00070000 /* AU source register 1 */ +#define MASK_S2 0x00000007 /* AU source register 2 */ +#define MASK_DS 0x00007000 /* AU destination register */ +#define MASK_RM 0x00000060 /* AU rounding mode */ +#define MASK_ALU 0x9cfff2ff /* only ALU can write these bits */ +#define MASK_RESET 0x00000d00 /* bits set on reset, all others cleared */ +#define MASK_WFC MASK_RESET +#define MASK_RFC ~MASK_RESET + +#endif diff --git a/arch/arm/nwfpe/milieu.h b/arch/arm/nwfpe/milieu.h new file mode 100644 index 000000000..a3892ab2d --- /dev/null +++ b/arch/arm/nwfpe/milieu.h @@ -0,0 +1,48 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. +------------------------------------------------------------------------------- +*/ +#include "ARM-gcc.h" + +/* +------------------------------------------------------------------------------- +Symbolic Boolean literals. +------------------------------------------------------------------------------- +*/ +enum { + FALSE = 0, + TRUE = 1 +}; + diff --git a/arch/arm/nwfpe/single_cpdo.c b/arch/arm/nwfpe/single_cpdo.c new file mode 100644 index 000000000..f8405ee57 --- /dev/null +++ b/arch/arm/nwfpe/single_cpdo.c @@ -0,0 +1,259 @@ +/* + NetWinder Floating Point Emulator + (c) Corel Computer Corporation, 1998 + + Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "config.h" +#include "milieu.h" +#include "softfloat.h" +#include "fpopcode.h" +#include "fpa11.h" + +float32 getSingleConstant(unsigned int); + +float32 float32_exp(float32 Fm); +float32 float32_ln(float32 Fm); +float32 float32_sin(float32 rFm); +float32 float32_cos(float32 rFm); +float32 float32_arcsin(float32 rFm); +float32 float32_arctan(float32 rFm); +float32 float32_log(float32 rFm); +float32 float32_tan(float32 rFm); +float32 float32_arccos(float32 rFm); +float32 float32_pow(float32 rFn,float32 rFm); +float32 float32_pol(float32 rFn,float32 rFm); + +unsigned int SingleCPDO(const unsigned int opcode) +{ + float32 rFm, rFn; + unsigned int Fd, Fm, Fn, nRc = 1; + + Fm = getFm(opcode); + if (CONSTANT_FM(opcode)) + { + rFm = getSingleConstant(Fm); + } + else + { + switch (fpa11->fpreg[Fm].fType) + { + case typeSingle: + rFm = fpa11->fpreg[Fm].fValue.fSingle; + break; + + default: return 0; + } + } + + if (!MONADIC_INSTRUCTION(opcode)) + { + Fn = getFn(opcode); + switch (fpa11->fpreg[Fn].fType) + { + case typeSingle: + rFn = fpa11->fpreg[Fn].fValue.fSingle; + break; + + default: return 0; + } + } + + Fd = getFd(opcode); + switch (opcode & MASK_ARITHMETIC_OPCODE) + { + /* dyadic opcodes */ + case ADF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_add(rFn,rFm); + break; + + case MUF_CODE: + case FML_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_mul(rFn,rFm); + break; + + case SUF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sub(rFn,rFm); + break; + + case RSF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sub(rFm,rFn); + break; + + case DVF_CODE: + case FDV_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_div(rFn,rFm); + break; + + case RDF_CODE: + case FRD_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_div(rFm,rFn); + break; + +#if 0 + case 
POW_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_pow(rFn,rFm); + break; + + case RPW_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_pow(rFm,rFn); + break; +#endif + + case RMF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_rem(rFn,rFm); + break; + +#if 0 + case POL_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_pol(rFn,rFm); + break; +#endif + + /* monadic opcodes */ + case MVF_CODE: + fpa11->fpreg[Fd].fValue.fSingle = rFm; + break; + + case MNF_CODE: + rFm ^= 0x80000000; + fpa11->fpreg[Fd].fValue.fSingle = rFm; + break; + + case ABS_CODE: + rFm &= 0x7fffffff; + fpa11->fpreg[Fd].fValue.fSingle = rFm; + break; + + case RND_CODE: + case URD_CODE: + fpa11->fpreg[Fd].fValue.fSingle = + int32_to_float32(float32_to_int32(rFm)); + break; + + case SQT_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sqrt(rFm); + break; + +#if 0 + case LOG_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_log(rFm); + break; + + case LGN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_ln(rFm); + break; + + case EXP_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_exp(rFm); + break; + + case SIN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_sin(rFm); + break; + + case COS_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_cos(rFm); + break; + + case TAN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_tan(rFm); + break; + + case ASN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_arcsin(rFm); + break; + + case ACS_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_arccos(rFm); + break; + + case ATN_CODE: + fpa11->fpreg[Fd].fValue.fSingle = float32_arctan(rFm); + break; +#endif + + case NRM_CODE: + break; + + default: + { + nRc = 0; + } + } + + if (0 != nRc) fpa11->fpreg[Fd].fType = typeSingle; + return nRc; +} + +#if 0 +float32 float32_exp(float32 Fm) +{ +//series +} + +float32 float32_ln(float32 Fm) +{ +//series +} + +float32 float32_sin(float32 rFm) +{ +//series +} + +float32 float32_cos(float32 rFm) +{ +//series +} + +float32 float32_arcsin(float32 rFm) +{ +//series 
+} + +float32 float32_arctan(float32 rFm) +{ + //series +} + +float32 float32_arccos(float32 rFm) +{ + //return float32_sub(halfPi,float32_arcsin(rFm)); +} + +float32 float32_log(float32 rFm) +{ + return float32_div(float32_ln(rFm),getSingleConstant(7)); +} + +float32 float32_tan(float32 rFm) +{ + return float32_div(float32_sin(rFm),float32_cos(rFm)); +} + +float32 float32_pow(float32 rFn,float32 rFm) +{ + return float32_exp(float32_mul(rFm,float32_ln(rFn))); +} + +float32 float32_pol(float32 rFn,float32 rFm) +{ + return float32_arctan(float32_div(rFn,rFm)); +} +#endif diff --git a/arch/arm/nwfpe/softfloat-macros b/arch/arm/nwfpe/softfloat-macros new file mode 100644 index 000000000..5469989f2 --- /dev/null +++ b/arch/arm/nwfpe/softfloat-macros @@ -0,0 +1,740 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Right-shifts `a' by `count' bit positions with ``jamming'': when any 1 bit
+falls off the low end, bit 0 of the result is forced to 1 so that the loss
+stays visible. Any `count' of 32 or more therefore yields just 0 or 1,
+according to whether `a' is zero or nonzero. The result is written through
+`zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 lostBits;
+
+    /* Nothing to shift: hand the input back untouched. */
+    if ( count == 0 ) {
+        *zPtr = a;
+        return;
+    }
+    /* Every bit is shifted out; only the sticky bit can survive. */
+    if ( 32 <= count ) {
+        *zPtr = ( a != 0 );
+        return;
+    }
+    /* `a<<( -count & 31 )' keeps exactly the bits that `a>>count' drops. */
+    lostBits = a<<( ( - count ) & 31 );
+    *zPtr = ( a>>count ) | ( lostBits != 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'. If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1. The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 64, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 jammed;
+
+    /* NOTE(review): the two asm strings look like marker comments for */
+    /* locating this routine in the generated assembly -- confirm. */
+    __asm__("@shift64RightJamming -- start");
+    if ( 64 <= count ) {
+        /* Everything is shifted out; only the sticky bit is left. */
+        jammed = ( a != 0 );
+    }
+    else if ( count != 0 ) {
+        /* `a<<( -count & 63 )' holds exactly the bits `a>>count' drops. */
+        jammed = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+    }
+    else {
+        /* Zero shift: the value passes through unchanged. */
+        jammed = a;
+    }
+    __asm__("@shift64RightJamming -- end");
+    *zPtr = jammed;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+_plus_ the number of bits given in `count'. The shifted result is at most
+64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
+bits shifted off form a second 64-bit result as follows: The _last_ bit
+shifted off is the most-significant bit of the extra result, and the other
+63 bits of the extra result are all zero if and only if _all_but_the_last_
+bits shifted off were all zero. This extra result is stored in the location
+pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0' and `a1' are considered to form a
+fixed-point value with binary point between `a0' and `a1'. This fixed-point
+value is shifted right by the number of bits given in `count', and the
+integer part of the result is returned at the location pointed to by
+`z0Ptr'. The fractional part of the result may be slightly corrupted as
+described above, and is returned at the location pointed to by `z1Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63; /* ( 64 - count ) mod 64 */
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        /* Bits leaving a0 fill the top of z1; a1 only adds a sticky bit. */
+        z1 = ( a0<<negCount ) | ( a1 != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else {
+            /* More than 64: nothing but the sticky bit remains. */
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'. Any bits shifted off are lost. The value
+of `count' can be arbitrarily large; in particular, if `count' is 128 or
+greater, the result will be 0. The result is broken into two 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63; /* ( 64 - count ) mod 64 */
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        /*
+        This branch is reached only when `count' is 64 or more. For 64..127
+        the low word is what is left of `a0' after shifting by `count' - 64;
+        from 128 on nothing is left. The test used to read `count < 64',
+        which can never hold here and so always produced 0.
+        */
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'. If any nonzero bits are shifted off, they
+are ``jammed'' into the least significant bit of the result by setting the
+least significant bit to 1.
The value of `count' can be arbitrarily large;
+in particular, if `count' is greater than 128, the result will be either 0
+or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+nonzero. The result is broken into two 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63; /* ( 64 - count ) mod 64 */
+
+    if ( count == 0 ) { /* no shift: both words pass through */
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) { /* last term below is the sticky bit from a1 */
+        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) { /* a0 becomes the low word; a1 is only sticky */
+            z1 = a0 | ( a1 != 0 );
+        }
+        else if ( count < 128 ) { /* part of a0 survives; the rest is sticky */
+            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+        }
+        else { /* everything is shifted out: sticky bit only */
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+by 64 _plus_ the number of bits given in `count'. The shifted result is
+at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
+off form a third 64-bit result as follows: The _last_ bit shifted off is
+the most-significant bit of the extra result, and the other 63 bits of the
+extra result are all zero if and only if _all_but_the_last_ bits shifted off
+were all zero. This extra result is stored in the location pointed to by
+`z2Ptr'. The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0', `a1', and `a2' are considered
+to form a fixed-point value with binary point between `a1' and `a2'.
This
+fixed-point value is shifted right by the number of bits given in `count',
+and the integer part of the result is returned at the locations pointed to
+by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
+corrupted as described above, and is returned at the location pointed to by
+`z2Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63; /* ( 64 - count ) mod 64 */
+
+    if ( count == 0 ) { /* no shift: all three words pass through */
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount; /* bits leaving a1 head the extra word */
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) { /* whole-word move */
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1; /* a1 now only contributes to the sticky bit */
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 ); /* jam the lost low-order bits into bit 0 */
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+number of bits given in `count'. Any bits shifted off are lost. The value
+of `count' must be less than 64. The result is broken into two 64-bit
+pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1<<count;
+    *z0Ptr = /* count == 0 is special-cased: a1>>0 would OR all of a1 in */
+        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+by the number of bits given in `count'.
Any bits shifted off are lost.
+The value of `count' must be less than 64. The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<<count;
+    z1 = a1<<count;
+    z0 = a0<<count;
+    if ( 0 < count ) { /* skipped for 0: a shift by negCount == 0 would OR in a whole word */
+        negCount = ( ( - count ) & 63 );
+        z1 |= a2>>negCount; /* bits carried up from a2 */
+        z0 |= a1>>negCount; /* bits carried up from a1 */
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
+any carry out is lost. The result is broken into two 64-bit pieces which
+are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z1;
+
+    z1 = a1 + b1;
+    *z1Ptr = z1;
+    *z0Ptr = a0 + b0 + ( z1 < a1 ); /* z1 < a1 exactly when the low add wrapped */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
+modulo 2^192, so any carry out is lost. The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 carry0, carry1;
+
+    z2 = a2 + b2;
+    carry1 = ( z2 < a2 ); /* low word wrapped: carry into the middle word */
+    z1 = a1 + b1;
+    carry0 = ( z1 < a1 ); /* middle word wrapped: carry into the high word */
+    z0 = a0 + b0;
+    z1 += carry1;
+    z0 += ( z1 < carry1 ); /* adding carry1 itself wrapped the middle word */
+    z0 += carry0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
+2^128, so any borrow out (carry out) is lost. The result is broken into two
+64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+`z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - ( a1 < b1 ); /* a1 < b1 means the low word borrowed */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
+result is broken into three 64-bit pieces which are stored at the locations
+pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 borrow0, borrow1;
+
+    z2 = a2 - b2;
+    borrow1 = ( a2 < b2 ); /* low word borrows from the middle word */
+    z1 = a1 - b1;
+    borrow0 = ( a1 < b1 ); /* middle word borrows from the high word */
+    z0 = a0 - b0;
+    z0 -= ( z1 < borrow1 ); /* must be tested before z1 is adjusted below */
+    z1 -= borrow1;
+    z0 -= borrow0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
+into two 64-bit pieces which are stored at the locations pointed to by
+`z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits32 aHigh, aLow, bHigh, bLow;
+    bits64 z0, zMiddleA, zMiddleB, z1;
+
+    aLow = a; /* assignment to bits32 keeps the low half */
+    aHigh = a>>32;
+    bLow = b;
+    bHigh = b>>32;
+    z1 = ( (bits64) aLow ) * bLow; /* four 32x32 partial products follow */
+    zMiddleA = ( (bits64) aLow ) * bHigh;
+    zMiddleB = ( (bits64) aHigh ) * bLow;
+    z0 = ( (bits64) aHigh ) * bHigh;
+    zMiddleA += zMiddleB; /* may wrap; the wrap is re-added on the next line */
+    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    zMiddleA <<= 32; /* low half of the middle sum belongs to the low word */
+    z1 += zMiddleA;
+    z0 += ( z1 < zMiddleA ); /* carry out of the low word */
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
+obtain a 192-bit product. The product is broken into three 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+`z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2, more1;
+
+    mul64To128( a1, b, &z1, &z2 ); /* a1 * b: low partial product */
+    mul64To128( a0, b, &z0, &more1 ); /* a0 * b: high partial product */
+    add128( z0, more1, 0, z1, &z0, &z1 ); /* overlap the two in the middle word */
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' by the
+128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+product. The product is broken into four 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+
+    mul64To128( a1, b1, &z2, &z3 ); /* a1 * b1: lowest partial product */
+    mul64To128( a1, b0, &z1, &more2 ); /* a1 * b0: first cross product */
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    mul64To128( a0, b0, &z0, &more1 ); /* a0 * b0: highest partial product */
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    mul64To128( a0, b1, &more1, &more2 ); /* a0 * b1: second cross product */
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the 64-bit integer quotient obtained by dividing
+`b' into the 128-bit value formed by concatenating `a0' and `a1'. The
+divisor `b' must be at least 2^63. If q is the exact quotient truncated
+toward zero, the approximation returned lies between q and q + 2 inclusive.
+If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+unsigned integer is returned.
+-------------------------------------------------------------------------------
+*/
+static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); /* quotient needs more than 64 bits */
+    b0 = b>>32; /* upper 32 bits of the divisor */
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; /* estimate of the upper 32 quotient bits */
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 ); /* remainder for that estimate */
+    while ( ( (sbits64) rem0 ) < 0 ) { /* negative remainder: the estimate was too large */
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 ); /* add back b * 2^32 */
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 ); /* middle 64 bits of the remainder */
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; /* estimate of the lower 32 quotient bits */
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the square root of the 32-bit significand given
+by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
+`aExp' (the least significant bit) is 1, the integer returned approximates
+2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
+is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
+case, the approximation returned lies strictly within +/-2 of the exact
+value.
+-------------------------------------------------------------------------------
+*/
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = { /* used when bit 0 of aExp is 1 */
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = { /* used when bit 0 of aExp is 0 */
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15; /* bits 27..30 of `a' select one of 16 table entries */
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; /* table-corrected first guess */
+        z = ( ( a / z )<<14 ) + ( z<<15 ); /* NOTE(review): looks like one Newton-style refinement -- confirm */
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; /* table-corrected first guess */
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); /* clamp instead of letting z<<15 overflow */
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); /* final step uses a 64-bit dividend */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'. If `a' is zero, 32 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = { /* entry i = leading zero bits of the 8-bit value i */
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < 0x10000 ) { /* upper 16 bits are all zero */
+        shiftCount += 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) { /* upper 8 bits are all zero */
+        shiftCount += 8;
+        a <<= 8;
+    }
+    shiftCount += countLeadingZerosHigh[ a>>24 ]; /* the top byte finishes the count */
+    return shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'. If `a' is zero, 64 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros64( bits64 a )
+{
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < ( (bits64) 1 )<<32 ) { /* upper half is zero: count it, then look at the lower half */
+        shiftCount += 32;
+    }
+    else { /* otherwise only the upper half matters */
+        a >>= 32;
+    }
+    shiftCount += countLeadingZeros32( a ); /* `a' is narrowed to the bits32 parameter here */
+    return shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+------------------------------------------------------------------------------- +*/ +INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is +not equal to the 128-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. 
+------------------------------------------------------------------------------- +*/ +INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/arch/arm/nwfpe/softfloat-specialize b/arch/arm/nwfpe/softfloat-specialize new file mode 100644 index 000000000..f03e5c6d4 --- /dev/null +++ b/arch/arm/nwfpe/softfloat-specialize @@ -0,0 +1,471 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. 
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Underflow tininess-detection mode, statically initialized to default value.
+(The declaration in `softfloat.h' must match the `int8' type here.)
+-------------------------------------------------------------------------------
+*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*
+-------------------------------------------------------------------------------
+Raises the exceptions specified by `flags'. Floating-point traps can be
+defined here if desired. It is currently not possible for such a trap to
+substitute a result value. If traps are not implemented, this routine
+should be simply `float_exception_flags |= flags;'.
+
+ScottB: November 4, 1998
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+
+The original definition is kept below, inside this comment, for reference
+only; it is not compiled.
+-------------------------------------------------------------------------------
+void float_raise( int8 flags )
+{
+    float_exception_flags |= flags;
+}
+*/
+
+/*
+-------------------------------------------------------------------------------
+Internal canonical NaN format. The conversion routines in this file store
+the NaN's sign in `sign' and its fraction bits left-aligned in `high'; they
+always set `low' to 0.
+-------------------------------------------------------------------------------
+*/
+typedef struct {
+    flag sign;
+    bits64 high, low;
+} commonNaNT;
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated single-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_nan( float32 a )
+{
+
+    return ( 0xFF000000 < (bits32) ( a<<1 ) ); /* a<<1 drops the sign; true iff exponent all ones and fraction nonzero */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_signaling_nan( float32 a )
+{
+
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); /* exponent all ones, bit 22 clear, lower fraction bits nonzero */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point NaN
+`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT z;
+
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>31;
+    z.low = 0;
+    z.high = ( (bits64) a )<<41; /* moves fraction bit 22 up to bit 63; sign and exponent fall off the top */
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the single-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+
+    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); /* 0x7FC00000 = exponent all ones plus fraction bit 22 */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two single-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    a |= 0x00400000; /* set fraction bit 22 so the value returned is not signaling */
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a; /* `b' wins only when `a' was signaling and `b' is a NaN too */
+    }
+    else {
+        return b;
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated double-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_nan( float64 a )
+{
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); /* a<<1 drops the sign; true iff exponent all ones and fraction nonzero */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_signaling_nan( float64 a )
+{
+
+    return /* exponent all ones, bit 51 clear, lower fraction bits nonzero */
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point NaN
+`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>63;
+    z.low = 0;
+    z.high = a<<12; /* moves fraction bit 51 up to bit 63; sign and exponent fall off the top */
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the double-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return /* the constant is exponent all ones plus fraction bit 51 */
+          ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two double-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+------------------------------------------------------------------------------- +*/ +static float64 propagateFloat64NaN( float64 a, float64 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= LIT64( 0x0008000000000000 ); + b |= LIT64( 0x0008000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated extended double-precision NaN. The +`high' and `low' values hold the most- and least-significant bits, +respectively. +------------------------------------------------------------------------------- +*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag floatx80_is_nan( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +signaling NaN; otherwise returns 0. 
+------------------------------------------------------------------------------- +*/ +flag floatx80_is_signaling_nan( floatx80 a ) +{ + //register int lr; + bits64 aLow; + + //__asm__("mov %0, lr" : : "g" (lr)); + //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr); + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (bits64) ( aLow<<1 ) + && ( a.low == aLow ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT floatx80ToCommonNaN( floatx80 a ) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the extended +double-precision floating-point format. +------------------------------------------------------------------------------- +*/ +static floatx80 commonNaNToFloatx80( commonNaNT a ) +{ + floatx80 z; + + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two extended double-precision floating-point values `a' and `b', one +of which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised. 
+------------------------------------------------------------------------------- +*/ +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated quadruple-precision NaN. The `high' and +`low' values hold the most- and least-significant bits, respectively. +------------------------------------------------------------------------------- +*/ +#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) +#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +flag float128_is_nan( float128 a ) +{ + + return + ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) + && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is a +signaling NaN; otherwise returns 0. 
+------------------------------------------------------------------------------- +*/ +flag float128_is_signaling_nan( float128 a ) +{ + + return + ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) + && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float128ToCommonNaN( float128 a ) +{ + commonNaNT z; + + if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a.high>>63; + shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the quadruple- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float128 commonNaNToFloat128( commonNaNT a ) +{ + float128 z; + + shift128Right( a.high, a.low, 16, &z.high, &z.low ); + z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes two quadruple-precision floating-point values `a' and `b', one of +which is a NaN, and returns the appropriate NaN result. If either `a' or +`b' is a signaling NaN, the invalid exception is raised. 
+------------------------------------------------------------------------------- +*/ +static float128 propagateFloat128NaN( float128 a, float128 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float128_is_nan( a ); + aIsSignalingNaN = float128_is_signaling_nan( a ); + bIsNaN = float128_is_nan( b ); + bIsSignalingNaN = float128_is_signaling_nan( b ); + a.high |= LIT64( 0x0000800000000000 ); + b.high |= LIT64( 0x0000800000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#endif + diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c new file mode 100644 index 000000000..a7fc76cc8 --- /dev/null +++ b/arch/arm/nwfpe/softfloat.c @@ -0,0 +1,4877 @@ +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. 
USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include "milieu.h" +#include "softfloat.h" + +/* +------------------------------------------------------------------------------- +Floating-point rounding mode, extended double-precision rounding precision, +and exception flags. +------------------------------------------------------------------------------- +*/ +int8 float_rounding_mode = float_round_nearest_even; +int8 floatx80_rounding_precision = 80; +int8 float_exception_flags = 0; + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) +------------------------------------------------------------------------------- +*/ +#include "softfloat-macros" + +/* +------------------------------------------------------------------------------- +Functions and definitions to determine: (1) whether tininess for underflow +is detected before or after rounding by default, (2) what (if anything) +happens when exceptions are raised, (3) how signaling NaNs are distinguished +from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +are propagated from function inputs to output. These details are target- +specific. 
+------------------------------------------------------------------------------- +*/ +#include "softfloat-specialize" + +/* +------------------------------------------------------------------------------- +Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +and 7, and returns the properly rounded 32-bit integer corresponding to the +input. If `zSign' is nonzero, the input is negated before being converted +to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point +input is simply rounded to an integer, with the inexact exception raised if +the input cannot be represented exactly as an integer. If the fixed-point +input is too large, however, the invalid exception is raised and the largest +positive or negative integer is returned. +------------------------------------------------------------------------------- +*/ +static int32 roundAndPackInt32( flag zSign, bits64 absZ ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + int32 z; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_exception_flags |= float_flag_invalid; + return zSign ? 
0x80000000 : 0x7FFFFFFF; + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits32 extractFloat32Frac( float32 a ) +{ + + return a & 0x007FFFFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat32Exp( float32 a ) +{ + + return ( a>>23 ) & 0xFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the single-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat32Sign( float32 a ) +{ + + return a>>31; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal single-precision floating-point value represented +by the denormalized significand `aSig'. The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( aSig ) - 8; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +single-precision floating-point value, returning the result. 
After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ +#if 0 + float32 f; + __asm__("@ packFloat32; + mov %0, %1, asl #31; + orr %0, %2, asl #23; + orr %0, %3" + : /* no outputs */ + : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig) + : "cc"); + return f; +#else + return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; +#endif +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. Ordinarily, the abstract +value is simply rounded and packed into the single-precision format, with +the inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal single- +precision floating-point number. + The input significand `zSig' has its binary point between bits 30 +and 29, which is 7 bits to the left of the usual location. This shifted +significand must be normalized or smaller. 
If `zSig' is not normalized, +`zExp' must be 0; in that case, the result returned is a subnormal number, +and it must not require rounding. In the usual case that `zSig' is +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +The handling of underflow and overflow follows the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 roundingMode; + flag roundNearestEven; + int8 roundIncrement, roundBits; + flag isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x7F; + if ( 0xFD <= (bits16) zExp ) { + if ( ( 0xFD < zExp ) + || ( ( zExp == 0xFD ) + && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise( float_flag_overflow | float_flag_inexact ); + return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); + shift32RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x7F; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes 
an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper single-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat32' except that `zSig' does not have to be normalized in +any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. +------------------------------------------------------------------------------- +*/ +static float32 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros32( zSig ) - 1; + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); + +} + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat64Frac( float64 a ) +{ + + return a & LIT64( 0x000FFFFFFFFFFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int16 extractFloat64Exp( float64 a ) +{ + + return ( a>>52 ) & 0x7FF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the double-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat64Sign( float64 a ) +{ + + return a>>63; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal double-precision floating-point value represented +by the denormalized significand `aSig'. 
The normalized exponent and +significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. +------------------------------------------------------------------------------- +*/ +static void + normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ) - 11; + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +double-precision floating-point value, returning the result. After being +shifted into the proper positions, the three fields are simply added +together to form the result. This means that any integer portion of `zSig' +will be added into the exponent. Since a properly normalized significand +will have an integer portion equal to 1, the `zExp' input should be 1 less +than the desired result exponent whenever `zSig' is a complete, normalized +significand. +------------------------------------------------------------------------------- +*/ +INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + + return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. Ordinarily, the abstract +value is simply rounded and packed into the double-precision format, with +the inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. 
If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal double- +precision floating-point number. + The input significand `zSig' has its binary point between bits 62 +and 61, which is 10 bits to the left of the usual location. This shifted +significand must be normalized or smaller. If `zSig' is not normalized, +`zExp' must be 0; in that case, the result returned is a subnormal number, +and it must not require rounding. In the usual case that `zSig' is +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +The handling of underflow and overflow follows the IEC/IEEE Standard for +Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 roundingMode; + flag roundNearestEven; + int16 roundIncrement, roundBits; + flag isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x200; + if ( ! 
roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x3FF; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig & 0x3FF; + if ( 0x7FD <= (bits16) zExp ) { + if ( ( 0x7FD < zExp ) + || ( ( zExp == 0x7FD ) + && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) + ) { + //register int lr; + //__asm__("mov %0, lr" :: "g" (lr)); + //fp_printk("roundAndPackFloat64 called from 0x%08x\n",lr); + float_raise( float_flag_overflow | float_flag_inexact ); + return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); + shift64RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x3FF; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig = ( zSig + roundIncrement )>>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand `zSig', and returns the proper double-precision floating- +point value corresponding to the abstract input. This routine is just like +`roundAndPackFloat64' except that `zSig' does not have to be normalized in +any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. 
+------------------------------------------------------------------------------- +*/ +static float64 + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( zSig ) - 1; + return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the fraction bits of the extended double-precision floating-point +value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloatx80Frac( floatx80 a ) +{ + + return a.low; + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the extended double-precision floating-point +value `a'. +------------------------------------------------------------------------------- +*/ +INLINE int32 extractFloatx80Exp( floatx80 a ) +{ + + return a.high & 0x7FFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the extended double-precision floating-point value +`a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloatx80Sign( floatx80 a ) +{ + + return a.high>>15; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal extended double-precision floating-point value +represented by the denormalized significand `aSig'. The normalized exponent +and significand are stored at the locations pointed to by `zExpPtr' and +`zSigPtr', respectively. 
+------------------------------------------------------------------------------- +*/ +static void + normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) +{ + int8 shiftCount; + + shiftCount = countLeadingZeros64( aSig ); + *zSigPtr = aSig<<shiftCount; + *zExpPtr = 1 - shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', exponent `zExp', and significand `zSig' into an +extended double-precision floating-point value, returning the result. +------------------------------------------------------------------------------- +*/ +INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) +{ + floatx80 z; + + z.low = zSig; + z.high = ( ( (bits16) zSign )<<15 ) + zExp; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and extended significand formed by the concatenation of `zSig0' and `zSig1', +and returns the proper extended double-precision floating-point value +corresponding to the abstract input. Ordinarily, the abstract value is +rounded and packed into the extended double-precision format, with the +inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal extended +double-precision floating-point number. + If `roundingPrecision' is 32 or 64, the result is rounded to the same +number of bits as single or double precision, respectively. Otherwise, the +result is rounded to the full precision of the extended double-precision +format. 
+ The input significand must be normalized or smaller. If the input +significand is not normalized, `zExp' must be 0; in that case, the result +returned is a subnormal number, and it must not require rounding. The +handling of underflow and overflow follows the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static floatx80 + roundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + ) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + int64 roundIncrement, roundMask, roundBits; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } + else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } + else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + if ( ! 
roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; + } + if ( zExp <= 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( (sbits64) zSig0 < 0 ) zExp = 1; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( roundBits ) float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + increment = ( (sbits64) zSig1 < 0 ); + if ( ! 
roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + } + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise( float_flag_overflow | float_flag_inexact ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( zExp <= 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < 0 ) + || ! increment + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); + zExp = 0; + if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); + if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( roundNearestEven ) { + increment = ( (sbits64) zSig1 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } + else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + if ( increment ) { + ++zSig0; + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); + if ( (sbits64) zSig0 < 0 ) zExp = 1; + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + else { + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); + } + } + else { + if ( zSig0 == 0 ) zExp = 0; + } + + return packFloatx80( zSign, zExp, zSig0 ); +} + +/* 
+------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent +`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +and returns the proper extended double-precision floating-point value +corresponding to the abstract input. This routine is just like +`roundAndPackFloatx80' except that the input significand does not have to be +normalized. +------------------------------------------------------------------------------- +*/ +static floatx80 + normalizeRoundAndPackFloatx80( + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + ) +{ + int8 shiftCount; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return + roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the least-significant 64 fraction bits of the quadruple-precision +floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat128Frac1( float128 a ) +{ + + return a.low; + +} + +/* +------------------------------------------------------------------------------- +Returns the most-significant 48 fraction bits of the quadruple-precision +floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE bits64 extractFloat128Frac0( float128 a ) +{ + + return a.high & LIT64( 0x0000FFFFFFFFFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the exponent bits of the quadruple-precision floating-point value +`a'. 
+------------------------------------------------------------------------------- +*/ +INLINE int32 extractFloat128Exp( float128 a ) +{ + + return ( a.high>>48 ) & 0x7FFF; + +} + +/* +------------------------------------------------------------------------------- +Returns the sign bit of the quadruple-precision floating-point value `a'. +------------------------------------------------------------------------------- +*/ +INLINE flag extractFloat128Sign( float128 a ) +{ + + return a.high>>63; + +} + +/* +------------------------------------------------------------------------------- +Normalizes the subnormal quadruple-precision floating-point value +represented by the denormalized significand formed by the concatenation of +`aSig0' and `aSig1'. The normalized exponent is stored at the location +pointed to by `zExpPtr'. The most significant 49 bits of the normalized +significand are stored at the location pointed to by `zSig0Ptr', and the +least significant 64 bits of the normalized significand are stored at the +location pointed to by `zSig1Ptr'. 
+------------------------------------------------------------------------------- +*/ +static void + normalizeFloat128Subnormal( + bits64 aSig0, + bits64 aSig1, + int32 *zExpPtr, + bits64 *zSig0Ptr, + bits64 *zSig1Ptr + ) +{ + int8 shiftCount; + + if ( aSig0 == 0 ) { + shiftCount = countLeadingZeros64( aSig1 ) - 15; + if ( shiftCount < 0 ) { + *zSig0Ptr = aSig1>>( - shiftCount ); + *zSig1Ptr = aSig1<<( shiftCount & 63 ); + } + else { + *zSig0Ptr = aSig1<<shiftCount; + *zSig1Ptr = 0; + } + *zExpPtr = - shiftCount - 63; + } + else { + shiftCount = countLeadingZeros64( aSig0 ) - 15; + shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); + *zExpPtr = 1 - shiftCount; + } + +} + +/* +------------------------------------------------------------------------------- +Packs the sign `zSign', the exponent `zExp', and the significand formed +by the concatenation of `zSig0' and `zSig1' into a quadruple-precision +floating-point value, returning the result. After being shifted into the +proper positions, the three fields `zSign', `zExp', and `zSig0' are simply +added together to form the most significant 64 bits of the result. This +means that any integer portion of `zSig0' will be added into the exponent. +Since a properly normalized significand will have an integer portion equal +to 1, the `zExp' input should be 1 less than the desired result exponent +whenever `zSig0' and `zSig1' concatenated form a complete, normalized +significand. 
+------------------------------------------------------------------------------- +*/ +INLINE float128 + packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +{ + float128 z; + + z.low = zSig1; + /* The fields are summed rather than OR-ed: the exponent sits at bit 48, so a set bit 48 in zSig0 carries into the exponent field. */ z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; + return z; + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and extended significand formed by the concatenation of `zSig0', `zSig1', +and `zSig2', and returns the proper quadruple-precision floating-point value +corresponding to the abstract input. Ordinarily, the abstract value is +simply rounded and packed into the quadruple-precision format, with the +inexact exception raised if the abstract input cannot be represented +exactly. If the abstract value is too large, however, the overflow and +inexact exceptions are raised and an infinity or maximal finite value is +returned. If the abstract value is too small, the input value is rounded to +a subnormal number, and the underflow and inexact exceptions are raised if +the abstract input cannot be represented exactly as a subnormal quadruple- +precision floating-point number. + The input significand must be normalized or smaller. If the input +significand is not normalized, `zExp' must be 0; in that case, the result +returned is a subnormal number, and it must not require rounding. In the +usual case that the input significand is normalized, `zExp' must be 1 less +than the ``true'' floating-point exponent. The handling of underflow and +overflow follows the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float128 + roundAndPackFloat128( + flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) +{ + int8 roundingMode; + flag roundNearestEven, increment, isTiny; + + roundingMode = float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + increment = ( (sbits64) zSig2 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig2; + } + else { + increment = ( roundingMode == float_round_up ) && zSig2; + } + } + } + if ( 0x7FFD <= (bits32) zExp ) { + if ( ( 0x7FFD < zExp ) + || ( ( zExp == 0x7FFD ) + && eq128( + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ), + zSig0, + zSig1 + ) + && increment + ) + ) { + float_raise( float_flag_overflow | float_flag_inexact ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return + packFloat128( + zSign, + 0x7FFE, + LIT64( 0x0000FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( zExp < 0 ) { + isTiny = + ( float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ! 
increment + || lt128( + zSig0, + zSig1, + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); + zExp = 0; + if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); + if ( roundNearestEven ) { + increment = ( (sbits64) zSig2 < 0 ); + } + else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig2; + } + else { + increment = ( roundingMode == float_round_up ) && zSig2; + } + } + } + } + if ( zSig2 ) float_exception_flags |= float_flag_inexact; + if ( increment ) { + add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); + zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + } + else { + if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; + } + return packFloat128( zSign, zExp, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Takes an abstract floating-point value having sign `zSign', exponent `zExp', +and significand formed by the concatenation of `zSig0' and `zSig1', and +returns the proper quadruple-precision floating-point value corresponding to +the abstract input. This routine is just like `roundAndPackFloat128' except +that the input significand has fewer bits and does not have to be normalized +in any way. In all cases, `zExp' must be 1 less than the ``true'' floating- +point exponent. 
+------------------------------------------------------------------------------- +*/ +static float128 + normalizeRoundAndPackFloat128( + flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +{ + int8 shiftCount; + bits64 zSig2; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ) - 15; + if ( 0 <= shiftCount ) { + zSig2 = 0; + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + } + else { + shift128ExtraRightJamming( + zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); + } + zExp -= shiftCount; + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the single-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int32 a ) +{ + flag zSign; + + if ( a == 0 ) return 0; + if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the double-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 int32_to_float64( int32 a ) +{ + flag aSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return 0; + aSign = ( a < 0 ); + absA = aSign ? 
- a : a; + shiftCount = countLeadingZeros32( absA ) + 21; + zSig = absA; + return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' +to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 int32_to_floatx80( int32 a ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig; + + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 32; + zSig = absA; + return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the quadruple-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 int32_to_float128( int32 a ) +{ + flag zSign; + uint32 absA; + int8 shiftCount; + bits64 zSig0; + + if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 17; + zSig0 = absA; + return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. 
The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + bits64 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= 0x00800000; + shiftCount = 0xAF - aExp; + zSig = aSig; + zSig <<= 32; + if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); + return roundAndPackInt32( aSign, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( a == 0xCF000000 ) return 0x80000000; + float_raise( float_flag_invalid ); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + return 0x80000000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + float_exception_flags |= float_flag_inexact; + } + return aSign ? - z : z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float32_to_float64( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); + return packFloat64( aSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +floatx80 float32_to_floatx80( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + aSig |= 0x00800000; + return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the quadruple-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float32_to_float128( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the single-precision floating-point value `a' to an integer, and +returns the result as a single-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + float32 z; + + aExp = extractFloat32Exp( a ); + if ( 0x96 <= aExp ) { + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { + return propagateFloat32NaN( a, a ); + } + return a; + } + if ( aExp <= 0x7E ) { + if ( (bits32) ( a<<1 ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat32Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { + return packFloat32( aSign, 0x7F, 0 ); + } + break; + case float_round_down: + return aSign ? 0xBF800000 : 0; + case float_round_up: + return aSign ? 0x80000000 : 0x3F800000; + } + return packFloat32( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the single-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the single- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the single-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_add( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs( a, b, aSign ); + } + else { + return subFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sub( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs( a, b, aSign ); + } + else { + return addFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_mul( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig; + bits64 zSig64; + bits32 zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); + zSig = zSig64; + if ( 0 <= (sbits32) ( zSig<<1 ) ) { + zSig <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the single-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_div( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = ( ( (bits64) aSig )<<32 ) / bSig; + if ( ( zSig & 0x3F ) == 0 ) { + zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the single-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_rem( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig; + bits32 q; + bits64 aSig64, bSig64, q64; + bits32 alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig |= 0x00800000; + bSig |= 0x00800000; + if ( expDiff < 32 ) { + aSig <<= 8; + bSig <<= 8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + if ( 0 < expDiff ) { + q = ( ( (bits64) aSig )<<32 ) / bSig; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + } + else { + if ( bSig <= aSig ) aSig -= bSig; + aSig64 = ( (bits64) aSig )<<40; + bSig64 = ( (bits64) bSig )<<40; + expDiff -= 64; + while ( 0 < expDiff ) { + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + aSig64 = - ( ( bSig * q64 )<<38 ); + expDiff -= 62; + } + expDiff += 64; + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? 
q64 - 2 : 0; + q = q64>>( 64 - expDiff ); + bSig <<= 6; + aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the single-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sqrt( float32 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig; + bits64 rem, term; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, 0 ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0xFFFFFFFF; + } + else { + aSig >>= aExp & 1; + term = ( (bits64) zSig ) * zSig; + rem = ( ( (bits64) aSig )<<32 ) - term; + while ( (sbits64) rem < 0 ) { + --zSig; + rem += ( ( (bits64) zSig )<<1 ) | 1; + } + zSig |= ( rem != 0 ); + } + } + shift32RightJamming( zSig, 1, &zSig ); + return roundAndPackFloat32( 0, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_le( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_eq_signaling( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + //int16 aExp, bExp; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_lt_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. 
The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = 0x433 - aExp; + if ( shiftCount < 21 ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( 52 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement unsigned integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest positive integer is returned. 
+------------------------------------------------------------------------------- +*/ +int32 float64_to_uint32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = 0; //extractFloat64Sign( a ); + //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest positive integer is returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_uint32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits64 aSig, savedASig; + int32 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = 0x433 - aExp; + if ( shiftCount < 21 ) { + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + goto invalid; + } + else if ( 52 < shiftCount ) { + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_exception_flags |= float_flag_invalid; + return aSign ? 
0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + float_exception_flags |= float_flag_inexact; + } + return z; +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the single-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float64_to_float32( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + bits32 zSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 22, &aSig ); + zSig = aSig; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the extended double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +floatx80 float64_to_floatx80( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the quadruple-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float64_to_float128( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 aSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the double-precision floating-point value `a' to an integer, and +returns the result as a double-precision floating-point value. 
The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 a ) +{ + flag aSign; + int16 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { + return propagateFloat64NaN( a, a ); + } + return a; + } + if ( aExp <= 0x3FE ) { + if ( (bits64) ( a<<1 ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat64Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { + return packFloat64( aSign, 0x3FF, 0 ); + } + break; + case float_round_down: + return aSign ? LIT64( 0xBFF0000000000000 ) : 0; + case float_round_up: + return + aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); + } + return packFloat64( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z += lastBitMask>>1; + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { + z += roundBitsMask; + } + } + z &= ~ roundBitsMask; + if ( z != a ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the double-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 9; + bSig <<= 9; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= LIT64( 0x2000000000000000 ); + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits64) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the double- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 10; + bSig <<= 10; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig | bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign ^ 1, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + bSig |= LIT64( 0x4000000000000000 ); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + aSig |= LIT64( 0x4000000000000000 ); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the double-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_add( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign ); + } + else { + return subFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sub( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign ); + } + else { + return addFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_mul( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FF; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits64) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the double-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_div( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits64 aSig, bSig, zSig; + bits64 rem0, rem1; + bits64 term0, term1; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, b ); + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat64( zSign, 0x7FF, 0 ); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FD; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64( aSig, 0, bSig ); + if ( ( zSig & 0x1FF ) <= 2 ) { + mul64To128( bSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat64( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the double-precision floating-point value `a' +with 
respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_rem( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits64 aSig, bSig; + bits64 q, alternateASig; + sbits64 sigMean; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( bExp == 0x7FF ) { + if ( bSig ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? 
q - 2 : 0; + q >>= 64 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits64) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits64) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the double-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sqrt( float64 a ) +{ + flag aSign; + int16 aExp, zExp; + bits64 aSig, zSig; + bits64 rem0, rem1, term0, term1; //, shiftedRem; + //float64 z; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return propagateFloat64NaN( a, a ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig |= LIT64( 0x0010000000000000 ); + zSig = estimateSqrt32( aExp, aSig>>21 ); + zSig <<= 31; + aSig <<= 9 - ( aExp & 1 ); + zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2; + if ( ( zSig & 0x3FF ) <= 5 ) { + if ( zSig < 2 ) { + zSig = LIT64( 0xFFFFFFFFFFFFFFFF ); + } + else { + aSig <<= 2; + mul64To128( zSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { + --zSig; + shortShift128Left( 0, zSig, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + } + shift64RightJamming( zSig, 1, &zSig ); + return roundAndPackFloat64( 0, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float64_eq( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float64_lt( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to the +corresponding value `b', and 0 otherwise. The invalid exception is raised +if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq_signaling( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float64_le_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + //int16 aExp, bExp; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. 
The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic---which means in particular that the conversion +is rounded according to the current rounding mode. If `a' is a NaN, the +largest positive integer is returned. Otherwise, if the conversion +overflows, the largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 floatx80_to_int32( floatx80 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32( aSign, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the 32-bit two's complement integer format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic, except that the conversion is always rounded +toward zero. If `a' is a NaN, the largest positive integer is returned. +Otherwise, if the conversion overflows, the largest integer with the same +sign as `a' is returned. 
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+    bits64 sig, truncated;
+    int32 exponent, shift;
+    int32 result;
+    flag sign;
+
+    sig = extractFloatx80Frac( a );
+    exponent = extractFloatx80Exp( a );
+    sign = extractFloatx80Sign( a );
+    shift = 0x403E - exponent;
+    if ( 63 < shift ) {
+        /* Every significand bit is shifted out: the result is 0, inexact
+           unless the input was itself zero. */
+        if ( exponent || sig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    if ( shift < 32 ) {
+        /* Too large for 32 bits; a NaN is reported as a positive overflow. */
+        if ( ( exponent == 0x7FFF ) && (bits64) ( sig<<1 ) ) sign = 0;
+        goto overflow;
+    }
+    truncated = sig>>shift;
+    result = truncated;
+    if ( sign ) result = - result;
+    /* A result whose sign disagrees with the input's did not fit. */
+    if ( ( result < 0 ) ^ sign ) goto overflow;
+    /* Bits dropped by the right shift make the conversion inexact. */
+    if ( ( truncated<<shift ) != sig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return result;
+ overflow:
+    float_exception_flags |= float_flag_invalid;
+    return sign ? 0x80000000 : 0x7FFFFFFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the single-precision floating-point format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+------------------------------------------------------------------------------- +*/ +float32 floatx80_to_float32( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 33, &aSig ); + if ( aExp || aSig ) aExp -= 0x3F81; + return roundAndPackFloat32( aSign, aExp, aSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the double-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 floatx80_to_float64( floatx80 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig, zSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (bits64) ( aSig<<1 ) ) { + return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shift64RightJamming( aSig, 1, &zSig ); + if ( aExp || aSig ) aExp -= 0x3C01; + return roundAndPackFloat64( aSign, aExp, zSig ); + +} + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the extended double-precision floating- +point value `a' to the quadruple-precision floating-point format. The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+float128 floatx80_to_float128( floatx80 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig, zSig0, zSig1;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    /* NaNs are routed through the common-NaN conversion helpers. */
+    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
+        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+    }
+    /* `aSig<<1' drops the top significand bit; the remaining bits are then
+       moved 16 places right across the 128-bit pair.  The exponent is
+       passed through unchanged. */
+    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+    return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the extended double-precision floating-point value `a' to an integer,
+and returns the result as an extended double-precision floating-point
+value.  The rounding direction is taken from `float_rounding_mode', and the
+inexact flag is set in `float_exception_flags' when the result differs from
+`a'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    floatx80 z;
+
+    aExp = extractFloatx80Exp( a );
+    /* Exponent 0x403E or above: returned as is, except that a NaN is
+       propagated. */
+    if ( 0x403E <= aExp ) {
+        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+            return propagateFloatx80NaN( a, a );
+        }
+        return a;
+    }
+    /* Exponent 0x3FFE or below (presumably magnitude below 1, given the
+       0x3FFF exponent used for the unit results here): the result is
+       picked directly from the rounding mode. */
+    if ( aExp <= 0x3FFE ) {
+        /* Zero exponent with no bits below the top significand bit:
+           returned unchanged, no inexact. */
+        if (    ( aExp == 0 )
+             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+            return a;
+        }
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloatx80Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            /* Only exponent 0x3FFE with some lower significand bit set
+               rounds away from zero; everything else falls through to the
+               signed zero below. */
+            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+               ) {
+                return
+                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+            break;
+         case float_round_down:
+            return
+                  aSign ?
+                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+                : packFloatx80( 0, 0, 0 );
+         case float_round_up:
+            return
+                  aSign ? packFloatx80( 1, 0, 0 )
+                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+        }
+        /* Remaining modes and the nearest-even fall-through: signed zero. */
+        return packFloatx80( aSign, 0, 0 );
+    }
+    /* lastBitMask is the lowest significand bit that is kept;
+       roundBitsMask covers every bit below it. */
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        /* Add half of the last kept bit; when the discarded bits are then
+           all zero (an exact tie), clear the last kept bit. */
+        z.low += lastBitMask>>1;
+        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        /* Directed rounding: bump the magnitude when exactly one of "sign
+           set" and "rounding up" holds. */
+        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z.low += roundBitsMask;
+        }
+    }
+    z.low &= ~ roundBitsMask;
+    /* A significand that wrapped to zero means the addition carried out of
+       the top bit: step the high word and restore the top bit. */
+    if ( z.low == 0 ) {
+        ++z.high;
+        z.low = LIT64( 0x8000000000000000 );
+    }
+    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the extended double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the sum is
+negated before being returned.  `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        /* `a' has the larger exponent. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return a;
+        }
+        /* A zero exponent field in `b' is aligned one place less. */
+        if ( bExp == 0 ) --expDiff;
+        /* Align `b'; bits shifted out are collected in zSig1. */
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        /* `b' has the larger exponent. */
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            /* Not a NaN: the result is rebuilt from zSign rather than
+               returning `b' itself. */
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( aExp == 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp = bExp;
+    }
+    else {
+        /* Equal exponents: no alignment needed. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                return propagateFloatx80NaN( a, b );
+            }
+            return a;
+        }
+        zSig1 = 0;
+        zSig0 = aSig + bSig;
+        if ( aExp == 0 ) {
+            /* NOTE(review): if both significands are zero, zSig0 is 0
+               here; the result then depends on how
+               normalizeFloatx80Subnormal treats a zero significand --
+               confirm. */
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        }
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    
+    zSig0 = aSig + bSig;
+
+    /* Top bit of the sum already set: no further adjustment before
+       rounding. */
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+    /* Shift the 128-bit sum right one place, force the top bit and step
+       the exponent. */
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= LIT64( 0x8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the extended
+double-precision floating-point values `a' and `b'.  If `zSign' is true,
+the difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.
The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    /* Equal exponents from here on. */
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        /* Both operands have the maximum exponent and neither is a NaN:
+           raise invalid and return the default NaN. */
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    /* A zero exponent field is handled as exponent 1. */
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    zSig1 = 0;
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    /* Exact cancellation: a zero whose sign flag is set only when rounding
+       down. */
+    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        /* `b' has the maximum exponent and is not a NaN: the result takes
+           the opposite of zSign. */
+        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) ++expDiff;
+    /* Align `a'; bits shifted out are collected in zSig1. */
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+    /* `b' is larger: compute b - a and invert the result sign. */
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) --expDiff;
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    /* `a' is larger: compute a - b and keep zSign. */
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the extended double-precision
floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_add( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs( a, b, aSign ); + } + else { + return subFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 floatx80_sub( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs( a, b, aSign ); + } + else { + return addFloatx80Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the extended double-precision floating- +point values `a' and `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    /* The result sign is the XOR of the operand signs. */
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        /* `a' has the maximum exponent and `b' is zero: invalid. */
+        if ( ( bExp | bSig ) == 0 ) goto invalid;
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {
+            /* Shared invalid exit (also reached by the goto above): raise
+               invalid and return the default NaN. */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    /* A zero operand gives a signed zero; a nonzero significand with a
+       zero exponent field is normalized first. */
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    /* Top bit of the 128-bit product clear: shift left one place and
+       lower the exponent to compensate. */
+    if ( 0 < (sbits64) zSig0 ) {
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the extended double-precision floating-point
+value `a' by the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, term0, term1, term2;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    /* The result sign is the XOR of the operand signs. */
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            /* Both operands have the maximum exponent, neither is a NaN. */
+            goto invalid;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            /* Zero divisor: invalid if the dividend is zero too, otherwise
+               a divide-by-zero exception. */
+            if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low = floatx80_default_nan_low;
+                z.high = floatx80_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    /* Keep the dividend significand below the divisor's by halving it and
+       stepping the exponent. */
+    if ( bSig <= aSig ) {
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );
+        ++zExp;
+    }
+    /* High quotient word: estimate, then step the estimate down until the
+       remainder is no longer negative. */
+    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    }
+    /* Low quotient word: the estimate is corrected only when its bits
+       below the top bit form a small value. */
+    zSig1 = estimateDiv128To64( rem1, 0, bSig );
+    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        }
+        /* Fold any nonzero remainder into the lowest bit. */
+        zSig1 |= ( ( rem1 | rem2 ) != 0 );
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the extended double-precision floating-point value
+`a' with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    /* bSign is extracted but not used below. */
+    bSign = extractFloatx80Sign( b );
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        /* `a' has the maximum exponent and no NaN is involved: invalid. */
+        goto invalid;
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            /* Zero divisor (also the target of the goto above): raise
+               invalid and return the default NaN. */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 );
+    zSign = aSign;
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        /* Exponent more than one below the divisor's: `a' is returned
+           unchanged. */
+        if ( expDiff < -1 ) return a;
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff = 0;
+    }
+    /* First quotient bit. */
+    q = ( bSig <= aSig0 );
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    /* Reduce 62 bits per pass.  The estimate is lowered by 2 (floored at
+       0) before use. */
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        /* Final partial step for the remaining expDiff bits, followed by
+           exact correction of q and the remainder. */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) {
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    /* Choose between the remainder and (divisor term - remainder): take
+       the alternate when it is smaller, or on a tie when q is odd, and
+       flip the sign in that case. */
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) {
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;
+    }
+    /* Rounding precision is fixed at 80 here rather than taken from
+       floatx80_rounding_precision. */
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the extended double-precision floating-point
+value `a'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sqrt( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    bits64 shiftedRem0, shiftedRem1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+        /* Maximum exponent, not a NaN: returned as is when the sign is
+           clear, invalid otherwise. */
+        if ( ! aSign ) return a;
+        goto invalid;
+    }
+    if ( aSign ) {
+        /* Sign set: a zero is returned unchanged; anything else raises
+           invalid and returns the default NaN. */
+        if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    /* Halve the exponent's offset from 0x3FFF. */
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+    /* Start from a 32-bit root estimate, then refine it with one division
+       step.  The shift depends on whether aExp is odd. */
+    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+    zSig0 <<= 31;
+    aSig1 = 0;
+    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
+    /* Top bit clear after refinement: use the all-ones value instead. */
+    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
+    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
+    /* rem = a - zSig0^2; step zSig0 down until the remainder is no longer
+       negative. */
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
+        term1 |= 1;
+        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+    }
+    /* Low word of the root, estimated from the shifted remainder. */
+    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
+    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
+    /* The estimate is corrected exactly only when its bits below the top
+       bit form a small value. */
+    if ( (bits64) ( zSig1<<1 ) <= 10 ) {
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( zSig0, zSig1, &term1, &term2 );
+        shortShift128Left( term1, term2, 1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
+            term3 |= 1;
+            add192(
+                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        /* Fold any nonzero remainder into the lowest bit. */
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+equal to the corresponding
value `b', and 0 otherwise. The comparison is +performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than or equal to the corresponding value `b', and 0 otherwise. The +comparison is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is +less than the corresponding value `b', and 0 otherwise. 
The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is equal +to the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_eq_signaling( floatx80 a, floatx80 b ) +{ + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs +do not cause an exception. 
Otherwise, the comparison is performed according +to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_le_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is less +than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause +an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag floatx80_lt_quiet( floatx80 a, floatx80 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (bits64) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? 
lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the 32-bit two's complement integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float128_to_int32( float128 a ) +{ + flag aSign; + int32 aExp, shiftCount; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + aSig0 |= ( aSig1 != 0 ); + shiftCount = 0x4028 - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); + return roundAndPackInt32( aSign, aSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the 32-bit two's complement integer format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic, except that the conversion is always rounded toward zero. If +`a' is a NaN, the largest positive integer is returned. Otherwise, if the +conversion overflows, the largest integer with the same sign as `a' is +returned. 
+-------------------------------------------------------------------------------
+*/
+int32 float128_to_int32_round_to_zero( float128 a )
+{
+    bits64 sigHigh, sigLow, fullSig;
+    int32 exponent, shift;
+    int32 result;
+    flag sign;
+
+    sigLow = extractFloat128Frac1( a );
+    sigHigh = extractFloat128Frac0( a );
+    exponent = extractFloat128Exp( a );
+    sign = extractFloat128Sign( a );
+    /* Collapse the low word into a single sticky bit. */
+    sigHigh |= ( sigLow != 0 );
+    shift = 0x402F - exponent;
+    if ( 48 < shift ) {
+        /* Every significand bit is shifted out: the result is 0, inexact
+           unless the input was itself zero. */
+        if ( exponent || sigHigh ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    if ( shift < 17 ) {
+        /* Too large for 32 bits; a NaN is reported as a positive overflow. */
+        if ( ( exponent == 0x7FFF ) && sigHigh ) sign = 0;
+        goto overflow;
+    }
+    sigHigh |= LIT64( 0x0001000000000000 );
+    fullSig = sigHigh;
+    sigHigh >>= shift;
+    result = sigHigh;
+    if ( sign ) result = - result;
+    /* A result whose sign disagrees with the input's did not fit. */
+    if ( ( result < 0 ) ^ sign ) goto overflow;
+    /* Bits dropped by the right shift make the conversion inexact. */
+    if ( ( sigHigh<<shift ) != fullSig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return result;
+ overflow:
+    float_exception_flags |= float_flag_invalid;
+    return sign ? 0x80000000 : 0x7FFFFFFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the single-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+------------------------------------------------------------------------------- +*/ +float32 float128_to_float32( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + bits32 zSig; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32( float128ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + aSig0 |= ( aSig1 != 0 ); + shift64RightJamming( aSig0, 18, &aSig0 ); + zSig = aSig0; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x3F81; + } + return roundAndPackFloat32( aSign, aExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the double-precision floating-point format. The conversion +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float128_to_float64( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat64( float128ToCommonNaN( a ) ); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + aSig0 |= ( aSig1 != 0 ); + if ( aExp || aSig0 ) { + aSig0 |= LIT64( 0x4000000000000000 ); + aExp -= 0x3C01; + } + return roundAndPackFloat64( aSign, aExp, aSig0 ); + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Returns the result of converting the quadruple-precision floating-point +value `a' to the extended double-precision floating-point format. 
The +conversion is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +floatx80 float128_to_floatx80( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { /* infinity or NaN */ + if ( aSig0 | aSig1 ) { + return commonNaNToFloatx80( float128ToCommonNaN( a ) ); + } + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* infinity: only the top significand bit is set */ + } + if ( aExp == 0 ) { /* zero or subnormal */ + if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); /* signed zero */ + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); /* helper updates aExp and the significand; presumably leaves the leading 1 at bit 48 */ + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); /* normal number: make the implicit leading 1 explicit at bit 48 */ + } + shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); /* bit 48 moves up to bit 63 */ + return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); /* first argument 80 is presumably the rounding precision -- confirm against the helper */ + +} + +#endif + +/* +------------------------------------------------------------------------------- +Rounds the quadruple-precision floating-point value `a' to an integer, and +returns the result as a quadruple-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 a ) +{ + flag aSign; + int32 aExp; + bits64 lastBitMask, roundBitsMask; + int8 roundingMode; + float128 z; + + aExp = extractFloat128Exp( a ); + if ( 0x402F <= aExp ) { /* all fraction bits, if any, live in the low word */ + if ( 0x406F <= aExp ) { /* no fraction bits at all: already integral, or infinity/NaN */ + if ( ( aExp == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) + ) { + return propagateFloat128NaN( a, a ); + } + return a; + } + lastBitMask = 1; + lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; /* shift done in two steps: for aExp == 0x402F the total is 64 and the mask becomes 0, meaning the units bit is bit 0 of z.high */ + roundBitsMask = lastBitMask - 1; /* mask of the fraction bits in z.low (all ones when lastBitMask is 0) */ + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( lastBitMask ) { + add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); /* add one half of the units bit */ + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; /* exact tie: clear the units bit so the result is even */ + } + else { + if ( (sbits64) z.low < 0 ) { /* top bit of the low word set: fraction is at least one half */ + ++z.high; + if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; /* exact tie: force an even result */ + } + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat128Sign( z ) + ^ ( roundingMode == float_round_up ) ) { /* true when the directed rounding moves away from zero */ + add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + } + } + z.low &= ~ roundBitsMask; /* drop the fraction bits */ + } + else { + if ( aExp <= 0x3FFE ) { /* magnitude below 1: the result is a zero or +/-1 */ + if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; /* +0 or -0 is returned unchanged */ + float_exception_flags |= float_flag_inexact; + aSign = extractFloat128Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) + && ( extractFloat128Frac0( a ) + | extractFloat128Frac1( a ) ) + ) { /* magnitude strictly above one half rounds to 1 */ + return packFloat128( aSign, 0x3FFF, 0, 0 ); + } + break; + case float_round_down: + return + aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) + : packFloat128( 0, 0, 0, 0 ); + case float_round_up: + return + aSign ? 
packFloat128( 1, 0, 0, 0 ) + : packFloat128( 0, 0x3FFF, 0, 0 ); + } + return packFloat128( aSign, 0, 0, 0 ); /* remaining cases give a zero carrying the sign of a */ + } + lastBitMask = 1; + lastBitMask <<= 0x402F - aExp; /* units bit lies inside the high word */ + roundBitsMask = lastBitMask - 1; + z.low = 0; /* the whole low word is fraction here */ + z.high = a.high; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.high += lastBitMask>>1; + if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { /* exact tie: make the result even */ + z.high &= ~ lastBitMask; + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat128Sign( z ) + ^ ( roundingMode == float_round_up ) ) { /* directed rounding away from zero */ + z.high |= ( a.low != 0 ); /* let a nonzero low word take part in the carry */ + z.high += roundBitsMask; + } + } + z.high &= ~ roundBitsMask; + } + if ( ( z.low != a.low ) || ( z.high != a.high ) ) { /* the value changed, so the rounding was inexact */ + float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the quadruple-precision +floating-point values `a' and `b'. If `zSign' is true, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. The +addition is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + int32 expDiff; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { /* a has the larger exponent */ + if ( aExp == 0x7FFF ) { /* a is infinity or NaN */ + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; /* b is subnormal or zero: no implicit bit, alignment distance is one less */ + } + else { + bSig0 |= LIT64( 0x0001000000000000 ); /* implicit leading 1 of b */ + } + shift128ExtraRightJamming( + bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); /* align b with a; the third output word zSig2 is passed on to the final rounding */ + zExp = aExp; + } + else if ( expDiff < 0 ) { /* b has the larger exponent: mirror image of the case above */ + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign, 0x7FFF, 0, 0 ); /* infinity with the requested sign */ + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); + zExp = bExp; + } + else { /* equal exponents */ + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN( a, b ); + } + return a; + } + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); /* both subnormal or zero: the raw fraction sum is packed with exponent field 0 */ + zSig2 = 0; + zSig0 |= LIT64( 0x0002000000000000 ); /* the two implicit leading ones add up to 2 */ + zExp = aExp; + goto shiftRight1; + } + aSig0 |= LIT64( 0x0001000000000000 ); /* sets the leading 1 on aSig0 in both unequal-exponent paths; when b was larger, aSig0 is already aligned and this bit accounts for the implicit 1 of b, which was never set */ + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + --zExp; + if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; /* no carry out of bit 48: keep the decremented exponent */ + ++zExp; + shiftRight1: + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); /* carry into bit 49: shift the sum right by one */ + roundAndPack: + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result 
of subtracting the absolute values of the quadruple- +precision floating-point values `a' and `b'. If `zSign' is true, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) +{ + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + int32 expDiff; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); /* work with 14 extra low-order bits; the implicit 1 now belongs at bit 62 */ + shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { /* equal exponents, both infinity or NaN */ + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN( a, b ); + } + float_raise( float_flag_invalid ); /* infinity minus infinity */ + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { /* both subnormal or zero: continue with exponent 1 */ + aExp = 1; + bExp = 1; + } + if ( bSig0 < aSig0 ) goto aBigger; + if ( aSig0 < bSig0 ) goto bBigger; + if ( bSig1 < aSig1 ) goto aBigger; + if ( aSig1 < bSig1 ) goto bBigger; + return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); /* equal magnitudes: zero, negative only when rounding down */ + bExpBigger: + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); /* finite minus infinity: infinity with the opposite sign */ + } + if ( aExp == 0 ) { + ++expDiff; /* subnormal a: no implicit bit, alignment distance is one less */ + } + else { + aSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); /* align a with b */ + bSig0 |= LIT64( 0x4000000000000000 ); + bBigger: + sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; /* b had the larger magnitude, so the sign flips */ + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); /* align b with a */ + aSig0 |= LIT64( 0x4000000000000000 ); + aBigger: + sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); /* the - 14 compensates for the 14-bit left shift applied on entry */ + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the quadruple-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_add( float128 a, float128 b ) +{ + flag aSign, bSign; + + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { /* same sign: magnitudes add */ + return addFloat128Sigs( a, b, aSign ); + } + else { /* opposite signs: magnitudes subtract */ + return subFloat128Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the quadruple-precision floating-point +values `a' and `b'. The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_sub( float128 a, float128 b ) +{ + flag aSign, bSign; + + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { /* same sign: magnitudes subtract */ + return subFloat128Sigs( a, b, aSign ); + } + else { /* opposite signs: magnitudes add, result takes the sign of a */ + return addFloat128Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the quadruple-precision floating-point +values `a' and `b'. 
The operation is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_mul( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; /* sign of a product is the XOR of the operand signs */ + if ( aExp == 0x7FFF ) { /* a is infinity or NaN */ + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b ); + } + if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; /* infinity times zero */ + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { /* b is infinity or NaN */ + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { /* zero times infinity */ + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); /* zero operand gives a signed zero */ + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + zExp = aExp + bExp - 0x4000; + aSig0 |= LIT64( 0x0001000000000000 ); /* implicit leading 1 of a */ + shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); /* b is shifted without its leading 1 set (bit 48 would move to bit 64) */ + mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); + add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); /* adds a once more; presumably this supplies the contribution of the leading 1 of b that was left out above */ + zSig2 |= ( zSig3 != 0 ); /* lowest product word only survives as a sticky bit */ + if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { /* product significand reached 2: renormalize */ + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + ++zExp; + } + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the quadruple-precision floating-point value +`a' by the corresponding value `b'. The operation is performed according to +the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +float128 float128_div( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, zExp; + bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { /* a is infinity or NaN */ + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + goto invalid; /* infinity divided by infinity */ + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return packFloat128( zSign, 0, 0, 0 ); /* finite divided by infinity gives a signed zero */ + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { /* divisor is zero */ + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { /* zero divided by zero */ + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero ); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = aExp - bExp + 0x3FFD; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); /* leading 1 of a moved to bit 63 */ + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); /* leading 1 of b moved to bit 63 */ + if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { /* make the dividend significand smaller than the divisor, compensating in the exponent */ + shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); /* estimate of the high quotient word */ + mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); + sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (sbits64) rem0 < 0 ) { /* estimate was too large: step it down until the remainder is non-negative */ + --zSig0; + add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); /* estimate of the low quotient word */ + if ( ( zSig1 & 0x3FFF ) <= 4 ) { /* exact correction only when the low 14 bits are near zero; presumably the only case where the estimate error can change the rounding */ + mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); /* nonzero remainder recorded as a sticky bit */ + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the remainder of the quadruple-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float128 float128_rem( float128 a, float128 b ) +{ + flag aSign, bSign, zSign; + int32 aExp, bExp, expDiff; + bits64 aSig0, aSig1, bSig0, bSig1; + bits64 q, term0, term1, term2, allZero, alternateASig0, alternateASig1; + bits64 sigMean1; + sbits64 sigMean0; /* signed so that the sign of the 128-bit mean can be tested directly */ + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + if ( aExp == 0x7FFF ) { /* a is infinity or NaN */ + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN( a, b ); + } + goto invalid; /* remainder of infinity is invalid */ + } + if ( bExp == 0x7FFF ) { + if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + return a; /* finite a with infinite b: a is returned unchanged */ + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { /* remainder with respect to zero is invalid */ + invalid: + float_raise( float_flag_invalid ); + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; /* zero dividend is returned unchanged */ + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; /* a is returned unchanged when its exponent is at least 2 below that of b */ + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), + aSig1, + 15 - ( expDiff < 0 ), + &aSig0, + &aSig1 + ); /* a is shifted one bit less when expDiff is -1 */ + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + q = le128( bSig0, bSig1, aSig0, aSig1 ); /* first quotient bit */ + if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 64; + while ( 0 < expDiff ) { /* each pass lowers expDiff by 61 */ + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; /* lower the estimate by 4, clamped at 0; presumably a safety margin against overestimation */ + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); + shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); + sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 61; + } + if ( -64 < expDiff ) { /* some quotient bits remain to be produced */ + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + expDiff += 52; + if ( expDiff < 0 ) { + shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + } + do { /* subtract b until the remainder goes negative, keeping the last value before each subtraction */ + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (sbits64) aSig0 ); + /* Every other add128() call in this file passes bits64 pointers for its two outputs, while sigMean0 is deliberately an sbits64. The explicit cast makes the signedness change visible instead of relying on an implicit conversion between incompatible pointer types; the stored bit pattern is unchanged. */ + add128( + aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *) &sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { /* keep the saved candidate when the sum of the two candidates is negative, or is zero with q odd */ + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (sbits64) aSig0 < 0 ); + if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); /* negate to obtain the magnitude */ + return + normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the square root of the quadruple-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float128 float128_sqrt( float128 a ) +{ + flag aSign; + int32 aExp, zExp; + bits64 aSig0, aSig1, zSig0, zSig1, zSig2; + bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + bits64 shiftedRem0, shiftedRem1; + float128 z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { /* infinity or NaN */ + if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); + if ( ! aSign ) return a; /* positive infinity is returned unchanged */ + goto invalid; /* negative infinity */ + } + if ( aSign ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; /* negative zero is returned unchanged */ + invalid: + float_raise( float_flag_invalid ); /* negative nonzero input */ + z.low = float128_default_nan_low; + z.high = float128_default_nan_high; + return z; + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; /* unbiased exponent is halved, then a bias of 0x3FFE is added back */ + aSig0 |= LIT64( 0x0001000000000000 ); + zSig0 = estimateSqrt32( aExp, aSig0>>17 ); /* initial estimate of the root from the top significand bits */ + zSig0 <<= 31; + shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); /* shift depends on the parity of the exponent */ + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4; /* refine the estimate with one division; NOTE(review): looks like a Newton-style step plus a small upward bias -- confirm */ + if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); /* top bit clear: replace with the all-ones value */ + shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 ); + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits64) rem0 < 0 ) { /* estimate too large: step down and update the remainder by 2 * zSig0 + 1 */ + --zSig0; + shortShift128Left( 0, zSig0, 1, &term0, &term1 ); + term1 |= 1; + add128( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 ); + zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); /* estimate of the low word of the root */ + if ( ( zSig1 & 0x3FFF ) <= 5 ) { /* exact correction only when the low 14 bits are near zero; presumably the only case where the estimate error can change the rounding */ + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( zSig0, zSig1, &term1, &term2 ); + shortShift128Left( term1, term2, 1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits64) rem1 < 0 ) { + --zSig1; + shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 ); + term3 |= 1; + add192( + rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); /* nonzero remainder recorded as a sticky bit */ + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); /* sign argument is always 0 here */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_eq( float128 a, float128 b ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { /* at least one operand is a NaN: never equal */ + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { /* only signaling NaNs raise invalid here */ + float_raise( float_flag_invalid ); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); /* identical bit patterns, or both operands are zeros that differ only in the sign bit */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float128_le( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { /* any NaN operand, quiet or signaling, raises invalid and compares false */ + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { /* opposite signs: true if a is the negative one, or if both are zeros */ + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); /* same sign: compare the raw 128-bit patterns, with operands swapped for negatives */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_lt( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { /* any NaN operand, quiet or signaling, raises invalid and compares false */ + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { /* opposite signs: true only if a is negative and the operands are not both zeros */ + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); /* same sign: compare the raw 128-bit patterns, with operands swapped for negatives */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. 
The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_eq_signaling( float128 a, float128 b ) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { /* unlike float128_eq, every NaN operand raises invalid */ + float_raise( float_flag_invalid ); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); /* identical bit patterns, or both operands are zeros that differ only in the sign bit */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_le_quiet( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { /* NaN operand: result is false, invalid is raised only for signaling NaNs */ + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { /* opposite signs: true if a is the negative one, or if both are zeros */ + return + aSign + || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? 
le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); /* same sign: compare the raw 128-bit patterns, with operands swapped for negatives */ + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the quadruple-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float128_lt_quiet( float128 a, float128 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { /* NaN operand: result is false, invalid is raised only for signaling NaNs */ + if ( float128_is_signaling_nan( a ) + || float128_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { /* opposite signs: true only if a is negative and the operands are not both zeros */ + return + aSign + && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); /* same sign: compare the raw 128-bit patterns, with operands swapped for negatives */ + +} + +#endif + diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h new file mode 100644 index 000000000..26745a4cb --- /dev/null +++ b/arch/arm/nwfpe/softfloat.h @@ -0,0 +1,290 @@ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. 
The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/softfloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these three paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#ifndef __SOFTFLOAT_H__ +#define __SOFTFLOAT_H__ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. The same applies to +the `FLOAT128' macro and the quadruple-precision format `float128'. +------------------------------------------------------------------------------- +*/ +#define FLOATX80 +/* #define FLOAT128 */ + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. 
+------------------------------------------------------------------------------- +*/ +typedef unsigned long int float32; /* NOTE(review): presumably relies on unsigned long being 32 bits wide on the target -- confirm */ +typedef unsigned long long float64; /* NOTE(review): presumably relies on unsigned long long being 64 bits wide -- confirm */ +#ifdef FLOATX80 +typedef struct { + unsigned short high; /* presumably the sign and exponent field -- confirm against packFloatx80 */ + unsigned long long low; /* presumably the 64-bit significand -- confirm against packFloatx80 */ +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { + unsigned long long high, low; /* the two 64-bit halves of the 128-bit encoding */ +} float128; +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode. +------------------------------------------------------------------------------- +*/ +extern signed char float_detect_tininess; +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. +------------------------------------------------------------------------------- +*/ +extern signed char float_rounding_mode; +enum { + float_round_nearest_even = 0, + float_round_to_zero = 1, + float_round_down = 2, + float_round_up = 3 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. +------------------------------------------------------------------------------- +extern signed char float_exception_flags; +enum { + float_flag_inexact = 1, + float_flag_underflow = 2, + float_flag_overflow = 4, + float_flag_divbyzero = 8, + float_flag_invalid = 16 +}; + +ScottB: November 4, 1998 +Changed the enumeration to match the bit order in the FPA11. +*/ + +extern signed char float_exception_flags; /* bit mask built from the float_flag_* values below */ +enum { + float_flag_invalid = 1, + float_flag_divbyzero = 2, + float_flag_overflow = 4, + float_flag_underflow = 8, + float_flag_inexact = 16 +}; /* each flag is a distinct bit, so flags can be combined with bitwise OR */ + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. 
+------------------------------------------------------------------------------- +*/ +void float_raise( signed char ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( signed int ); +float64 int32_to_float64( signed int ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( signed int ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( signed int ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float32_to_int32( float32 ); +signed int float32_to_int32_round_to_zero( float32 ); +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. 
+------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +char float32_eq( float32, float32 ); +char float32_le( float32, float32 ); +char float32_lt( float32, float32 ); +char float32_eq_signaling( float32, float32 ); +char float32_le_quiet( float32, float32 ); +char float32_lt_quiet( float32, float32 ); +char float32_is_signaling_nan( float32 ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float64_to_int32( float64 ); +signed int float64_to_int32_round_to_zero( float64 ); +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. 
+------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +char float64_eq( float64, float64 ); +char float64_le( float64, float64 ); +char float64_lt( float64, float64 ); +char float64_eq_signaling( float64, float64 ); +char float64_le_quiet( float64, float64 ); +char float64_lt_quiet( float64, float64 ); +char float64_is_signaling_nan( float64 ); + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int floatx80_to_int32( floatx80 ); +signed int floatx80_to_int32_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern signed char floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. 
+------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +char floatx80_eq( floatx80, floatx80 ); +char floatx80_le( floatx80, floatx80 ); +char floatx80_lt( floatx80, floatx80 ); +char floatx80_eq_signaling( floatx80, floatx80 ); +char floatx80_le_quiet( floatx80, floatx80 ); +char floatx80_lt_quiet( floatx80, floatx80 ); +char floatx80_is_signaling_nan( floatx80 ); + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision conversion routines. +------------------------------------------------------------------------------- +*/ +signed int float128_to_int32( float128 ); +signed int float128_to_int32_round_to_zero( float128 ); +float32 float128_to_float32( float128 ); +float64 float128_to_float64( float128 ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision operations. 
+------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 ); +float128 float128_add( float128, float128 ); +float128 float128_sub( float128, float128 ); +float128 float128_mul( float128, float128 ); +float128 float128_div( float128, float128 ); +float128 float128_rem( float128, float128 ); +float128 float128_sqrt( float128 ); +char float128_eq( float128, float128 ); +char float128_le( float128, float128 ); +char float128_lt( float128, float128 ); +char float128_eq_signaling( float128, float128 ); +char float128_le_quiet( float128, float128 ); +char float128_lt_quiet( float128, float128 ); +char float128_is_signaling_nan( float128 ); + +#endif + +#endif diff --git a/arch/arm/vmlinux-armv.lds b/arch/arm/vmlinux-armv.lds index f0d4a86c5..681143172 100644 --- a/arch/arm/vmlinux-armv.lds +++ b/arch/arm/vmlinux-armv.lds @@ -7,50 +7,64 @@ OUTPUT_ARCH(arm) ENTRY(_start) SECTIONS { - _text = .; /* Text and read-only data */ - .text : { + _text = .; /* Text and read-only data */ + .text : { } /* Set text start address */ + + __init_begin = .; /* Init code and data */ + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(4096); + __init_end = .; + + __ebsa285_begin = .; + .text.ebsa285 : { *(.text.ebsa285) } + .data.ebsa285 : { *(.data.ebsa285) } + . = ALIGN(4096); + __ebsa285_end = .; + + __netwinder_begin = .; + .text.netwinder : { *(.text.netwinder) } + .data.netwinder : { *(.data.netwinder) } + . = ALIGN(4096); + __netwinder_end = .; + + .text.real : { /* Real text segment */ *(.text) *(.fixup) *(.gnu.warning) - } = 0x9090 + } + .text.lock : { *(.text.lock) } /* out-of-line lock text */ .rodata : { *(.rodata) } .kstrtab : { *(.kstrtab) } - . = ALIGN(16); /* Exception table */ + . 
= ALIGN(16); /* Exception table */ __start___ex_table = .; __ex_table : { *(__ex_table) } __stop___ex_table = .; - __start___ksymtab = .; /* Kernel symbol table */ + __start___ksymtab = .; /* Kernel symbol table */ __ksymtab : { *(__ksymtab) } __stop___ksymtab = .; - _etext = .; /* End of text section */ + _etext = .; /* End of text section */ . = ALIGN(8192); - .data : { /* Data */ + .data : { /* Data */ *(.init.task) *(.data) CONSTRUCTORS } - _edata = .; /* End of data section */ - - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .text.init : { *(.text.init) } - .data.init : { *(.data.init) } - . = ALIGN(4096); - __init_end = .; + _edata = .; /* End of data section */ - __bss_start = .; /* BSS */ + __bss_start = .; /* BSS */ .bss : { *(.bss) } _end = . ; - /* Stabs debugging sections. */ + /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } .stab.excl 0 : { *(.stab.excl) } |