Merge with Linux 2.3.7.

WARNING: 2.3.7 is known to eat filesystems for breakfast and little children for lunch, so if you try this on your machine make backups first ...
author: Ralf Baechle <ralf@linux-mips.org> 1999-06-22 23:05:57 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 1999-06-22 23:05:57 +0000
commit: 51d3b7814cdccef9188240fe0cbd8d97ff2c7470 (patch)
tree: 5cbb01d0323d4f63ade66bdf48ba4a91aaa6df16 /arch/arm
parent: 52273a23c9a84336b93a35e4847fc88fac7eb0e4 (diff)
84 files changed, 15109 insertions, 2493 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 967ee6766..1c198989d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -10,21 +10,31 @@
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
 #
-# Copyright (C) 1995, 1996 by Russell King
+# Copyright (C) 1995-1999 by Russell King
 
 CFLAGS_PROC		:=
 ASFLAGS_PROC		:=
 
-# All processors get `-mshort-load-bytes' for now, to work around alignment
-# problems.  This is more of a hack that just happens to work than a real fix
-# but it will do for now.
+# GCC 2.7 uses different options to later compilers; sort out which we have
+CONFIG_GCC_NEW		:= $(shell if $(CC) --version 2>&1 | grep '^2\.7' > /dev/null; then echo n; else echo y; fi)
+
+# Hack to get around RiscPC with StrongARM optimisation
+# problem - force ARM710 optimisation for now.
+ifeq ($(CONFIG_GCC_NEW),y)
+  ifeq ($(CONFIG_ARCH_RPC),y)
+    ifeq ($(CONFIG_CPU_SA110),y)
+      CONFIG_CPU_SA110	:= n
+      CONFIG_CPU_ARM7	:= y
+    endif
+  endif
+endif
 
 ifeq ($(CONFIG_CPU_26),y)
   PROCESSOR		 = armo
   TEXTADDR		 = 0x02080000
   ZTEXTADDR		 = 0x01800000
   ZRELADDR		 = 0x02080000
-  ifeq ($(CONFIG_BINUTILS_NEW),y)
+  ifeq ($(CONFIG_GCC_NEW),y)
     CFLAGS_PROC		+= -mapcs-26 -mshort-load-bytes
     ifeq ($(CONFIG_CPU_ARM2),y)
       CFLAGS_PROC	+= -mcpu=arm2
@@ -49,7 +59,7 @@ endif
 ifeq ($(CONFIG_CPU_32),y)
   PROCESSOR		 = armv
   TEXTADDR		 = 0xC0008000
-  ifeq ($(CONFIG_BINUTILS_NEW),y)
+  ifeq ($(CONFIG_GCC_NEW),y)
     CFLAGS_PROC		+= -mapcs-32 -mshort-load-bytes
     ifeq ($(CONFIG_CPU_ARM6),y)
       CFLAGS_PROC	+= -mcpu=arm6
@@ -68,10 +78,11 @@ endif
 
 # Processor Architecture
 # CFLAGS_PROC - processor dependent CFLAGS
-# PROCESSOR - processor type
-# TEXTADDR - Uncompressed kernel link text address
-# ZTEXTADDR - Compressed kernel link text address
-# ZRELADDR - Compressed kernel relocating address (point at which uncompressed kernel is loaded).
+# PROCESSOR   - processor type
+# TEXTADDR    - Uncompressed kernel link text address
+# ZTEXTADDR   - Compressed kernel link text address
+# ZRELADDR    - Compressed kernel relocating address
+#	        (point at which uncompressed kernel is loaded).
 #
 
 COMPRESSED_HEAD	 = head.o
@@ -79,19 +90,16 @@ COMPRESSED_HEAD	 = head.o
 ifeq ($(CONFIG_ARCH_A5K),y)
 MACHINE		 = a5k
 ARCHDIR		 = arc
-COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o
 endif
 
 ifeq ($(CONFIG_ARCH_ARC),y)
 MACHINE		 = arc
 ARCHDIR		 = arc
-COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o
 endif
 
 ifeq ($(CONFIG_ARCH_RPC),y)
 MACHINE		 = rpc
 ARCHDIR		 = rpc
-COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr.o
 ZTEXTADDR	 = 0x10008000
 ZRELADDR	 = 0x10008000
 endif
@@ -103,13 +111,17 @@ ZTEXTADDR	 = 0x00008000
 ZRELADDR	 = 0x00008000
 endif
 
-ifeq ($(CONFIG_ARCH_EBSA285),y)
-MACHINE		 = ebsa285
+ifeq ($(CONFIG_FOOTBRIDGE),y)
+MACHINE		 = footbridge
 ARCHDIR		 = ebsa285
 ZTEXTADDR	 = 0x00008000
 ZRELADDR	 = 0x00008000
 endif
 
+ifeq ($(CONFIG_ARCH_CO285),y)
+TEXTADDR	 = 0x60008000
+endif
+
 ifeq ($(CONFIG_ARCH_NEXUSPCI),y)
 MACHINE		 = nexuspci
 ARCHDIR		 = nexuspci
@@ -119,31 +131,13 @@ COMPRESSED_EXTRA = $(TOPDIR)/arch/arm/lib/ll_char_wr_scc.o
 COMPRESSED_HEAD	 = head-nexuspci.o
 endif
 
-ifeq ($(CONFIG_ARCH_VNC),y)
-TEXTADDR	 = 0xC000C000
-MACHINE		 = vnc
-ARCHDIR		 = vnc
-endif
-
-ifeq ($(CONFIG_ARCH_TBOX),y)
-MACHINE		 = tbox
-ARCHDIR		 = tbox
-ZTEXTADDR	 = 0x80008000
-ZRELDIR		 = 0x80008000
-endif
-
 PERL		 = perl
-ifeq ($(CONFIG_BINUTILS_NEW),y)
-LD		 = $(CROSS_COMPILE)ld -m elf32arm
-else
-LD		 = $(CROSS_COMPILE)ld -m elf_arm
-endif
+LD		 = $(CROSS_COMPILE)ld
 OBJCOPY		 = $(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S
 OBJDUMP		 = $(CROSS_COMPILE)objdump
 CPP		 = $(CC) -E
 ARCHCC		:= $(word 1,$(CC))
 GCCLIB		:= `$(CC) $(CFLAGS_PROC) --print-libgcc-file-name`
-#GCCARCH		:= -B/usr/bin/arm-linuxelf- 
 HOSTCFLAGS	:= $(CFLAGS:-fomit-frame-pointer=)
 ifeq ($(CONFIG_FRAME_POINTER),y)
 CFLAGS		:= $(CFLAGS:-fomit-frame-pointer=)
@@ -153,75 +147,40 @@ ASFLAGS		:= $(ASFLAGS_PROC) $(ASFLAGS)
 LINKFLAGS	 = -T $(TOPDIR)/arch/arm/vmlinux-$(PROCESSOR).lds -e stext -Ttext $(TEXTADDR)
 ZLINKFLAGS	 = -Ttext $(ZTEXTADDR)
 
-SUBDIRS		:= $(SUBDIRS:drivers=arch/arm/drivers) arch/arm/lib arch/arm/kernel arch/arm/mm
-HEAD		:= arch/arm/kernel/head-$(PROCESSOR).o arch/arm/kernel/init_task.o
+# If we're intending to debug the kernel, make sure it has line number
+# information.  This gets stripped out when building (z)Image so it doesn't
+# add anything to the footprint of the running kernel.
+ifeq ($(CONFIG_DEBUG_INFO),y)
+CFLAGS		+= -g
+endif
+
+HEAD		:= arch/arm/kernel/head-$(PROCESSOR).o \
+		   arch/arm/kernel/init_task.o
+SUBDIRS		:= arch/arm/special $(SUBDIRS) arch/arm/lib arch/arm/kernel \
+		   arch/arm/mm arch/arm/nwfpe
 CORE_FILES	:= arch/arm/kernel/kernel.o arch/arm/mm/mm.o $(CORE_FILES)
 LIBS		:= arch/arm/lib/lib.a $(LIBS) $(GCCLIB)
-
-BLOCK_DRIVERS	:= drivers/block/block.a
-CDROM_DRIVERS	:= drivers/cdrom/cdrom.a
-ifeq ($(CONFIG_FB),y)
-CHAR_DRIVERS	:= arch/arm/drivers/char1/char1.a drivers/char/char.a arch/arm/drivers/char1/char1.a
-else
-ifeq ($(CONFIG_VGA_CONSOLE),y)
-CHAR_DRIVERS	:= arch/arm/drivers/char1/char1.a drivers/char/char.a arch/arm/drivers/char1/char1.a
-else
-CHAR_DRIVERS	:= arch/arm/drivers/char/char.a
-endif
-endif
-MISC_DRIVERS	:= drivers/misc/misc.a
-NET_DRIVERS	:= drivers/net/net.a
-PARIDE_DRIVERS	:= drivers/block/paride/paride.a
-PCI_DRIVERS	:= drivers/pci/pci.a
-SCSI_DRIVERS	:= drivers/scsi/scsi.a
-SOUND_DRIVERS	:= drivers/sound/sound.a
-VIDEO_DRIVERS	:= drivers/video/video.a
-PNP_DRIVERS	:= drivers/pnp/pnp.a
+DRIVERS		+= arch/arm/special/special.a
 
 ifeq ($(CONFIG_ARCH_ACORN),y)
-BLOCK_DRIVERS	+= drivers/acorn/block/acorn-block.a
-CHAR_DRIVERS	+= drivers/acorn/char/acorn-char.a
-NET_DRIVERS	+= drivers/acorn/net/acorn-net.a drivers/net/net.a
-SCSI_DRIVERS	+= drivers/acorn/scsi/acorn-scsi.a
+SUBDIRS		+= drivers/acorn/block drivers/acorn/char drivers/acorn/net \
+		   drivers/acorn/scsi
+DRIVERS		+= drivers/acorn/block/acorn-block.a \
+		   drivers/acorn/char/acorn-char.a \
+		   drivers/acorn/net/acorn-net.a \
+		   drivers/acorn/scsi/acorn-scsi.a
 endif
 
-DRIVERS		:= $(BLOCK_DRIVERS) $(CHAR_DRIVERS) $(MISC_DRIVERS) $(NET_DRIVERS)
-
-ifeq ($(CONFIG_FB),y)
-DRIVERS		:= $(DRIVERS) $(VIDEO_DRIVERS)
-else
-ifeq ($(CONFIG_VGA_CONSOLE),y)
-DRIVERS		:= $(DRIVERS) $(VIDEO_DRIVERS)
-endif
-endif
-ifeq ($(CONFIG_SCSI),y)
-DRIVERS		:= $(DRIVERS) $(SCSI_DRIVERS)
-endif
-ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR),)
-DRIVERS		:= $(DRIVERS) $(CDROM_DRIVERS)
-endif
-ifdef CONFIG_PCI
-DRIVERS		:= $(DRIVERS) $(PCI_DRIVERS)
-endif
-ifeq ($(CONFIG_SOUND),y)
-DRIVERS		:= $(DRIVERS) $(SOUND_DRIVERS)
-endif
-ifeq ($(CONFIG_PARIDE),y)
-DRIVERS		:= $(DRIVERS) $(PARIDE_DRIVERS)
-endif
-ifdef CONFIG_PNP
-DRIVERS		:= $(DRIVERS) $(PNP_DRIVERS)
+ifeq ($(CONFIG_NWFPE),y)
+DRIVERS		+= arch/arm/nwfpe/math-emu.a
 endif
 
+MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
 symlinks::
 	$(RM) include/asm-arm/arch include/asm-arm/proc
 	(cd include/asm-arm; ln -sf arch-$(ARCHDIR) arch; ln -sf proc-$(PROCESSOR) proc)
 
-# Once we've finished integrating the sources, the @$(MAKE) will disappear
-archmrproper:
-	rm -f include/asm-arm/arch include/asm-arm/proc
-	@$(MAKE) -C arch/$(ARCH)/drivers mrproper
-
 arch/arm/kernel: dummy
 	$(MAKE) linuxsubdirs SUBDIRS=arch/arm/kernel
 
@@ -231,19 +190,20 @@ arch/arm/mm: dummy
 arch/arm/lib: dummy
 	$(MAKE) linuxsubdirs SUBDIRS=arch/arm/lib
 
-MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
-
-zImage: vmlinux
-	@$(MAKEBOOT) zImage
+zImage zinstall Image install: vmlinux
+	@$(MAKEBOOT) $@
 
-zinstall: vmlinux
-	@$(MAKEBOOT) zinstall
+# Once we've finished integrating the sources, the @$(MAKE) will disappear
+archmrproper:
+	rm -f include/asm-arm/arch include/asm-arm/proc
+	@$(MAKE) -C arch/$(ARCH)/special mrproper
 
-Image: vmlinux
-	@$(MAKEBOOT) Image
+archclean:
+	@$(MAKEBOOT) clean
+	$(RM) arch/arm/lib/constants.h
 
-install: vmlinux
-	@$(MAKEBOOT) install
+archdep:
+	@$(MAKEBOOT) dep
 
 # My testing targets (that short circuit a few dependencies)
 zImg:;	@$(MAKEBOOT) zImage
@@ -251,10 +211,19 @@ Img:;	@$(MAKEBOOT) Image
 i:;	@$(MAKEBOOT) install
 zi:;	@$(MAKEBOOT) zinstall
 
-archclean:
-	@$(MAKEBOOT) clean
-	$(RM) arch/arm/lib/constants.h
+a5k_config:
+	$(RM) arch/arm/defconfig
+	cp arch/arm/def-configs/a5k arch/arm/defconfig
+
+ebsa110_config:
+	$(RM) arch/arm/defconfig
+	cp arch/arm/def-configs/ebsa110 arch/arm/defconfig
+
+footbridge_config:
+	$(RM) arch/arm/defconfig
+	cp arch/arm/def-configs/footbridge arch/arm/defconfig
+
+rpc_config:
+	$(RM) arch/arm/defconfig
+	cp arch/arm/def-configs/rpc arch/arm/defconfig
 
-archdep:
-	@$(MAKEBOOT) dep
-sed -e /^MACHINE..*=/s,= .*,= rpc,;/^PROCESSOR..*=/s,= .*,= armv, linux/arch/arm/Makefile.normal
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 0c6a04c5b..cf1481ab0 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -11,10 +11,15 @@ HEAD	=$(COMPRESSED_HEAD)
 OBJS	=$(HEAD) misc.o $(COMPRESSED_EXTRA)
 CFLAGS	=-O2 -DSTDC_HEADERS $(CFLAGS_PROC)
 ARFLAGS =rc
+FONTC	=$(TOPDIR)/drivers/video/font_acorn_8x8.c
+
+ifeq ($(CONFIG_ARCH_ACORN),y)
+OBJS	+= ll_char_wr.o font.o
+endif
 
 all:		vmlinux
 
-vmlinux:	piggy.o $(OBJS)
+vmlinux:	$(OBJS) piggy.o
 		$(LD) $(ZLINKFLAGS) -o vmlinux $(OBJS) piggy.o
 
 $(HEAD): 	$(HEAD:.o=.S)
@@ -29,5 +34,8 @@ piggy.o:	$(SYSTEM)
 		$(LD) -r -o piggy.o -b binary $$tmppiggy.gz -b elf32-arm -T $$tmppiggy.lnk; \
 		rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk;
 
+font.o:		$(FONTC)
+		$(CC) -Dstatic= -c -o $@ $(FONTC)
+
 clean:;		rm -f vmlinux core
 
diff --git a/arch/arm/lib/ll_char_wr.S b/arch/arm/boot/compressed/ll_char_wr.S
index 966d2846e..57865f2fd 100644
--- a/arch/arm/lib/ll_char_wr.S
+++ b/arch/arm/boot/compressed/ll_char_wr.S
@@ -12,7 +12,7 @@
 @ Regs: [] = corruptible
 @       {} = used
 @       () = do not use
-
+#define __ASSEMBLY__
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 		.text
@@ -25,7 +25,7 @@
 
 LC0:		.word	SYMBOL_NAME(bytes_per_char_h)
 		.word	SYMBOL_NAME(video_size_row)
-		.word	SYMBOL_NAME(cmap_80)
+		.word	SYMBOL_NAME(acorndata_8x8)
 		.word	SYMBOL_NAME(con_charconvtable)
 
 ENTRY(ll_write_char)
diff --git a/arch/arm/config.in b/arch/arm/config.in
index 467218db7..2fea6a661 100644
--- a/arch/arm/config.in
+++ b/arch/arm/config.in
@@ -14,18 +14,31 @@ choice 'ARM system type'	\
 	 A5000			CONFIG_ARCH_A5K \
 	 RiscPC			CONFIG_ARCH_RPC \
 	 EBSA-110		CONFIG_ARCH_EBSA110 \
-	 EBSA-285		CONFIG_ARCH_EBSA285 \
-	 NexusPCI		CONFIG_ARCH_NEXUSPCI \
-	 Corel-VNC		CONFIG_ARCH_VNC \
-	 Tbox			CONFIG_ARCH_TBOX" RiscPC
+	 FootBridge-based	CONFIG_FOOTBRIDGE" RiscPC
+
+if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then
+  bool 'FootBridge in HOST mode'	CONFIG_HOST_FOOTBRIDGE
+  if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then
+    define_bool CONFIG_ADDIN_FOOTBRIDGE n
+  else
+    define_bool CONFIG_ADDIN_FOOTBRIDGE y
+  fi
+fi
+
+if [ "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then
+  bool '  Include support for Intel EBSA285' CONFIG_ARCH_EBSA285
+  bool '  Include support for Chalice CATS boards' CONFIG_CATS
+  bool '  Include support for Corel NetWinder' CONFIG_ARCH_NETWINDER
+fi
 
-if [ "$CONFIG_ARCH_EBSA285" = "y" ]; then
-  bool '  Include support for CATS boards' CONFIG_CATS
+if [ "$CONFIG_ADDIN_FOOTBRIDGE" = "y" ]; then
+  # If we get any other footbridge-based plug-in boards, then
+  # add your architecture options here
+  define_bool CONFIG_ARCH_CO285 y
 fi
 
 # Select various configuration options depending on the machine type
 #  Easy check for Acorn-style architectures
-
 if [ "$CONFIG_ARCH_ARC" = "y" -o \
      "$CONFIG_ARCH_A5K" = "y" -o \
      "$CONFIG_ARCH_RPC" = "y" ]; then
@@ -34,23 +47,19 @@ else
   define_bool CONFIG_ARCH_ACORN n
 fi
 
-if [ "$CONFIG_ARCH_TBOX" = "y" ]; then
-  define_bool CONFIG_BUS_I2C y
-fi
+#if [ "$CONFIG_ARCH_TBOX" = "y" ]; then
+#  define_bool CONFIG_BUS_I2C y
+#fi
 
 #  These machines always have PCI
-
 if [ "$CONFIG_ARCH_NEXUSPCI" = "y" -o \
-     "$CONFIG_ARCH_VNC" = "y" ]; then
+     "$CONFIG_HOST_FOOTBRIDGE" = "y" ]; then
   define_bool CONFIG_PCI y
 fi
-if [ "$CONFIG_ARCH_EBSA285" = "y" ]; then
-  bool "PCI support" CONFIG_PCI
-fi
 
 # These machines have ISA-DMA
 if [ "$CONFIG_CATS" = "y" -o \
-     "$CONFIG_ARCH_VNC" = "y" ]; then
+     "$CONFIG_ARCH_NETWINDER" = "y" ]; then
   define_bool CONFIG_ISA_DMA y
 else
   define_bool CONFIG_ISA_DMA n
@@ -59,7 +68,6 @@ fi
 # Figure out whether this system uses 26-bit or 32-bit CPUs.  Nobody has
 # ever built a machine that can take both, and now that ARM3 is obsolete
 # nobody is likely to either.
-
 if [ "$CONFIG_ARCH_ARC" = "y" -o \
      "$CONFIG_ARCH_A5K" = "y" ]; then
   define_bool CONFIG_CPU_32 n
@@ -71,7 +79,6 @@ fi
 
 # Now allow the user to choose a more precise CPU.  This is only used to set
 # the flags we pass to GCC, not in any code.
-
 choice 'Optimise for CPU'				\
 	"ARM2		CONFIG_CPU_ARM2 \
 	 ARM3		CONFIG_CPU_ARM3 \
@@ -80,22 +87,21 @@ choice 'Optimise for CPU'				\
 	 SA110		CONFIG_CPU_SA110" ARM6
 
 if [ "$CONFIG_CPU_26" = "y" ]; then
-
 # For 26-bit CPUs, the page size changes with the amount of physical RAM!
 # The default is 4MB but if the user has less they have to own up to it here.
-
   choice 'Physical memory size'		\
 	"4MB+		CONFIG_PAGESIZE_32	\
-	 2MB		CONFIG_PAGESIZE_16	\
-	 1MB/512K	CONFIG_PAGESIZE_8" 4MB+
+	 2MB		CONFIG_PAGESIZE_16" 4MB+
 fi
 endmenu
 
 mainmenu_option next_comment
 comment 'Code maturity level options'
 bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL
-bool 'Use new compilation options (for GCC 2.8)' CONFIG_BINUTILS_NEW
-bool 'Compile kernel with frame pointer (for useful debugging)' CONFIG_FRAME_POINTER
+if [ "$CONFIG_CPU_32" = "y" -a "$CONFIG_ARCH_EBSA110" != "y" ]; then
+  bool 'Enable kernel-mode alignment trap handler (EXPERIMENTAL)' CONFIG_ALIGNMENT_TRAP
+fi
+bool 'Split text into discardable sections' CONFIG_TEXT_SECTIONS
 endmenu
 
 mainmenu_option next_comment
@@ -113,13 +119,19 @@ bool 'Networking support' CONFIG_NET
 bool 'System V IPC' CONFIG_SYSVIPC
 bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
 bool 'Sysctl support' CONFIG_SYSCTL
+tristate 'Math emulation' CONFIG_NWFPE
 tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
 tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
 tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+if [ "$CONFIG_CPU_32" = "y" ]; then
+  tristate 'RISC OS personality' CONFIG_ARTHUR
+fi
 
 tristate 'Parallel port support' CONFIG_PARPORT
 if [ "$CONFIG_PARPORT" != "n" ]; then
-  dep_tristate '  Archimedes hardware' CONFIG_PARPORT_ARC $CONFIG_PARPORT
+  if [ "$CONFIG_ARCH_ARC" = "y" ]; then
+    dep_tristate '  Archimedes hardware' CONFIG_PARPORT_ARC $CONFIG_PARPORT
+  fi
   dep_tristate '  PC-style hardware' CONFIG_PARPORT_PC $CONFIG_PARPORT
 # If exactly one hardware type is selected then parport will optimise away
 # support for loading any others.  Defeat this if the user is keen.
@@ -129,13 +141,29 @@ if [ "$CONFIG_PARPORT" != "n" ]; then
     fi
   fi
 fi
-if [ "$CONFIG_ARCH_EBSA285" = "y" -o \
-     "$CONFIG_ARCH_EBSA110" = "y" -o \
-     "$CONFIG_ARCH_VNC" = "y" ]; then
+if [ "$CONFIG_ARCH_EBSA110" = "y" -o \
+     "$CONFIG_ARCH_NETWINDER" = "y" -o \
+     "$CONFIG_CATS" = "y" ]; then
   string 'Initial kernel command string' CONFIG_CMDLINE
 fi
+if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \
+     "$CONFIG_ARCH_EBSA110" = "y" -o   \
+     "$CONFIG_ARCH_EBSA285" = "y" -o   \
+     "$CONFIG_ARCH_CO285" = "y" ]; then
+  bool 'Timer and CPU usage LEDs' CONFIG_LEDS
+  if [ "$CONFIG_LEDS" = "y" ]; then
+    if [ "$CONFIG_ARCH_NETWINDER" = "y" -o \
+         "$CONFIG_ARCH_EBSA285" = "y" -o   \
+	 "$CONFIG_ARCH_CO285" = "y" ]; then
+      bool '  Timer LED' CONFIG_LEDS_TIMER
+      bool '  CPU usage LED' CONFIG_LEDS_CPU
+    fi
+  fi
+fi
 endmenu
 
+source drivers/i2o/Config.in
+
 source drivers/pnp/Config.in
 
 source drivers/block/Config.in
@@ -144,15 +172,19 @@ if [ "$CONFIG_ARCH_ACORN" = "y" ]; then
   source drivers/acorn/block/Config.in
 fi
 
-if [ "$CONFIG_VGA_CONSOLE" = "n" -a "$CONFIG_FB" = "n" ]; then
-  source arch/arm/drivers/char/Config.in
-else
-  source drivers/char/Config.in
-fi
+source drivers/char/Config.in
 if [ "$CONFIG_ARCH_ACORN" = "y" ]; then
-  source drivers/acorn/char/Config.in
+  if [ "$CONFIG_MOUSE" = "y" ]; then
+    if [ "$CONFIG_ARCH_RPC" != "y" ]; then
+      define_bool CONFIG_KBDMOUSE y
+    else
+      define_bool CONFIG_RPCMOUSE y
+    fi
+  fi
 fi
 
+source drivers/usb/Config.in
+
 if [ "$CONFIG_VT" = "y" ]; then
   mainmenu_option next_comment
   comment 'Console drivers'
@@ -166,9 +198,11 @@ fi
 
 if [ "$CONFIG_NET" = "y" ]; then
   source net/Config.in
-fi
 
-if [ "$CONFIG_NET" = "y" ]; then
+  source net/ax25/Config.in
+
+  source net/irda/Config.in
+
   mainmenu_option next_comment
   comment 'Network device support'
 
@@ -179,6 +213,15 @@ if [ "$CONFIG_NET" = "y" ]; then
   endmenu
 fi
 
+# mainmenu_option next_comment
+# comment 'ISDN subsystem'
+#
+# tristate 'ISDN support' CONFIG_ISDN
+# if [ "$CONFIG_ISDN" != "n" ]; then
+#   source drivers/isdn/Config.in
+# fi
+# endmenu
+
 mainmenu_option next_comment
 comment 'SCSI support'
 
@@ -200,21 +243,29 @@ if [ "$CONFIG_ARCH_ACORN" = "y" -o "$CONFIG_PCI" = "y" ]; then
   endmenu
 fi
 
-# mainmenu_option next_comment
-# comment 'ISDN subsystem'
-#
-# tristate 'ISDN support' CONFIG_ISDN
-# if [ "$CONFIG_ISDN" != "n" ]; then
-#   source drivers/isdn/Config.in
-# fi
-# endmenu
-
 source fs/Config.in
 
 mainmenu_option next_comment
 comment 'Kernel hacking'
 
-bool 'Debug kernel errors' CONFIG_DEBUG_ERRORS
+bool 'Compile kernel with frame pointer (for useful debugging)' CONFIG_FRAME_POINTER
+bool 'Verbose kernel error messages' CONFIG_DEBUG_ERRORS
+bool 'Verbose user fault messages' CONFIG_DEBUG_USER
+bool 'Include debugging information in kernel binary' CONFIG_DEBUG_INFO
 #bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC
 bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+  if [ "$CONFIG_CPU_26" = "y" ]; then
+    bool 'Disable pgtable cache (EXPERIMENTAL)' CONFIG_NO_PGT_CACHE
+  fi
+
+  # These options are only for real kernel hackers
+  # who want to get their hands dirty. 
+  bool 'Kernel low-level debugging functions' CONFIG_DEBUG_LL
+  if [ "$CONFIG_DEBUG_LL" = "y" ]; then
+    if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then
+      bool 'Kernel low-level debugging messages via DC21285 port' CONFIG_DEBUG_DC21285_PORT
+    fi
+  fi
+fi
 endmenu
diff --git a/arch/arm/defconfig b/arch/arm/defconfig
index db89599be..ce85d6ffc 100644
--- a/arch/arm/defconfig
+++ b/arch/arm/defconfig
@@ -4,47 +4,71 @@
 CONFIG_ARM=y
 
 #
+# System and processor type
+#
+# CONFIG_ARCH_ARC is not set
+# CONFIG_ARCH_A5K is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_EBSA110 is not set
+CONFIG_FOOTBRIDGE=y
+CONFIG_HOST_FOOTBRIDGE=y
+# CONFIG_ADDIN_FOOTBRIDGE is not set
+CONFIG_ARCH_EBSA285=y
+# CONFIG_CATS is not set
+CONFIG_ARCH_NETWINDER=y
+# CONFIG_ARCH_ACORN is not set
+CONFIG_PCI=y
+CONFIG_ISA_DMA=y
+CONFIG_CPU_32=y
+# CONFIG_CPU_26 is not set
+# CONFIG_CPU_ARM2 is not set
+# CONFIG_CPU_ARM3 is not set
+# CONFIG_CPU_ARM6 is not set
+# CONFIG_CPU_ARM7 is not set
+CONFIG_CPU_SA110=y
+
+#
 # Code maturity level options
 #
 CONFIG_EXPERIMENTAL=y
+# CONFIG_ALIGNMENT_TRAP is not set
+# CONFIG_TEXT_SECTIONS is not set
 
 #
 # Loadable module support
 #
 CONFIG_MODULES=y
-CONFIG_MODVERSIONS=y
+# CONFIG_MODVERSIONS is not set
 CONFIG_KMOD=y
 
 #
 # General setup
 #
-# CONFIG_ARCH_ARC is not set
-# CONFIG_ARCH_A5K is not set
-CONFIG_ARCH_RPC=y
-# CONFIG_ARCH_EBSA110 is not set
-# CONFIG_ARCH_NEXUSPCI is not set
-CONFIG_ARCH_ACORN=y
-# CONFIG_PCI is not set
-# CONFIG_CPU_ARM2 is not set
-# CONFIG_CPU_ARM3 is not set
-# CONFIG_CPU_ARM6 is not set
-CONFIG_CPU_SA110=y
-CONFIG_FRAME_POINTER=y
-# CONFIG_BINUTILS_NEW is not set
-CONFIG_DEBUG_ERRORS=y
 CONFIG_NET=y
 CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
+CONFIG_NWFPE=y
 CONFIG_BINFMT_AOUT=y
-CONFIG_BINFMT_ELF=m
-# CONFIG_BINFMT_JAVA is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_ARTHUR is not set
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=y
+CONFIG_CMDLINE="root=/dev/hda2 ro mem=32M parport=0x378,7 ide0=autotune"
+CONFIG_LEDS=y
+CONFIG_LEDS_TIMER=y
+# CONFIG_LEDS_CPU is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
 
 #
-# Floppy, IDE, and other block devices
+# Block devices
 #
-CONFIG_BLK_DEV_FD=y
+# CONFIG_BLK_DEV_FD is not set
 CONFIG_BLK_DEV_IDE=y
 
 #
@@ -52,32 +76,165 @@ CONFIG_BLK_DEV_IDE=y
 #
 # CONFIG_BLK_DEV_HD_IDE is not set
 CONFIG_BLK_DEV_IDEDISK=y
-CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDECD is not set
 # CONFIG_BLK_DEV_IDETAPE is not set
 # CONFIG_BLK_DEV_IDEFLOPPY is not set
 # CONFIG_BLK_DEV_IDESCSI is not set
-# CONFIG_BLK_DEV_IDE_PCMCIA is not set
-CONFIG_BLK_DEV_IDE_CARDS=y
-CONFIG_BLK_DEV_IDE_ICSIDE=y
-# CONFIG_BLK_DEV_IDE_RAPIDE is not set
-# CONFIG_BLK_DEV_XD is not set
+# CONFIG_BLK_DEV_CMD640 is not set
+# CONFIG_BLK_DEV_RZ1000 is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_IDEDMA=y
+CONFIG_BLK_DEV_OFFBOARD=y
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_VIA82C586 is not set
+# CONFIG_BLK_DEV_CMD646 is not set
+CONFIG_BLK_DEV_SL82C105=y
+# CONFIG_IDE_CHIPSETS is not set
 
 #
 # Additional Block Devices
 #
 CONFIG_BLK_DEV_LOOP=m
-# CONFIG_BLK_DEV_MD is not set
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_STRIPED=m
+CONFIG_MD_MIRRORING=m
+CONFIG_MD_RAID5=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_INITRD=y
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_BLK_DEV_XD is not set
 CONFIG_PARIDE_PARPORT=y
-# CONFIG_PARIDE is not set
-CONFIG_BLK_DEV_PART=y
+CONFIG_PARIDE=m
+
+#
+# Parallel IDE high-level drivers
+#
+CONFIG_PARIDE_PD=m
+CONFIG_PARIDE_PCD=m
+CONFIG_PARIDE_PF=m
+CONFIG_PARIDE_PT=m
+CONFIG_PARIDE_PG=m
+
+#
+# Parallel IDE protocol modules
+#
+CONFIG_PARIDE_ATEN=m
+CONFIG_PARIDE_BPCK=m
+CONFIG_PARIDE_COMM=m
+CONFIG_PARIDE_DSTR=m
+CONFIG_PARIDE_FIT2=m
+CONFIG_PARIDE_FIT3=m
+CONFIG_PARIDE_EPAT=m
+CONFIG_PARIDE_EPIA=m
+CONFIG_PARIDE_FRIQ=m
+CONFIG_PARIDE_FRPW=m
+CONFIG_PARIDE_KBIC=m
+CONFIG_PARIDE_KTTI=m
+CONFIG_PARIDE_ON20=m
+CONFIG_PARIDE_ON26=m
 # CONFIG_BLK_DEV_HD is not set
 
 #
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_CONSOLE=y
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_UNIX98_PTYS is not set
+CONFIG_PRINTER=m
+CONFIG_PRINTER_READBACK=y
+CONFIG_MOUSE=y
+
+#
+# Mice
+#
+# CONFIG_ATIXL_BUSMOUSE is not set
+# CONFIG_BUSMOUSE is not set
+# CONFIG_MS_BUSMOUSE is not set
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+# CONFIG_QIC02_TAPE is not set
+CONFIG_WATCHDOG=y
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+# CONFIG_WDT is not set
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_PCWATCHDOG is not set
+# CONFIG_ACQUIRE_WDT is not set
+CONFIG_DS1620=y
+CONFIG_NWBUTTON=y
+CONFIG_NWBUTTON_REBOOT=y
+CONFIG_NWFLASH=m
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
+
+#
+# Video For Linux
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Joystick support
+#
+# CONFIG_JOYSTICK is not set
+# CONFIG_DTLK is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+
+#
+# Console drivers
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_FB=y
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FB_PM2 is not set
+CONFIG_FB_CYBER2000=y
+# CONFIG_FB_MATROX is not set
+# CONFIG_FB_ATY is not set
+# CONFIG_FB_VIRTUAL is not set
+CONFIG_FBCON_ADVANCED=y
+# CONFIG_FBCON_MFB is not set
+# CONFIG_FBCON_CFB2 is not set
+# CONFIG_FBCON_CFB4 is not set
+CONFIG_FBCON_CFB8=y
+CONFIG_FBCON_CFB16=y
+CONFIG_FBCON_CFB24=y
+# CONFIG_FBCON_CFB32 is not set
+# CONFIG_FBCON_AFB is not set
+# CONFIG_FBCON_ILBM is not set
+# CONFIG_FBCON_IPLAN2P2 is not set
+# CONFIG_FBCON_IPLAN2P4 is not set
+# CONFIG_FBCON_IPLAN2P8 is not set
+# CONFIG_FBCON_MAC is not set
+CONFIG_FBCON_VGA=y
+# CONFIG_FBCON_FONTWIDTH8_ONLY is not set
+CONFIG_FBCON_FONTS=y
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+# CONFIG_FONT_6x11 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+CONFIG_FONT_ACORN_8x8=y
+
+#
 # Networking options
 #
-# CONFIG_PACKET is not set
+CONFIG_PACKET=y
 # CONFIG_NETLINK is not set
 # CONFIG_FIREWALL is not set
 # CONFIG_FILTER is not set
@@ -85,21 +242,20 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
-# CONFIG_IP_PNP is not set
-# CONFIG_IP_ACCT is not set
-# CONFIG_IP_MASQUERADE is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
 # CONFIG_IP_ROUTER is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
-# CONFIG_IP_ALIAS is not set
+CONFIG_IP_ALIAS=y
 # CONFIG_SYN_COOKIES is not set
 
 #
 # (it is safe to leave these untouched)
 #
 # CONFIG_INET_RARP is not set
-CONFIG_IP_NOSR=y
-# CONFIG_SKB_LARGE is not set
+CONFIG_SKB_LARGE=y
 # CONFIG_IPV6 is not set
 
 #
@@ -111,107 +267,198 @@ CONFIG_IP_NOSR=y
 # CONFIG_LAPB is not set
 # CONFIG_BRIDGE is not set
 # CONFIG_LLC is not set
+# CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 # CONFIG_NET_FASTROUTE is not set
 # CONFIG_NET_HW_FLOWCONTROL is not set
 # CONFIG_CPU_IS_SLOW is not set
+
+#
+# QoS and/or fair queueing
+#
 # CONFIG_NET_SCHED is not set
-# CONFIG_NET_PROFILE is not set
 
 #
-# SCSI support
+# Amateur Radio support
 #
-CONFIG_SCSI=y
+# CONFIG_HAMRADIO is not set
 
 #
-# SCSI support type (disk, tape, CD-ROM)
+# IrDA subsystem support
 #
-CONFIG_BLK_DEV_SD=y
-# CONFIG_CHR_DEV_ST is not set
-CONFIG_BLK_DEV_SR=y
-# CONFIG_BLK_DEV_SR_VENDOR is not set
-# CONFIG_CHR_DEV_SG is not set
+# CONFIG_IRDA is not set
 
 #
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+# Network device support
 #
-# CONFIG_SCSI_MULTI_LUN is not set
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_LOGGING=y
+CONFIG_NETDEVICES=y
+# CONFIG_ARCNET is not set
+# CONFIG_DUMMY is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_NET_ETHERNET=y
+# CONFIG_ARM_AM79C961A is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+CONFIG_VORTEX=y
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+# CONFIG_RTL8139 is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_ACENIC is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_EISA=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_DE4X5 is not set
+CONFIG_DEC_ELCP=m
+# CONFIG_DGRS is not set
+# CONFIG_EEXPRESS_PRO100 is not set
+# CONFIG_LNE390 is not set
+# CONFIG_NE3210 is not set
+CONFIG_NE2K_PCI=y
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+# CONFIG_ES3210 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_ZNET is not set
+# CONFIG_NET_POCKET is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_DLCI is not set
+# CONFIG_PLIP is not set
+CONFIG_PPP=m
 
 #
-# SCSI low-level drivers
+# CCP compressors for PPP are only built as modules.
 #
-CONFIG_SCSI_ACORNSCSI_3=m
-CONFIG_SCSI_ACORNSCSI_TAGGED_QUEUE=y
-CONFIG_SCSI_ACORNSCSI_SYNC=y
-CONFIG_SCSI_CUMANA_2=m
-CONFIG_SCSI_POWERTECSCSI=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+CONFIG_SLIP_MODE_SLIP6=y
+# CONFIG_NET_RADIO is not set
+# CONFIG_TR is not set
+# CONFIG_SHAPER is not set
+# CONFIG_HOSTESS_SV11 is not set
+# CONFIG_COSA is not set
+# CONFIG_RCPCI is not set
 
 #
-# The following drives are not fully supported
+# SCSI support
 #
-CONFIG_SCSI_CUMANA_1=m
-CONFIG_SCSI_OAK1=m
-CONFIG_SCSI_PPA=m
-CONFIG_SCSI_PPA_HAVE_PEDANTIC=2
+# CONFIG_SCSI is not set
 
 #
-# Network device support
+# Sound
 #
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_EQUALIZER is not set
-CONFIG_PPP=m
+CONFIG_SOUND=m
+# CONFIG_SOUND_ES1370 is not set
+# CONFIG_SOUND_ES1371 is not set
+# CONFIG_SOUND_SONICVIBES is not set
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+CONFIG_SOUND_OSS=m
+# CONFIG_SOUND_PAS is not set
+CONFIG_SOUND_SB=m
+CONFIG_SOUND_ADLIB=m
+# CONFIG_SOUND_GUS is not set
+# CONFIG_SOUND_MPU401 is not set
+# CONFIG_SOUND_PSS is not set
+# CONFIG_SOUND_MSS is not set
+# CONFIG_SOUND_SSCAPE is not set
+# CONFIG_SOUND_TRIX is not set
+# CONFIG_SOUND_MAD16 is not set
+# CONFIG_SOUND_WAVEFRONT is not set
+# CONFIG_SOUND_CS4232 is not set
+# CONFIG_SOUND_OPL3SA2 is not set
+# CONFIG_SOUND_MAUI is not set
+# CONFIG_SOUND_SGALAXY is not set
+# CONFIG_SOUND_AD1816 is not set
+# CONFIG_SOUND_OPL3SA1 is not set
+# CONFIG_SOUND_SOFTOSS is not set
+# CONFIG_SOUND_YM3812 is not set
+# CONFIG_SOUND_VMIDI is not set
+# CONFIG_SOUND_UART6850 is not set
+# CONFIG_SOUND_VIDC is not set
+CONFIG_SOUND_WAVEARTIST=m
+CONFIG_WAVEARTIST_BASE=250
+CONFIG_WAVEARTIST_IRQ=12
+CONFIG_WAVEARTIST_DMA=3
+CONFIG_WAVEARTIST_DMA2=7
 
 #
-# CCP compressors for PPP are only built as modules.
+# Additional low level sound drivers
 #
-# CONFIG_SLIP is not set
-CONFIG_ETHER1=m
-CONFIG_ETHER3=m
-CONFIG_ETHERH=m
+# CONFIG_LOWLEVEL_SOUND is not set
 
 #
 # Filesystems
 #
 # CONFIG_QUOTA is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_EXT2_FS=y
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_FAT_FS=y
-CONFIG_MSDOS_FS=y
+# CONFIG_AUTOFS_FS is not set
+CONFIG_ADFS_FS=y
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
 # CONFIG_UMSDOS_FS is not set
-CONFIG_VFAT_FS=y
+CONFIG_VFAT_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+# CONFIG_MINIX_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_HPFS_FS is not set
 CONFIG_PROC_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
 CONFIG_NFS_FS=y
-CONFIG_NFSD=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=m
+# CONFIG_NFSD_SUN is not set
 CONFIG_SUNRPC=y
 CONFIG_LOCKD=y
-# CONFIG_CODA_FS is not set
 # CONFIG_SMB_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_NTFS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_ADFS_FS=y
-CONFIG_ADFS_FS=y
+# CONFIG_NCP_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_OSF_PARTITION is not set
 # CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+CONFIG_ACORN_PARTITION=y
+CONFIG_ACORN_PARTITION_ADFS=y
+# CONFIG_ACORN_PARTITION_ICS is not set
+# CONFIG_ACORN_PARTITION_POWERTEC is not set
+# CONFIG_ACORN_PARTITION_RISCIX is not set
 CONFIG_NLS=y
 
 #
 # Native Language Support
 #
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
 # CONFIG_NLS_CODEPAGE_855 is not set
 # CONFIG_NLS_CODEPAGE_857 is not set
 # CONFIG_NLS_CODEPAGE_860 is not set
@@ -223,8 +470,8 @@ CONFIG_NLS=y
 # CONFIG_NLS_CODEPAGE_866 is not set
 # CONFIG_NLS_CODEPAGE_869 is not set
 # CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_1 is not set
-# CONFIG_NLS_ISO8859_2 is not set
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
 # CONFIG_NLS_ISO8859_5 is not set
@@ -232,34 +479,15 @@ CONFIG_NLS=y
 # CONFIG_NLS_ISO8859_7 is not set
 # CONFIG_NLS_ISO8859_8 is not set
 # CONFIG_NLS_ISO8859_9 is not set
+CONFIG_NLS_ISO8859_15=m
 # CONFIG_NLS_KOI8_R is not set
 
 #
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_SERIAL=y
-# CONFIG_SERIAL_CONSOLE is not set
-# CONFIG_SERIAL_EXTENDED is not set
-CONFIG_ATOMWIDE_SERIAL=y
-CONFIG_DUALSP_SERIAL=y
-CONFIG_MOUSE=y
-CONFIG_PRINTER=m
-CONFIG_PRINTER_READBACK=y
-# CONFIG_UMISC is not set
-# CONFIG_WATCHDOG is not set
-CONFIG_RPCMOUSE=y
-
-#
-# Sound
-#
-CONFIG_SOUND=m
-CONFIG_VIDC=y
-CONFIG_AUDIO=y
-DSP_BUFFSIZE=65536
-
-#
 # Kernel hacking
 #
+CONFIG_FRAME_POINTER=y
+CONFIG_DEBUG_ERRORS=y
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_INFO is not set
 CONFIG_MAGIC_SYSRQ=y
+# CONFIG_DEBUG_LL is not set
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 23b2c1267..5bcc22af1 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -9,31 +9,37 @@ HEAD_OBJ  = head-$(PROCESSOR).o
 ENTRY_OBJ = entry-$(PROCESSOR).o
 
 O_TARGET := kernel.o
-O_OBJS   := $(ENTRY_OBJ) ioport.o irq.o process.o ptrace.o setup.o \
+O_OBJS   := $(ENTRY_OBJ) irq.o process.o ptrace.o setup.o \
 	    signal.o sys_arm.o time.o traps.o
 
-DMA_OBJS_arc      = dma-arc.o
-DMA_OBJS_a5k      = dma-a5k.o
-DMA_OBJS_rpc      = dma-rpc.o
-DMA_OBJS_ebsa110  = dma-dummy.o
-DMA_OBJS_ebsa285  = dma-ebsa285.o
-DMA_OBJS_nexuspci =
-DMA_OBJS_vnc      = dma-vnc.o
-
-O_OBJS_arc        = ecard.o iic.o fiq.o oldlatches.o
-O_OBJS_a5k        = ecard.o iic.o fiq.o
-O_OBJS_rpc        = ecard.o iic.o fiq.o
-O_OBJS_ebsa110    = leds-ebsa110.o
-O_OBJS_ebsa285    = leds-ebsa285.o hw-ebsa285.o
-O_OBJS_nexuspci   =
-O_OBJS_vnc        = leds-ebsa285.o hw-vnc.o
+ifeq ($(CONFIG_ISA_DMA),y)
+  ISA_DMA_OBJS += dma-isa.o
+endif
+
+O_OBJS_arc        = dma-arc.o iic.o fiq.o oldlatches.o
+O_OBJS_a5k        = dma-a5k.o iic.o fiq.o
+O_OBJS_rpc        = dma-rpc.o iic.o fiq.o
+O_OBJS_ebsa110    = dma-dummy.o
+O_OBJS_footbridge = dma-footbridge.o $(ISA_DMA_OBJS)
+O_OBJS_nexuspci   = dma-dummy.o
+
+OX_OBJS_arc	  = dma.o
+OX_OBJS_a5k	  = dma.o
+OX_OBJS_rpc	  = dma.o
+OX_OBJS_ebsa110	  = 
+OX_OBJS_footbridge= dma.o hw-footbridge.o
+OX_OBJS_nexuspci  =
 
 all: lib kernel.o $(HEAD_OBJ) init_task.o
 
+O_OBJS += $(O_OBJS_$(MACHINE))
+
 ifeq ($(CONFIG_MODULES),y)
   OX_OBJS = armksyms.o
-else
-  O_OBJS += armksyms.o
+endif
+
+ifeq ($(CONFIG_ARCH_ACORN),y)
+  OX_OBJS += ecard.o
 endif
 
 ifeq ($(MACHINE),nexuspci)
@@ -46,17 +52,23 @@ else
   endif
 endif
 
-ifneq ($(DMA_OBJS_$(MACHINE)),)
-  OX_OBJS += dma.o
-  O_OBJS  += $(DMA_OBJS_$(MACHINE))
-  ifeq ($(CONFIG_ISA_DMA),y)
-    O_OBJS += dma-isa.o
-  endif
+ifdef CONFIG_LEDS
+  OX_OBJS += leds-$(MACHINE).o
+endif
+
+ifeq ($(CONFIG_MODULES),y)
+  OX_OBJS += $(OX_OBJS_$(MACHINE))
 else
-  O_OBJS += dma-dummy.o
+  O_OBJS += $(OX_OBJS_$(MACHINE))
 endif
 
-O_OBJS += $(O_OBJS_$(MACHINE))
+ifeq ($(CONFIG_ARTHUR),y)
+  O_OBJS += arthur.o
+else
+  ifeq ($(CONFIG_ARTHUR),m)
+    M_OBJS += arthur.o
+  endif
+endif
 
 $(HEAD_OBJ): $(HEAD_OBJ:.o=.S)
 	$(CC) -D__ASSEMBLY__ -DTEXTADDR=$(TEXTADDR) -traditional -c $(HEAD_OBJ:.o=.S) -o $@
@@ -72,3 +84,7 @@ $(ENTRY_OBJ): ../lib/constants.h
 
 lib:
 	$(MAKE) -C ../lib constants.h
+
+# Spell out some dependencies that `make dep' doesn't spot
+entry-armv.o: calls.S
+entry-armo.o: calls.S
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 149349b4a..c93421ba7 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -6,20 +6,40 @@
 #include <linux/mman.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/in6.h>
 
-#include <asm/ecard.h>
 #include <asm/elf.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/pgtable.h>
+#include <asm/semaphore.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/checksum.h>
 
 extern void dump_thread(struct pt_regs *, struct user *);
 extern int dump_fpu(struct pt_regs *, struct user_fp_struct *);
 extern void inswb(unsigned int port, void *to, int len);
 extern void outswb(unsigned int port, const void *to, int len);
 
+extern unsigned int local_bh_count[NR_CPUS];
+extern unsigned int local_irq_count[NR_CPUS];
+
+extern void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags);
+extern void iounmap(void *addr);
+
+extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+/*
+ * syscalls
+ */
+extern int sys_write(int, const char *, int);
+extern int sys_read(int, char *, int);
+extern int sys_lseek(int, off_t, int);
+extern int sys_open(const char *, int, int);
+extern int sys_exit(int);
+extern int sys_wait4(int, int *, int, struct rusage *);
+
 /*
  * libgcc functions - functions that are used internally by the
  * compiler...  (prototypes are not correct though, but that
@@ -43,6 +63,8 @@ extern void __udivsi3(void);
 extern void __umoddi3(void);
 extern void __umodsi3(void);
 
+extern void ret_from_exception(void);
+extern void fpundefinstr(void);
 extern void fp_enter(void);
 #define EXPORT_SYMBOL_ALIAS(sym,orig) \
  const char __kstrtab_##sym##[] __attribute__((section(".kstrtab"))) = \
@@ -57,32 +79,46 @@ EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter);
 EXPORT_SYMBOL_ALIAS(fp_printk,printk);
 EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig);
 
+#ifdef CONFIG_CPU_26
+EXPORT_SYMBOL(fpundefinstr);
+EXPORT_SYMBOL(ret_from_exception);
+#endif
+
 	/* platform dependent support */
 EXPORT_SYMBOL(dump_thread);
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(udelay);
 EXPORT_SYMBOL(xchg_str);
-
-	/* expansion card support */
-#ifdef CONFIG_ARCH_ACORN
-EXPORT_SYMBOL(ecard_startfind);
-EXPORT_SYMBOL(ecard_find);
-EXPORT_SYMBOL(ecard_readchunk);
-EXPORT_SYMBOL(ecard_address);
+EXPORT_SYMBOL(local_bh_count);
+EXPORT_SYMBOL(local_irq_count);
+#ifdef CONFIG_CPU_32
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(iounmap);
 #endif
+EXPORT_SYMBOL(kernel_thread);
 
 EXPORT_SYMBOL(enable_irq);
 EXPORT_SYMBOL(disable_irq);
 
 	/* processor dependencies */
 EXPORT_SYMBOL(processor);
-EXPORT_SYMBOL(machine_type);
+EXPORT_SYMBOL(__machine_arch_type);
+
+	/* networking */
+EXPORT_SYMBOL(csum_partial_copy);
+EXPORT_SYMBOL(__csum_ipv6_magic);
 
 	/* io */
-EXPORT_SYMBOL(outswb);
+EXPORT_SYMBOL(outsb);
 EXPORT_SYMBOL(outsw);
-EXPORT_SYMBOL(inswb);
+EXPORT_SYMBOL(outsl);
+EXPORT_SYMBOL(insb);
 EXPORT_SYMBOL(insw);
+EXPORT_SYMBOL(insl);
+
+EXPORT_SYMBOL(_memcpy_fromio);
+EXPORT_SYMBOL(_memcpy_toio);
+EXPORT_SYMBOL(_memset_io);
 
 	/* address translation */
 #ifndef __virt_to_phys__is_a_macro
@@ -98,7 +134,9 @@ EXPORT_SYMBOL(__virt_to_bus);
 EXPORT_SYMBOL(__bus_to_virt);
 #endif
 
+#ifndef CONFIG_NO_PGT_CACHE
 EXPORT_SYMBOL(quicklists);
+#endif
 EXPORT_SYMBOL(__bad_pmd);
 EXPORT_SYMBOL(__bad_pmd_kernel);
 
@@ -167,3 +205,17 @@ EXPORT_SYMBOL(find_next_zero_bit);
 EXPORT_SYMBOL(armidlist);
 EXPORT_SYMBOL(armidindex);
 EXPORT_SYMBOL(elf_platform);
+
+	/* syscalls */
+EXPORT_SYMBOL(sys_write);
+EXPORT_SYMBOL(sys_read);
+EXPORT_SYMBOL(sys_lseek);
+EXPORT_SYMBOL(sys_open);
+EXPORT_SYMBOL(sys_exit);
+EXPORT_SYMBOL(sys_wait4);
+
+	/* semaphores */
+EXPORT_SYMBOL_NOVERS(__down_failed);
+EXPORT_SYMBOL_NOVERS(__down_interruptible_failed);
+EXPORT_SYMBOL_NOVERS(__up_wakeup);
+
diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c
new file mode 100644
index 000000000..9994fdd4a
--- /dev/null
+++ b/arch/arm/kernel/arthur.c
@@ -0,0 +1,88 @@
+/*
+ * Arthur personality
+ * Copyright (C) 1998 Philip Blundell
+ */
+
+#include <linux/personality.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+
+#include <asm/ptrace.h>
+
+/* RISC OS doesn't have many signals, and a lot of those that it does
+   have don't map easily to any Linux equivalent.  Never mind.  */
+
+#define RISCOS_SIGABRT		1
+#define RISCOS_SIGFPE		2
+#define RISCOS_SIGILL		3
+#define RISCOS_SIGINT		4
+#define RISCOS_SIGSEGV		5
+#define RISCOS_SIGTERM		6
+#define RISCOS_SIGSTAK		7
+#define RISCOS_SIGUSR1		8
+#define RISCOS_SIGUSR2		9
+#define RISCOS_SIGOSERROR	10
+
+static unsigned long riscos_to_linux_signals[32] = {
+	0,	1,	2,	3,	4,	5,	6,	7,
+	8,	9,	10,	11,	12,	13,	14,	15,
+	16,	17,	18,	19,	20,	21,	22,	23,
+	24,	25,	26,	27,	28,	29,	30,	31
+};
+
+static unsigned long linux_to_riscos_signals[32] = {
+	0,		-1,		RISCOS_SIGINT,	-1,
+       	RISCOS_SIGILL,	5,		RISCOS_SIGABRT,	7,
+	RISCOS_SIGFPE,	9,		RISCOS_SIGUSR1,	RISCOS_SIGSEGV,	
+	RISCOS_SIGUSR2,	13,		14,		RISCOS_SIGTERM,
+	16,		17,		18,		19,
+	20,		21,		22,		23,
+	24,		25,		26,		27,
+	28,		29,		30,		31
+};
+
+static void arthur_lcall7(int nr, struct pt_regs *regs)
+{
+	struct siginfo info;	/* filled in by hand: set every field we pass on */
+	info.si_signo = SIGSWI;
+	info.si_errno = 0;	/* no errno; don't pass stack garbage along */
+	info.si_code = nr;	/* nr is handed over in si_code */
+	send_sig_info(SIGSWI, &info, current);	/* Bounce it to the emulator */
+}
+
+static struct exec_domain riscos_exec_domain = {
+	"Arthur",	/* name */
+	(lcall7_func)arthur_lcall7,
+	PER_RISCOS, PER_RISCOS,
+	riscos_to_linux_signals,
+	linux_to_riscos_signals,
+#ifdef MODULE
+	&__this_module,	/* No usage counter. */
+#else
+	NULL,
+#endif
+	NULL		/* Nothing after this in the list. */
+};
+
+/*
+ * We could do with some locking to stop Arthur being removed while
+ * processes are using it.
+ */
+
+#ifdef MODULE
+int init_module(void)
+#else
+int initialise_arthur(void)
+#endif
+{
+	return register_exec_domain(&riscos_exec_domain);
+}
+
+#ifdef MODULE
+void cleanup_module(void)
+{
+	unregister_exec_domain(&riscos_exec_domain);
+}
+#endif
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 46f71fa92..154e3aeab 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -31,7 +31,7 @@
 		.long	SYMBOL_NAME(sys_lseek)
 /* 20 */	.long	SYMBOL_NAME(sys_getpid)
 		.long	SYMBOL_NAME(sys_mount_wrapper)
-		.long	SYMBOL_NAME(sys_umount)
+		.long	SYMBOL_NAME(sys_oldumount)
 		.long	SYMBOL_NAME(sys_setuid)
 		.long	SYMBOL_NAME(sys_getuid)
 /* 25 */	.long	SYMBOL_NAME(sys_stime)
@@ -61,7 +61,7 @@
 		.long	SYMBOL_NAME(sys_geteuid)
 /* 50 */	.long	SYMBOL_NAME(sys_getegid)
 		.long	SYMBOL_NAME(sys_acct)
-		.long	SYMBOL_NAME(sys_ni_syscall)		/* was sys_phys */
+		.long	SYMBOL_NAME(sys_umount)
 		.long	SYMBOL_NAME(sys_ni_syscall)		/* was sys_lock */
 		.long	SYMBOL_NAME(sys_ioctl)
 /* 55 */	.long	SYMBOL_NAME(sys_fcntl)
@@ -110,7 +110,7 @@
 		.long	SYMBOL_NAME(sys_ni_syscall)		/* was sys_profil */
 		.long	SYMBOL_NAME(sys_statfs)
 /* 100 */	.long	SYMBOL_NAME(sys_fstatfs)
-		.long	SYMBOL_NAME(sys_ni_syscall)		/* .long	_sys_ioperm */
+		.long	SYMBOL_NAME(sys_ni_syscall)
 		.long	SYMBOL_NAME(sys_socketcall)
 		.long	SYMBOL_NAME(sys_syslog)
 		.long	SYMBOL_NAME(sys_setitimer)
@@ -119,7 +119,7 @@
 		.long	SYMBOL_NAME(sys_newlstat)
 		.long	SYMBOL_NAME(sys_newfstat)
 		.long	SYMBOL_NAME(sys_uname)
-/* 110 */	.long	SYMBOL_NAME(sys_iopl)
+/* 110 */	.long	SYMBOL_NAME(sys_ni_syscall)
 		.long	SYMBOL_NAME(sys_vhangup)
 		.long	SYMBOL_NAME(sys_idle)
 		.long	SYMBOL_NAME(sys_syscall)		/* call a syscall */
@@ -196,6 +196,10 @@
 		.long	SYMBOL_NAME(sys_capget)
 /* 185 */	.long	SYMBOL_NAME(sys_capset)
 		.long	SYMBOL_NAME(sys_sigaltstack_wrapper)
+		.long	SYMBOL_NAME(sys_sendfile)
+		.long	SYMBOL_NAME(sys_ni_syscall)
+		.long	SYMBOL_NAME(sys_ni_syscall)
+/* 190 */	.long	SYMBOL_NAME(sys_vfork_wrapper)
 
-		.rept	NR_syscalls-186
+		.rept	NR_syscalls-190		/* 190 = last explicit entry above */
 			.long	SYMBOL_NAME(sys_ni_syscall)
diff --git a/arch/arm/kernel/dec21285.c b/arch/arm/kernel/dec21285.c
index aa66ee04a..c4103abee 100644
--- a/arch/arm/kernel/dec21285.c
+++ b/arch/arm/kernel/dec21285.c
@@ -8,17 +8,17 @@
 #include <linux/pci.h>
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 
 #include <asm/irq.h>
 #include <asm/system.h>
+#include <asm/hardware.h>
 
-#define MAX_SLOTS		20
+#define MAX_SLOTS		21
 
 extern void pcibios_fixup_ebsa285(struct pci_dev *dev);
 extern void pcibios_init_ebsa285(void);
-extern void pcibios_fixup_vnc(struct pci_dev *dev);
-extern void pcibios_init_vnc(void);
 
 int
 pcibios_present(void)
@@ -33,11 +33,12 @@ pcibios_base_address(unsigned char bus, unsigned char dev_fn)
 		int slot = PCI_SLOT(dev_fn);
 		
 		if (slot < MAX_SLOTS)
-			return 0xf8c00000 + (slot << 11) + (PCI_FUNC(dev_fn) << 8);
+			return PCICFG0_BASE + 0xc00000 +
+				(slot << 11) + (PCI_FUNC(dev_fn) << 8);
 		else
 			return 0;
 	} else
-		return 0xf9000000 | (bus << 16) | (dev_fn << 8);
+		return PCICFG1_BASE | (bus << 16) | (dev_fn << 8);
 }
 
 int
@@ -151,10 +152,7 @@ __initfunc(void pcibios_fixup(void))
 	struct pci_dev *dev;
 
 	for (dev = pci_devices; dev; dev = dev->next) {
-		if (machine_is_ebsa285() || machine_is_cats())
-			pcibios_fixup_ebsa285(dev);
-		if (machine_is_netwinder())
-			pcibios_fixup_vnc(dev);
+		pcibios_fixup_ebsa285(dev);
 
 		pcibios_write_config_byte(dev->bus->number, dev->devfn,
 					  PCI_INTERRUPT_LINE, dev->irq);
@@ -164,18 +162,83 @@ __initfunc(void pcibios_fixup(void))
 			dev->bus->number, dev->devfn,
 			dev->vendor, dev->device, dev->irq);
 	}
-	if (machine_is_netwinder())
-		hw_init();
+
+	hw_init();
 }
 
 __initfunc(void pcibios_init(void))
 {
-	if (machine_is_ebsa285() || machine_is_cats())
-		pcibios_init_ebsa285();
-	if (machine_is_netwinder())
-		pcibios_init_vnc();
-
-	printk("DEC21285 PCI revision %02X\n", *(unsigned char *)0xfe000008);
+	unsigned int mem_size = (unsigned int)high_memory - PAGE_OFFSET;
+	unsigned long cntl;
+
+	*CSR_SDRAMBASEMASK    = (mem_size - 1) & 0x0ffc0000;
+	*CSR_SDRAMBASEOFFSET  = 0;
+	*CSR_ROMBASEMASK      = 0x80000000;
+	*CSR_CSRBASEMASK      = 0;
+	*CSR_CSRBASEOFFSET    = 0;
+	*CSR_PCIADDR_EXTN     = 0;
+
+#ifdef CONFIG_HOST_FOOTBRIDGE
+	/*
+	 * Against my better judgement, Philip Blundell still seems
+	 * to be saying that we should initialise the PCI stuff here
+	 * when the PCI_CFN bit is not set, despite my comment below,
+	 * which he decided to remove.  If it is not set, then
+	 * the card is in add-in mode, and we're in a machine where
+	 * the bus is set up by 'others'.
+	 *
+	 * We should therefore not mess about with the mapping in
+	 * anyway, and we should not be using the virt_to_bus functions
+	 * that exist in the HOST architecture mode (since they assume
+	 * a fixed mapping).
+	 *
+	 * Instead, you should be using ADDIN mode, which allows for
+	 * this situation.  This does assume that you have correctly
+	 * initialised the PCI bus, which you must have done to get
+	 * your PC booted.
+	 *
+	 * Unfortunately, he seems to be blind to this.  I guess he'll
+	 * also remove all this.
+	 *
+	 * And THIS COMMENT STAYS, even if this gets patched, thank
+	 * you.
+	 */
+
+	/*
+	 * Map our SDRAM at a known address in PCI space, just in case
+	 * the firmware had other ideas.  Using a nonzero base is
+	 * necessary, since some VGA cards forcefully use PCI addresses
+	 * in the range 0x000a0000 to 0x000c0000. (eg, S3 cards).
+	 *
+	 * NOTE! If you need to check the PCI_CFN bit in the SA110
+	 * control register then you've configured the kernel wrong.
+	 * If you're not using host mode, then DO NOT set
+	 * CONFIG_HOST_FOOTBRIDGE, but use CONFIG_ADDIN_FOOTBRIDGE
+	 * instead.  In this case, you MUST supply some firmware
+	 * to allow your PC to boot, plus we should not modify the
+	 * mappings that the PC BIOS has set up for us.
+	 */
+	*CSR_PCICACHELINESIZE = 0x00002008;
+	*CSR_PCICSRBASE       = 0;
+	*CSR_PCICSRIOBASE     = 0;
+	*CSR_PCISDRAMBASE     = virt_to_bus((void *)PAGE_OFFSET);
+	*CSR_PCIROMBASE       = 0;
+	*CSR_PCICMD           = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+				PCI_COMMAND_MASTER | PCI_COMMAND_FAST_BACK |
+				PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY |
+				(1 << 31) | (1 << 29) | (1 << 28) | (1 << 24);
+#endif
+
+	/*
+	 * Clear any existing errors - we aren't
+	 * interested in historical data...
+	 */
+	cntl = *CSR_SA110_CNTL & 0xffffde07;
+	*CSR_SA110_CNTL = cntl | SA110_CNTL_RXSERR;
+
+	pcibios_init_ebsa285();
+
+	printk(KERN_DEBUG"PCI: DEC21285 revision %02lX\n", *CSR_CLASSREV & 0xff);
 }
 
 __initfunc(void pcibios_fixup_bus(struct pci_bus *bus))
diff --git a/arch/arm/kernel/dma-a5k.c b/arch/arm/kernel/dma-a5k.c
index 18bbf0c9c..df02ea54e 100644
--- a/arch/arm/kernel/dma-a5k.c
+++ b/arch/arm/kernel/dma-a5k.c
@@ -12,7 +12,6 @@
 #include <asm/fiq.h>
 #include <asm/io.h>
 #include <asm/hardware.h>
-#include <asm/pgtable.h>
 
 #include "dma.h"
 
@@ -37,8 +36,9 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma)
 	if (channel != DMA_VIRTUAL_FLOPPY)
 		printk("arch_dma_count: invalid channel %d\n", channel);
 	else {
-		extern int floppy_fiqresidual(void);
-		return floppy_fiqresidual();
+		struct pt_regs regs;
+		get_fiq_regs(&regs);
+		return regs.ARM_r9;
 	}
 	return 0;
 }
@@ -48,6 +48,7 @@ void arch_enable_dma(dmach_t channel, dma_t *dma)
 	if (channel != DMA_VIRTUAL_FLOPPY)
 		printk("arch_enable_dma: invalid channel %d\n", channel);
 	else {
+		struct pt_regs regs;
 		void *fiqhandler_start;
 		unsigned int fiqhandler_length;
 		extern void floppy_fiqsetup(unsigned long len, unsigned long addr,
@@ -67,8 +68,10 @@ void arch_enable_dma(dmach_t channel, dma_t *dma)
 			return;
 		}
 		memcpy((void *)0x1c, fiqhandler_start, fiqhandler_length);
-		flush_page_to_ram(0);
-		floppy_fiqsetup(dma->buf.length, __bus_to_virt(dma->buf.address), (int)PCIO_FLOPPYDMABASE);
+		regs.ARM_r9 = dma->buf.length;
+		regs.ARM_r10 = __bus_to_virt(dma->buf.address);
+		regs.ARM_fp = (int)PCIO_FLOPPYDMABASE;
+		set_fiq_regs(&regs);
 		enable_irq(dma->dma_irq);
 	}
 }
@@ -83,6 +86,11 @@ void arch_disable_dma(dmach_t channel, dma_t *dma)
 	}
 }
 
+int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns)
+{
+	return 0;
+}
+
 __initfunc(void arch_dma_init(dma_t *dma))
 {
 	dma[DMA_VIRTUAL_FLOPPY].dma_irq = 64;
diff --git a/arch/arm/kernel/dma-arc.c b/arch/arm/kernel/dma-arc.c
index 27a139ad4..9be27bdae 100644
--- a/arch/arm/kernel/dma-arc.c
+++ b/arch/arm/kernel/dma-arc.c
@@ -1,10 +1,11 @@
 /*
  * arch/arm/kernel/dma-arc.c
  *
- * Copyright (C) 1998 Dave Gilbert / Russell King
+ * Copyright (C) 1998-1999 Dave Gilbert / Russell King
  *
  * DMA functions specific to Archimedes architecture
  */
+#include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 
@@ -14,7 +15,7 @@
 
 #include "dma.h"
 
-int arch_request_dma(dmach_t channel, dma_t *dma)
+int arch_request_dma(dmach_t channel, dma_t *dma, const char * dev_id)
 {
 	if (channel == DMA_VIRTUAL_FLOPPY0 ||
 	    channel == DMA_VIRTUAL_FLOPPY1)
@@ -25,16 +26,12 @@ int arch_request_dma(dmach_t channel, dma_t *dma)
 
 void arch_free_dma(dmach_t channel, dma_t *dma)
 {
-	if (channel != DMA_VIRTUAL_FLOPPY0 &&
-	    channel != DMA_VIRTUAL_FLOPPY1)
-		return 0;
-	else
-		return -EINVAL;
 }
 
 void arch_enable_dma(dmach_t channel, dma_t *dma)
 {
 	switch (channel) {
+#ifdef CONFIG_BLK_DEV_FD
 	case DMA_VIRTUAL_FLOPPY0: { /* Data DMA */
 		switch (dma->dma_mode) {
 		case DMA_MODE_READ: /* read */
@@ -96,9 +93,38 @@ void arch_enable_dma(dmach_t channel, dma_t *dma)
 		restore_flags(flags);
 	}
 	break;
+#endif
 	}
 }
 
+int arch_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+	/* Returns the residue for the two virtual floppy channels, else 0. */
+#ifdef CONFIG_BLK_DEV_FD
+	/* Defined elsewhere (presumably the fdc1772 floppy driver). */
+	extern unsigned int fdc1772_bytestogo;
+	extern unsigned int fdc1772_fdc_int_done;
+
+	/*
+	 * Data DMA channel.  10/1/1999 DAG - presumably this is the
+	 * number of bytes still to be transferred?
+	 */
+	if (channel == DMA_VIRTUAL_FLOPPY0)
+		return fdc1772_bytestogo;
+
+	/*
+	 * Command-completed channel.  10/1/1999 DAG - presumably whether
+	 * a command is outstanding?  If int_done is 0, one int is to go.
+	 */
+	if (channel == DMA_VIRTUAL_FLOPPY1)
+		return fdc1772_fdc_int_done == 0;
+#endif
+
+	/* Any other channel (or floppy support compiled out) is reported. */
+	printk("dma-arc.c:arch_get_dma_residue called with unknown/unconfigured DMA channel\n");
+	return 0;
+}
+
 void arch_disable_dma(dmach_t channel, dma_t *dma)
 {
 	if (channel != DMA_VIRTUAL_FLOPPY0 &&
@@ -108,6 +134,11 @@ void arch_disable_dma(dmach_t channel, dma_t *dma)
 		disable_irq(dma->dma_irq);
 }
 
+int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns)
+{
+	return 0;
+}
+
 __initfunc(void arch_dma_init(dma_t *dma))
 {
 	dma[DMA_VIRTUAL_FLOPPY0].dma_irq = 64;
diff --git a/arch/arm/kernel/dma-dummy.c b/arch/arm/kernel/dma-dummy.c
index be72a8965..db46ef1c3 100644
--- a/arch/arm/kernel/dma-dummy.c
+++ b/arch/arm/kernel/dma-dummy.c
@@ -9,6 +9,10 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 
+#include <asm/spinlock.h>
+
+spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED;
+
 int request_dma(int channel, const char *device_id)
 {
 	return -EINVAL;
diff --git a/arch/arm/kernel/dma-ebsa285.c b/arch/arm/kernel/dma-footbridge.c
index f1c42dac2..a355283dc 100644
--- a/arch/arm/kernel/dma-ebsa285.c
+++ b/arch/arm/kernel/dma-footbridge.c
@@ -6,7 +6,9 @@
  * DMA functions specific to EBSA-285/CATS architectures
  *
  * Changelog:
- *  09/11/1998	RMK	Split out ISA DMA functions to dma-isa.c
+ *  09-Nov-1998	RMK	Split out ISA DMA functions to dma-isa.c
+ *  17-Mar-1999	RMK	Allow any EBSA285-like architecture to have
+ *			ISA DMA controllers.
  */
 
 #include <linux/config.h>
@@ -16,7 +18,6 @@
 #include <linux/init.h>
 
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/hardware.h>
@@ -24,16 +25,22 @@
 #include "dma.h"
 #include "dma-isa.h"
 
+#ifdef CONFIG_ISA_DMA
+static int has_isa_dma;
+#else
+#define has_isa_dma 0
+#endif
+
 int arch_request_dma(dmach_t channel, dma_t *dma, const char *dev_name)
 {
 	switch (channel) {
-	case 0:
-	case 1:	/* 21285 internal channels */
+	case _DC21285_DMA(0):
+	case _DC21285_DMA(1):	/* 21285 internal channels */
 		return 0;
 
-	case 2 ... 9:
-		if (machine_is_cats())
-			return isa_request_dma(channel - 2, dma, dev_name);
+	case _ISA_DMA(0) ... _ISA_DMA(7):
+		if (has_isa_dma)
+			return isa_request_dma(channel - _ISA_DMA(0), dma, dev_name);
 	}
 
 	return -EINVAL;
@@ -49,14 +56,13 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma)
 	int residue = 0;
 
 	switch (channel) {
-	case 0:
-	case 1:
+	case _DC21285_DMA(0):
+	case _DC21285_DMA(1):
 		break;
-#ifdef CONFIG_CATS
-	case 2 ... 9:
-		if (machine_is_cats())
-			residue = isa_get_dma_residue(channel - 2);
-#endif
+
+	case _ISA_DMA(0) ... _ISA_DMA(7):
+		if (has_isa_dma)
+			residue = isa_get_dma_residue(channel - _ISA_DMA(0), dma);
 	}
 	return residue;
 }
@@ -64,38 +70,43 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma)
 void arch_enable_dma(dmach_t channel, dma_t *dma)
 {
 	switch (channel) {
-	case 0:
-	case 1:
+	case _DC21285_DMA(0):
+	case _DC21285_DMA(1):
 		/*
 		 * Not yet implemented
 		 */
 		break;
-#ifdef CONFIG_CATS
-	case 2 ... 9:
-		if (machine_is_cats())
-			isa_enable_dma(channel - 2, dma);
-#endif
+
+	case _ISA_DMA(0) ... _ISA_DMA(7):
+		if (has_isa_dma)
+			isa_enable_dma(channel - _ISA_DMA(0), dma);
 	}
 }
 
 void arch_disable_dma(dmach_t channel, dma_t *dma)
 {
 	switch (channel) {
-	case 0:
-	case 1:
+	case _DC21285_DMA(0):
+	case _DC21285_DMA(1):
 		/*
 		 * Not yet implemented
 		 */
 		break;
-#ifdef CONFIG_CATS
-	case 2 ... 9:
-		if (machine_is_cats())
-			isa_disable_dma(channel - 2, dma);
-#endif
+
+	case _ISA_DMA(0) ... _ISA_DMA(7):
+		if (has_isa_dma)
+			isa_disable_dma(channel - _ISA_DMA(0), dma);
 	}
 }
 
+int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle_ns)
+{
+	return 0;
+}
+
 __initfunc(void arch_dma_init(dma_t *dma))
 {
-	/* Nothing to do */
+#ifdef CONFIG_ISA_DMA
+	has_isa_dma = isa_init_dma();
+#endif
 }
diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c
index bdf7c6147..19be50433 100644
--- a/arch/arm/kernel/dma-isa.c
+++ b/arch/arm/kernel/dma-isa.c
@@ -11,6 +11,7 @@
  *  Copyright (C) 1998 Phil Blundell
  */
 #include <linux/sched.h>
+#include <linux/init.h>
 
 #include <asm/dma.h>
 #include <asm/io.h>
@@ -18,6 +19,11 @@
 #include "dma.h"
 #include "dma-isa.h"
 
+#define ISA_DMA_MODE_READ	0x44
+#define ISA_DMA_MODE_WRITE	0x48
+#define ISA_DMA_MODE_CASCADE	0xc0
+#define ISA_DMA_AUTOINIT	0x10
+
 #define ISA_DMA_MASK		0
 #define ISA_DMA_MODE		1
 #define ISA_DMA_CLRFF		2
@@ -40,10 +46,7 @@ static unsigned int isa_dma_port[8][7] = {
 
 int isa_request_dma(int channel, dma_t *dma, const char *dev_name)
 {
-	if (channel != 4)
-		return 0;
-
-	return -EINVAL;
+	return 0;
 }
 
 void isa_free_dma(int channel, dma_t *dma)
@@ -56,25 +59,27 @@ int isa_get_dma_residue(int channel, dma_t *dma)
 	unsigned int io_port = isa_dma_port[channel][ISA_DMA_COUNT];
 	int count;
 
-	count = 1 + inb(io_port) + (inb(io_port) << 8);
+	count = 1 + inb(io_port);	/* first read supplies the low byte */
+	count += inb(io_port) << 8;	/* add, not OR: the +1 may carry into bit 8 */
 
 	return channel < 4 ? count : (count << 1);
 }
 
 void isa_enable_dma(int channel, dma_t *dma)
 {
-	unsigned long address, length;
-
 	if (dma->invalid) {
+		unsigned long address, length;
+		unsigned int mode;
+
 		address = dma->buf.address;
 		length  = dma->buf.length - 1;
 
-		outb(address >> 24, isa_dma_port[channel][ISA_DMA_PGHI]);
 		outb(address >> 16, isa_dma_port[channel][ISA_DMA_PGLO]);
+		outb(address >> 24, isa_dma_port[channel][ISA_DMA_PGHI]);
 
 		if (channel >= 4) {
 			address >>= 1;
-			length = (length >> 1) & 0xfe; /* why &0xfe? */
+			length >>= 1;
 		}
 
 		outb(0, isa_dma_port[channel][ISA_DMA_CLRFF]);
@@ -85,17 +90,31 @@ void isa_enable_dma(int channel, dma_t *dma)
 		outb(length, isa_dma_port[channel][ISA_DMA_COUNT]);
 		outb(length >> 8, isa_dma_port[channel][ISA_DMA_COUNT]);
 
-		outb(dma->dma_mode | (channel & 3), isa_dma_port[channel][ISA_DMA_MODE]);
+		mode = channel & 3;
 
-		switch (dma->dma_mode) {
+		switch (dma->dma_mode & DMA_MODE_MASK) {
 		case DMA_MODE_READ:
+			mode |= ISA_DMA_MODE_READ;
 			dma_cache_inv(__bus_to_virt(dma->buf.address), dma->buf.length);
 			break;
 
 		case DMA_MODE_WRITE:
+			mode |= ISA_DMA_MODE_WRITE;
 			dma_cache_wback(__bus_to_virt(dma->buf.address), dma->buf.length);
 			break;
+
+		case DMA_MODE_CASCADE:
+			mode |= ISA_DMA_MODE_CASCADE;
+			break;
+
+		default:
+			break;
 		}
+
+		if (dma->dma_mode & DMA_AUTOINIT)
+			mode |= ISA_DMA_AUTOINIT;
+
+		outb(mode, isa_dma_port[channel][ISA_DMA_MODE]);
 		dma->invalid = 0;
 	}
 	outb(channel & 3, isa_dma_port[channel][ISA_DMA_MASK]);
@@ -105,3 +124,56 @@ void isa_disable_dma(int channel, dma_t *dma)
 {
 	outb(channel | 4, isa_dma_port[channel][ISA_DMA_MASK]);
 }
+
+__initfunc(int isa_init_dma(void))
+{
+	int dmac_found;
+
+	outb(0xff, 0x0d);
+	outb(0xff, 0xda);
+
+	outb(0x55, 0x00);
+	outb(0xaa, 0x00);
+
+	dmac_found = inb(0x00) == 0x55 && inb(0x00) == 0xaa;
+
+	if (dmac_found) {
+		int channel;
+
+		for (channel = 0; channel < 8; channel++)
+			isa_disable_dma(channel, NULL);
+
+		outb(0x40, 0x0b);
+		outb(0x41, 0x0b);
+		outb(0x42, 0x0b);
+		outb(0x43, 0x0b);
+
+		outb(0xc0, 0xd6);
+		outb(0x41, 0xd6);
+		outb(0x42, 0xd6);
+		outb(0x43, 0xd6);
+
+		outb(0, 0xd4);
+
+		outb(0x10, 0x08);
+		outb(0x10, 0xd0);
+
+		/*
+		 * Is this correct?  According to
+		 * my documentation, it doesn't
+		 * appear to be.  It should be
+		 * outb(0x3f, 0x40b); outb(0x3f, 0x4d6);
+		 */
+		outb(0x30, 0x40b);
+		outb(0x31, 0x40b);
+		outb(0x32, 0x40b);
+		outb(0x33, 0x40b);
+		outb(0x31, 0x4d6);
+		outb(0x32, 0x4d6);
+		outb(0x33, 0x4d6);
+
+		request_dma(DMA_ISA_CASCADE, "cascade");
+	}
+
+	return dmac_found;
+}
diff --git a/arch/arm/kernel/dma-isa.h b/arch/arm/kernel/dma-isa.h
index 3fcbdb3c7..2640f6c3a 100644
--- a/arch/arm/kernel/dma-isa.h
+++ b/arch/arm/kernel/dma-isa.h
@@ -23,3 +23,7 @@ void isa_enable_dma(int channel, dma_t *dma);
  */
 void isa_disable_dma(int channel, dma_t *dma);
 
+/*
+ * Initialise DMA
+ */
+int isa_init_dma(void);
diff --git a/arch/arm/kernel/dma-rpc.c b/arch/arm/kernel/dma-rpc.c
index 00cd95e79..d3fcd9116 100644
--- a/arch/arm/kernel/dma-rpc.c
+++ b/arch/arm/kernel/dma-rpc.c
@@ -11,10 +11,10 @@
 #include <linux/init.h>
 
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/dma.h>
 #include <asm/fiq.h>
 #include <asm/io.h>
+#include <asm/iomd.h>
 #include <asm/hardware.h>
 #include <asm/uaccess.h>
 
@@ -223,8 +223,9 @@ int arch_get_dma_residue(dmach_t channel, dma_t *dma)
 		break;
 
 	case DMA_VIRTUAL_FLOPPY: {
-		extern int floppy_fiqresidual(void);
-		residue = floppy_fiqresidual();
+		struct pt_regs regs;
+		get_fiq_regs(&regs);
+		return regs.ARM_r9;
 		}
 		break;
 	}
@@ -286,7 +287,6 @@ void arch_enable_dma(dmach_t channel, dma_t *dma)
 		set_fiq_handler(fiqhandler_start, fiqhandler_length);
 		set_fiq_regs(&regs);
 		enable_irq(dma->dma_irq);
-
 		}
 		break;
 
@@ -319,6 +319,46 @@ void arch_disable_dma(dmach_t channel, dma_t *dma)
 	}
 }
 
+int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle)
+{
+	int tcr, speed;	/* speed: 2-bit code, one field per channel in DMATCR */
+
+	if (cycle < 188)
+		speed = 3;
+	else if (cycle <= 250)
+		speed = 2;
+	else if (cycle < 438)
+		speed = 1;
+	else
+		speed = 0;
+
+	tcr = inb(IOMD_DMATCR);
+
+	switch (channel) {
+	case DMA_0:
+		tcr = (tcr & ~0x03) | speed;
+		break;
+
+	case DMA_1:
+		tcr = (tcr & ~0x0c) | (speed << 2);
+		break;
+
+	case DMA_2:
+		tcr = (tcr & ~0x30) | (speed << 4);
+		break;
+
+	case DMA_3:
+		tcr = (tcr & ~0xc0) | (speed << 6);
+		break;
+
+	default:	/* other channels have no field; DMATCR is rewritten as read */
+		break;
+	}
+
+	outb(tcr, IOMD_DMATCR);
+	return speed;	/* declared int; set_dma_speed() stores this in ->speed */
+}
+
 __initfunc(void arch_dma_init(dma_t *dma))
 {
 	outb(0, IOMD_IO0CR);
@@ -326,7 +366,7 @@ __initfunc(void arch_dma_init(dma_t *dma))
 	outb(0, IOMD_IO2CR);
 	outb(0, IOMD_IO3CR);
 
-//	outb(0xf0, IOMD_DMATCR);
+	outb(0xa0, IOMD_DMATCR);
 
 	dma[0].dma_base = ioaddr(IOMD_IO0CURA);
 	dma[0].dma_irq  = IRQ_DMA0;
diff --git a/arch/arm/kernel/dma-vnc.c b/arch/arm/kernel/dma-vnc.c
deleted file mode 100644
index 132fa627a..000000000
--- a/arch/arm/kernel/dma-vnc.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * arch/arm/kernel/dma-vnc.c
- *
- * Copyright (C) 1998 Russell King
- */
-#include <linux/sched.h>
-#include <linux/malloc.h>
-#include <linux/mman.h>
-#include <linux/init.h>
-
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-#include <asm/hardware.h>
-
-#include "dma.h"
-#include "dma-isa.h"
-
-int arch_request_dma(dmach_t channel, dma_t *dma, const char *dev_name)
-{
-	if (channel < 8)
-		return isa_request_dma(channel, dma, dev_name);
-	return -EINVAL;
-}
-
-void arch_free_dma(dmach_t channel, dma_t *dma)
-{
-	isa_free_dma(channel, dma);
-}
-
-int arch_get_dma_residue(dmach_t channel, dma_t *dma)
-{
-	return isa_get_dma_residue(channel, dma);
-}
-
-void arch_enable_dma(dmach_t channel, dma_t *dma)
-{
-	isa_enable_dma(channel, dma);
-}
-
-void arch_disable_dma(dmach_t channel, dma_t *dma)
-{
-	isa_disable_dma(channel, dma);
-}
-
-__initfunc(void arch_dma_init(dma_t *dma))
-{
-	/* Nothing to do */
-}
-
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index a164073ae..219e1f0f2 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -21,7 +21,6 @@
 #include <linux/init.h>
 
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/hardware.h>
 #include <asm/io.h>
@@ -201,6 +200,12 @@ void disable_dma (dmach_t channel)
 		printk (KERN_ERR "Trying to disable free DMA%d\n", channel);
 }
 
+void set_dma_speed(dmach_t channel, int cycle_ns)
+{
+	dma_chan[channel].speed =
+	   arch_set_dma_speed(channel, &dma_chan[channel], cycle_ns);
+}
+
 int get_dma_residue(dmach_t channel)
 {
 	return arch_get_dma_residue(channel, &dma_chan[channel]);
@@ -214,6 +219,7 @@ EXPORT_SYMBOL(set_dma_count);
 EXPORT_SYMBOL(set_dma_mode);
 EXPORT_SYMBOL(get_dma_residue);
 EXPORT_SYMBOL(set_dma_sg);
+EXPORT_SYMBOL(set_dma_speed);
 
 __initfunc(void init_dma(void))
 {
diff --git a/arch/arm/kernel/dma.h b/arch/arm/kernel/dma.h
index e4c72c6af..33db3b03b 100644
--- a/arch/arm/kernel/dma.h
+++ b/arch/arm/kernel/dma.h
@@ -15,6 +15,7 @@ typedef struct {
 	unsigned int	active:1;	/* Transfer active		*/
 	unsigned int	invalid:1;	/* Address/Count changed	*/
 	dmamode_t	dma_mode;	/* DMA mode			*/
+	int		speed;		/* DMA speed			*/
 
 	unsigned int	lock;		/* Device is allocated		*/
 	const char	*device_id;	/* Device name			*/
@@ -63,6 +64,15 @@ void arch_disable_dma(dmach_t channel, dma_t *dma);
  */
 int arch_get_dma_residue(dmach_t channel, dma_t *dma);
 
+/* Prototype: int arch_set_dma_speed(channel, dma, cycle)
+ * Purpose  : Convert a cycle time to a register setting
+ * Params   : channel - DMA channel number
+ *          : dma     - DMA structure for channel
+ *          : cycle   - cycle time in NS
+ * Returns  : setting for 'dma->speed'
+ */
+int arch_set_dma_speed(dmach_t channel, dma_t *dma, int cycle);
+
 /* Prototype: void arch_dma_init(dma)
  * Purpose  : Initialise architecture specific DMA
  * Params   : dma - pointer to array of DMA structures
diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c
index fe1c75f5c..dd4bf670c 100644
--- a/arch/arm/kernel/ecard.c
+++ b/arch/arm/kernel/ecard.c
@@ -7,32 +7,43 @@
  *
  * Created from information from Acorns RiscOS3 PRMs
  *
- * 08-Dec-1996	RMK	Added code for the 9'th expansion card - the ether podule slot.
+ * 08-Dec-1996	RMK	Added code for the 9'th expansion card - the ether
+ *			podule slot.
  * 06-May-1997  RMK	Added blacklist for cards whose loader doesn't work.
- * 12-Sep-1997	RMK	Created new handling of interrupt enables/disables - cards can
- *			now register their own routine to control interrupts (recommended).
- * 29-Sep-1997	RMK	Expansion card interrupt hardware not being re-enabled on reset from
- *			Linux. (Caused cards not to respond under RiscOS without hard reset).
+ * 12-Sep-1997	RMK	Created new handling of interrupt enables/disables
+ *			- cards can now register their own routine to control
+ *			interrupts (recommended).
+ * 29-Sep-1997	RMK	Expansion card interrupt hardware not being re-enabled
+ *			on reset from Linux. (Caused cards not to respond
+ *			under RiscOS without hard reset).
  * 15-Feb-1998	RMK	Added DMA support
  * 12-Sep-1998	RMK	Added EASI support
+ * 10-Jan-1999	RMK	Run loaders in a simulated RISC OS environment.
+ * 17-Apr-1999	RMK	Support for EASI Type C cycles.
  */
 
 #define ECARD_C
+#define __KERNEL_SYSCALLS__
 
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/unistd.h>
 #include <linux/init.h>
 
-#include <asm/io.h>
-#include <asm/hardware.h>
+#include <asm/dma.h>
 #include <asm/ecard.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/dma.h>
+#include <asm/pgtable.h>
 
 #ifdef CONFIG_ARCH_ARC
 #include <asm/arch/oldlatches.h>
@@ -40,45 +51,420 @@
 #define oldlatch_init()
 #endif
 
-#define BLACKLIST_NAME(m,p,s)	{ m, p, NULL, s }
-#define BLACKLIST_LOADER(m,p,l)	{ m, p, l, NULL }
-#define BLACKLIST_NOLOADER(m,p)	{ m, p, noloader, blacklisted_str }
-#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE)
+enum req {
+	req_readbytes,
+	req_reset
+};
 
-extern unsigned long atomwide_serial_loader[], oak_scsi_loader[], noloader[];
-static const char blacklisted_str[] = "*loader s/w is not 32-bit compliant*";
+struct ecard_request {
+	enum req	req;
+	ecard_t		*ec;
+	unsigned int	address;
+	unsigned int	length;
+	unsigned int	use_loader;
+	void		*buffer;
+};
 
-static const struct expcard_blacklist {
+struct expcard_blacklist {
 	unsigned short	 manufacturer;
 	unsigned short	 product;
-	const loader_t 	 loader;
 	const char	*type;
-} blacklist[] = {
-/* Cards without names */
-    BLACKLIST_NAME(MANU_ACORN,		PROD_ACORN_ETHER1,	"Acorn Ether1"),
-
-/* Cards with corrected loader */
-  BLACKLIST_LOADER(MANU_ATOMWIDE,	PROD_ATOMWIDE_3PSERIAL,	atomwide_serial_loader),
-  BLACKLIST_LOADER(MANU_OAK,		PROD_OAK_SCSI,		oak_scsi_loader),
+};
 
-/* Supported cards with broken loader */
-  { MANU_ALSYSTEMS, PROD_ALSYS_SCSIATAPI, noloader, "AlSystems PowerTec SCSI" },
+static ecard_t *cards;
+static ecard_t *slot_to_expcard[MAX_ECARDS];
+static unsigned int ectcr;
+#ifdef HAS_EXPMASK
+static unsigned int have_expmask;
+#endif
 
-/* Unsupported cards with no loader */
-  BLACKLIST_NOLOADER(MANU_MCS,		PROD_MCS_CONNECT32)
+/* List of descriptions of cards which don't have an extended
+ * identification, or chunk directories containing a description.
+ */
+static const struct expcard_blacklist __init blacklist[] = {
+	{ MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" }
 };
 
+asmlinkage extern int
+ecard_loader_reset(volatile unsigned char *pa, loader_t loader);
+asmlinkage extern int
+ecard_loader_read(int off, volatile unsigned char *pa, loader_t loader);
 extern int setup_arm_irq(int, struct irqaction *);
+extern void do_ecard_IRQ(int, struct pt_regs *);
+
+
+static void
+ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs);
+
+static struct irqaction irqexpansioncard = {
+	ecard_irq_noexpmask, SA_INTERRUPT, 0, "expansion cards", NULL, NULL
+};
+
+static inline unsigned short
+ecard_getu16(unsigned char *v)
+{
+	return v[0] | v[1] << 8;
+}
 
+static inline signed long
+ecard_gets24(unsigned char *v)
+{
+	return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0);
+}
+
+static inline ecard_t *
+slot_to_ecard(unsigned int slot)
+{
+	return slot < MAX_ECARDS ? slot_to_expcard[slot] : NULL;
+}
+
+/* ===================== Expansion card daemon ======================== */
 /*
- * from linux/arch/arm/kernel/irq.c
+ * Since the loader programs on the expansion cards need to be run
+ * in a specific environment, create a separate task with this
+ * environment up, and pass requests to this task as and when we
+ * need to.
+ *
+ * This should allow 99% of loaders to be called from Linux.
+ *
+ * From a security standpoint, we trust the card vendors.  This
+ * may be a misplaced trust.
  */
-extern void do_ecard_IRQ(int irq, struct pt_regs *);
+#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE)
+#define POD_INT_ADDR(x)	((volatile unsigned char *)\
+			 ((BUS_ADDR((x)) - IO_BASE) + IO_START))
 
-static ecard_t expcard[MAX_ECARDS];
-static signed char irqno_to_expcard[16];
-static unsigned int ecard_numcards, ecard_numirqcards;
-static unsigned int have_expmask;
+static void
+ecard_task_reset(struct ecard_request *req)
+{
+	if (req->ec == NULL) {
+		ecard_t *ec;
+
+		for (ec = cards; ec; ec = ec->next) {
+			printk(KERN_DEBUG "Resetting card %d\n",
+			       ec->slot_no);
+
+			if (ec->loader)
+				ecard_loader_reset(POD_INT_ADDR(ec->podaddr),
+						   ec->loader);
+		}
+		printk(KERN_DEBUG "All cards reset\n");
+	} else if (req->ec->loader)
+		ecard_loader_reset(POD_INT_ADDR(req->ec->podaddr),
+				   req->ec->loader);
+}
+
+static void
+ecard_task_readbytes(struct ecard_request *req)
+{
+	unsigned char *buf = (unsigned char *)req->buffer;
+	volatile unsigned char *base_addr =
+		(volatile unsigned char *)POD_INT_ADDR(req->ec->podaddr);
+	unsigned int len = req->length;
+
+	if (req->ec->slot_no == 8) {
+		/*
+		 * The card maintains an index which
+		 * increments the address into a 4096-byte
+		 * page on each access.  We need to keep
+		 * track of the counter.
+		 */
+		static unsigned int index;
+		unsigned int offset, page;
+		unsigned char byte = 0; /* keep gcc quiet */
+
+		offset = req->address & 4095;
+		page   = req->address >> 12;
+
+		if (page > 256)
+			return;
+
+		page *= 4;
+
+		if (offset == 0 || index > offset) {
+			/*
+			 * We need to reset the index counter.
+			 */
+			*base_addr = 0;
+			index = 0;
+		}
+
+		while (index <= offset) {
+			byte = base_addr[page];
+			index += 1;
+		}
+
+		while (len--) {
+			*buf++ = byte;
+			if (len) {
+				byte = base_addr[page];
+				index += 1;
+			}
+		}
+	} else {
+		unsigned int off = req->address;
+
+		if (!req->use_loader || !req->ec->loader) {
+			off *= 4;
+			while (len--) {
+				*buf++ = base_addr[off];
+				off += 4;
+			}
+		} else {
+			while(len--) {
+				/*
+				 * The following is required by some
+				 * expansion card loader programs.
+				 */
+				*(unsigned long *)0x108 = 0;
+				*buf++ = ecard_loader_read(off++, base_addr,
+							   req->ec->loader);
+			}
+		}
+	}
+
+}
+
+#ifdef CONFIG_CPU_32
+static pid_t ecard_pid;
+static wait_queue_head_t ecard_wait;
+static wait_queue_head_t ecard_done;
+static struct ecard_request *ecard_req;
+
+/*
+ * Set up the expansion card daemon's environment.
+ */
+static void
+ecard_init_task(void)
+{
+	/* We want to set up the page tables for the following mapping:
+	 *  Virtual	Physical
+	 *  0x03000000	0x03000000
+	 *  0x03010000	unmapped
+	 *  0x03210000	0x03210000
+	 *  0x03400000	unmapped
+	 *  0x08000000	0x08000000
+	 *  0x10000000	unmapped
+	 *
+	 * FIXME: we don't follow this 100% yet.
+	 */
+	pgd_t *src_pgd, *dst_pgd;
+	unsigned int dst_addr = IO_START;
+
+	src_pgd = pgd_offset(current->mm, IO_BASE);
+	dst_pgd = pgd_offset(current->mm, dst_addr);
+
+	while (dst_addr < IO_START + IO_SIZE) {
+		*dst_pgd++ = *src_pgd++;
+		dst_addr += PGDIR_SIZE;
+	}
+
+	flush_tlb_range(current->mm, IO_START, IO_START + IO_SIZE);
+
+	dst_addr = EASI_START;
+	src_pgd = pgd_offset(current->mm, EASI_BASE);
+	dst_pgd = pgd_offset(current->mm, dst_addr);
+
+	while (dst_addr < EASI_START + EASI_SIZE) {
+		*dst_pgd++ = *src_pgd++;
+		dst_addr += PGDIR_SIZE;
+	}
+
+	flush_tlb_range(current->mm, EASI_START, EASI_START + EASI_SIZE);
+}
+
+static int
+ecard_task(void * unused)
+{
+	current->session = 1;
+	current->pgrp = 1;
+
+	/*
+	 * We don't want /any/ signals, not even SIGKILL
+	 */
+	sigfillset(&current->blocked);
+	sigemptyset(&current->signal);
+
+	strcpy(current->comm, "kecardd");
+
+	/*
+	 * Set up the environment
+	 */
+	ecard_init_task();
+
+	while (1) {
+		struct ecard_request *req;
+
+		do {
+			req = xchg(&ecard_req, NULL);
+
+			if (req == NULL) {
+				sigemptyset(&current->signal);
+				interruptible_sleep_on(&ecard_wait);
+			}
+		} while (req == NULL);
+
+		switch (req->req) {
+		case req_readbytes:
+			ecard_task_readbytes(req);
+			break;
+
+		case req_reset:
+			ecard_task_reset(req);
+			break;
+		}
+		wake_up(&ecard_done);
+	}
+}
+
+/*
+ * Wake the expansion card daemon to action our request.
+ *
+ * FIXME: The test here is not sufficient to detect if the
+ * kecardd is running.
+ */
+static inline void
+ecard_call(struct ecard_request *req)
+{
+	/*
+	 * If we're called from task 0, or from an
+	 * interrupt (will be keyboard interrupt),
+	 * we forcefully set up the memory map, and
+	 * call the loader.  We can't schedule, or
+	 * sleep for this call.
+	 */
+	if ((current == task[0] || in_interrupt()) &&
+	    req->req == req_reset && req->ec == NULL) {
+		ecard_init_task();
+		ecard_task_reset(req);
+	} else {
+		if (ecard_pid <= 0)
+			ecard_pid = kernel_thread(ecard_task, NULL, 0);
+
+		ecard_req = req;
+
+		wake_up(&ecard_wait);
+
+		sleep_on(&ecard_done);
+	}
+}
+#else
+/*
+ * On 26-bit processors, we don't need the kecardd thread to access the
+ * expansion card loaders.  We do it directly.
+ */
+static inline void
+ecard_call(struct ecard_request *req)
+{
+	if (req->req == req_reset)
+		ecard_task_reset(req);
+	else
+		ecard_task_readbytes(req);
+}
+#endif
+
+/* ======================= Mid-level card control ===================== */
+/*
+ * This is called to reset the loaders for each expansion card on reboot.
+ *
+ * This is required to make sure that the card is in the correct state
+ * that RiscOS expects it to be.
+ */
+void
+ecard_reset(int slot)
+{
+	struct ecard_request req;
+
+	req.req = req_reset;
+
+	if (slot < 0)
+		req.ec = NULL;
+	else
+		req.ec = slot_to_ecard(slot);
+
+	ecard_call(&req);
+
+#ifdef HAS_EXPMASK
+	if (have_expmask && slot < 0) {
+		have_expmask |= ~0;
+		EXPMASK_ENABLE = have_expmask;
+	}
+#endif
+}
+
+static void
+ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld)
+{
+	struct ecard_request req;
+
+	req.req		= req_readbytes;
+	req.ec		= ec;
+	req.address	= off;
+	req.length	= len;
+	req.use_loader	= useld;
+	req.buffer	= addr;
+
+	ecard_call(&req);
+}
+
+int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num)
+{
+	struct ex_chunk_dir excd;
+	int index = 16;
+	int useld = 0;
+
+	if (!ec->cid.cd)
+		return 0;
+
+	while(1) {
+		ecard_readbytes(&excd, ec, index, 8, useld);
+		index += 8;
+		if (c_id(&excd) == 0) {
+			if (!useld && ec->loader) {
+				useld = 1;
+				index = 0;
+				continue;
+			}
+			return 0;
+		}
+		if (c_id(&excd) == 0xf0) { /* link */
+			index = c_start(&excd);
+			continue;
+		}
+		if (c_id(&excd) == 0x80) { /* loader */
+			if (!ec->loader) {
+				ec->loader = (loader_t)kmalloc(c_len(&excd),
+							       GFP_KERNEL);
+				if (ec->loader)
+					ecard_readbytes(ec->loader, ec,
+							(int)c_start(&excd),
+							c_len(&excd), useld);
+				else
+					return 0;
+			}
+			continue;
+		}
+		if (c_id(&excd) == id && num-- == 0)
+			break;
+	}
+
+	if (c_id(&excd) & 0x80) {
+		switch (c_id(&excd) & 0x70) {
+		case 0x70:
+			ecard_readbytes((unsigned char *)excd.d.string, ec,
+					(int)c_start(&excd), c_len(&excd),
+					useld);
+			break;
+		case 0x00:
+			break;
+		}
+	}
+	cd->start_offset = c_start(&excd);
+	memcpy(cd->d.string, excd.d.string, 256);
+	return 1;
+}
+
+/* ======================= Interrupt control ============================ */
 
 static void ecard_def_irq_enable(ecard_t *ec, int irqnr)
 {
@@ -100,6 +486,11 @@ static void ecard_def_irq_disable(ecard_t *ec, int irqnr)
 #endif
 }
 
+static int ecard_def_irq_pending(ecard_t *ec)
+{
+	return !ec->irqmask || ec->irqaddr[0] & ec->irqmask;
+}
+
 static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr)
 {
 	panic("ecard_def_fiq_enable called - impossible");
@@ -110,11 +501,18 @@ static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr)
 	panic("ecard_def_fiq_disable called - impossible");
 }
 
+static int ecard_def_fiq_pending(ecard_t *ec)
+{
+	return !ec->fiqmask || ec->fiqaddr[0] & ec->fiqmask;
+}
+
 static expansioncard_ops_t ecard_default_ops = {
 	ecard_def_irq_enable,
 	ecard_def_irq_disable,
+	ecard_def_irq_pending,
 	ecard_def_fiq_enable,
-	ecard_def_fiq_disable
+	ecard_def_fiq_disable,
+	ecard_def_fiq_pending
 };
 
 /*
@@ -125,10 +523,9 @@ static expansioncard_ops_t ecard_default_ops = {
  */
 void ecard_enableirq(unsigned int irqnr)
 {
-	irqnr &= 7;
-	if (irqnr < MAX_ECARDS && irqno_to_expcard[irqnr] != -1) {
-		ecard_t *ec = expcard + irqno_to_expcard[irqnr];
+	ecard_t *ec = slot_to_ecard(irqnr - 32);
 
+	if (ec) {
 		if (!ec->ops)
 			ec->ops = &ecard_default_ops;
 
@@ -142,10 +539,9 @@ void ecard_enableirq(unsigned int irqnr)
 
 void ecard_disableirq(unsigned int irqnr)
 {
-	irqnr &= 7;
-	if (irqnr < MAX_ECARDS && irqno_to_expcard[irqnr] != -1) {
-		ecard_t *ec = expcard + irqno_to_expcard[irqnr];
+	ecard_t *ec = slot_to_ecard(irqnr - 32);
 
+	if (ec) {
 		if (!ec->ops)
 			ec->ops = &ecard_default_ops;
 
@@ -156,10 +552,9 @@ void ecard_disableirq(unsigned int irqnr)
 
 void ecard_enablefiq(unsigned int fiqnr)
 {
-	fiqnr &= 7;
-	if (fiqnr < MAX_ECARDS && irqno_to_expcard[fiqnr] != -1) {
-		ecard_t *ec = expcard + irqno_to_expcard[fiqnr];
+	ecard_t *ec = slot_to_ecard(fiqnr);
 
+	if (ec) {
 		if (!ec->ops)
 			ec->ops = &ecard_default_ops;
 
@@ -173,10 +568,9 @@ void ecard_enablefiq(unsigned int fiqnr)
 
 void ecard_disablefiq(unsigned int fiqnr)
 {
-	fiqnr &= 7;
-	if (fiqnr < MAX_ECARDS && irqno_to_expcard[fiqnr] != -1) {
-		ecard_t *ec = expcard + irqno_to_expcard[fiqnr];
+	ecard_t *ec = slot_to_ecard(fiqnr);
 
+	if (ec) {
 		if (!ec->ops)
 			ec->ops = &ecard_default_ops;
 
@@ -185,41 +579,89 @@ void ecard_disablefiq(unsigned int fiqnr)
 	}
 }
 
-static void ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs)
+static void
+ecard_dump_irq_state(ecard_t *ec)
 {
-	const int num_cards = ecard_numirqcards;
-	int i, called = 0;
+	printk("  %d: %sclaimed, ",
+	       ec->slot_no,
+	       ec->claimed ? "" : "not ");
+
+	if (ec->ops && ec->ops->irqpending &&
+	    ec->ops != &ecard_default_ops)
+		printk("irq %spending\n",
+		       ec->ops->irqpending(ec) ? "" : "not ");
+	else
+		printk("irqaddr %p, mask = %02X, status = %02X\n",
+		       ec->irqaddr, ec->irqmask, *ec->irqaddr);
+}
 
-	for (i = 0; i < num_cards; i++) {
-		if (expcard[i].claimed && expcard[i].irq &&
-		    (!expcard[i].irqmask ||
-		     expcard[i].irqaddr[0] & expcard[i].irqmask)) {
-			do_ecard_IRQ(expcard[i].irq, regs);
-			called ++;
+static void
+ecard_check_lockup(void)
+{
+	static int last, lockup;
+	ecard_t *ec;
+
+	/*
+	 * If the timer interrupt has not run since the last million
+	 * unrecognised expansion card interrupts, then there is
+	 * something seriously wrong.  Disable the expansion card
+	 * interrupts so at least we can continue.
+	 *
+	 * Maybe we ought to start a timer to re-enable them some time
+	 * later?
+	 */
+	if (last == jiffies) {
+		lockup += 1;
+		if (lockup > 1000000) {
+			printk(KERN_ERR "\nInterrupt lockup detected - "
+			       "disabling all expansion card interrupts\n");
+
+			disable_irq(IRQ_EXPANSIONCARD);
+
+			printk("Expansion card IRQ state:\n");
+
+			for (ec = cards; ec; ec = ec->next)
+				ecard_dump_irq_state(ec);
 		}
+	} else
+		lockup = 0;
+
+	/*
+	 * If we did not recognise the source of this interrupt,
+	 * warn the user, but don't flood the user with these messages.
+	 */
+	if (!last || time_after(jiffies, last + 5*HZ)) {
+		last = jiffies;
+		printk(KERN_WARNING "Unrecognised interrupt from backplane\n");
 	}
-	cli();
-	if (called == 0) {
-		static int last, lockup;
-
-		if (last == jiffies) {
-			lockup += 1;
-			if (lockup > 1000000) {
-				printk(KERN_ERR "\nInterrupt lockup detected - disabling expansion card IRQs\n");
-				disable_irq(intr_no);
-				printk("Expansion card IRQ state:\n");
-				for (i = 0; i < num_cards; i++)
-					printk("  %d: %sclaimed, irqaddr = %p, irqmask = %X, status=%X\n", expcard[i].irq - 32,
-						expcard[i].claimed ? "" : "not", expcard[i].irqaddr, expcard[i].irqmask, *expcard[i].irqaddr);
-			}
-		} else
-			lockup = 0;
+}
+
+static void
+ecard_irq_noexpmask(int intr_no, void *dev_id, struct pt_regs *regs)
+{
+	ecard_t *ec;
+	int called = 0;
+
+	for (ec = cards; ec; ec = ec->next) {
+		int pending;
+
+		if (!ec->claimed || ec->irq == NO_IRQ || ec->slot_no == 8)
+			continue;
 
-		if (!last || time_after(jiffies, last + 5*HZ)) {
-			last = jiffies;
-			printk(KERN_ERR "\nUnrecognised interrupt from backplane\n");
+		if (ec->ops && ec->ops->irqpending)
+			pending = ec->ops->irqpending(ec);
+		else
+			pending = ecard_default_ops.irqpending(ec);
+
+		if (pending) {
+			do_ecard_IRQ(ec->irq, regs);
+			called ++;
 		}
 	}
+	cli();
+
+	if (called == 0)
+		ecard_check_lockup();
 }
 
 #ifdef HAS_EXPMASK
@@ -234,31 +676,35 @@ static unsigned char first_set[] =
 	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00
 };
 
-static void ecard_irq_expmask(int intr_no, void *dev_id, struct pt_regs *regs)
+static void
+ecard_irq_expmask(int intr_no, void *dev_id, struct pt_regs *regs)
 {
 	const unsigned int statusmask = 15;
 	unsigned int status;
 
 	status = EXPMASK_STATUS & statusmask;
 	if (status) {
-		unsigned int irqno;
+		unsigned int slot;
 		ecard_t *ec;
 again:
-		irqno = first_set[status];
-		ec = expcard + irqno_to_expcard[irqno];
+		slot = first_set[status];
+		ec = slot_to_ecard(slot);
 		if (ec->claimed) {
 			unsigned int oldexpmask;
 			/*
-			 * this ugly code is so that we can operate a prioritorising system.
+			 * this ugly code is so that we can operate a
+			 * prioritising system:
+			 *
 			 * Card 0 	highest priority
 			 * Card 1
 			 * Card 2
 			 * Card 3	lowest priority
+			 *
 			 * Serial cards should go in 0/1, ethernet/scsi in 2/3
 			 * otherwise you will lose serial data at high speeds!
 			 */
 			oldexpmask = have_expmask;
-			EXPMASK_ENABLE = (have_expmask &= priority_masks[irqno]);
+			EXPMASK_ENABLE = (have_expmask &= priority_masks[slot]);
 			sti();
 			do_ecard_IRQ(ec->irq, regs);
 			cli();
@@ -267,15 +713,18 @@ again:
 			if (status)
 				goto again;
 		} else {
-			printk(KERN_WARNING "card%d: interrupt from unclaimed card???\n", irqno);
-			EXPMASK_ENABLE = (have_expmask &= ~(1 << irqno));
+			printk(KERN_WARNING "card%d: interrupt from unclaimed "
+			       "card???\n", slot);
+			EXPMASK_ENABLE = (have_expmask &= ~(1 << slot));
 		}
 	} else
 		printk(KERN_WARNING "Wild interrupt from backplane (masks)\n");
 }
 
-static int ecard_checkirqhw(void)
+__initfunc(static void
+ecard_probeirqhw(void))
 {
+	ecard_t *ec;
 	int found;
 
 	EXPMASK_ENABLE = 0x00;
@@ -283,62 +732,80 @@ static int ecard_checkirqhw(void)
 	found = ((EXPMASK_STATUS & 15) == 0);
 	EXPMASK_ENABLE = 0xff;
 
-	return found;
+	if (!found)
+		return;
+
+	printk(KERN_DEBUG "Expansion card interrupt "
+	       "management hardware found\n");
+
+	irqexpansioncard.handler = ecard_irq_expmask;
+
+	/* for each card present, set a bit to '1' */
+	have_expmask = 0x80000000;
+
+	for (ec = cards; ec; ec = ec->next)
+		have_expmask |= 1 << ec->slot_no;
+
+	EXPMASK_ENABLE = have_expmask;
 }
+#else
+#define ecard_probeirqhw()
+#endif
+
+#ifndef IO_EC_MEMC8_BASE
+#define IO_EC_MEMC8_BASE 0
 #endif
 
-static void ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld)
+unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
 {
-	extern int ecard_loader_read(int off, volatile unsigned int pa, loader_t loader);
-	unsigned char *a = (unsigned char *)addr;
+	unsigned long address = 0;
+	int slot = ec->slot_no;
 
-	if (ec->slot_no == 8) {
-		static unsigned int lowaddress;
-		unsigned int laddr, haddr;
-		unsigned char byte = 0; /* keep gcc quiet */
+	if (ec->slot_no == 8)
+		return IO_EC_MEMC8_BASE;
 
-		laddr = off & 4095;	/* number of bytes to read from offset + base addr */
-		haddr = off >> 12;	/* offset into card from base addr */
+	ectcr &= ~(1 << slot);
 
-		if (haddr > 256)
-			return;
+	switch (type) {
+	case ECARD_MEMC:
+		if (slot < 4)
+			address = IO_EC_MEMC_BASE + (slot << 12);
+		break;
 
-		/*
-		 * If we require a low address or address 0, then reset, and start again...
-		 */
-		if (!off || lowaddress > laddr) {
-			outb(0, ec->podaddr);
-			lowaddress = 0;
-		}
-		while (lowaddress <= laddr) {
-			byte = inb(ec->podaddr + haddr);
-			lowaddress += 1;
-		}
-		while (len--) {
-			*a++ = byte;
-			if (len) {
-				byte = inb(ec->podaddr + haddr);
-				lowaddress += 1;
-			}
-		}
-	} else {
-		if (!useld || !ec->loader) {
-			while(len--)
-				*a++ = inb(ec->podaddr + (off++));
-		} else {
-			while(len--) {
-				*(unsigned long *)0x108 = 0; /* hack for some loaders!!! */
-				*a++ = ecard_loader_read(off++, BUS_ADDR(ec->podaddr), ec->loader);
-			}
-		}
+	case ECARD_IOC:
+		if (slot < 4)
+			address = IO_EC_IOC_BASE + (slot << 12);
+#ifdef IO_EC_IOC4_BASE
+		else
+			address = IO_EC_IOC4_BASE + ((slot - 4) << 12);
+#endif
+		if (address)
+			address +=  speed << 17;
+		break;
+
+#ifdef IO_EC_EASI_BASE
+	case ECARD_EASI:
+		address = IO_EC_EASI_BASE + (slot << 22);
+		if (speed == ECARD_FAST)
+			ectcr |= 1 << slot;
+		break;
+#endif
 	}
+
+#ifdef IOMD_ECTCR
+	outb(ectcr, IOMD_ECTCR);
+#endif
+	return address;
 }
 
+static const char *unknown = "*unknown*";
+
 static int ecard_prints(char *buffer, ecard_t *ec)
 {
 	char *start = buffer;
 
-	buffer += sprintf(buffer, "\n  %d: ", ec->slot_no);
+	buffer += sprintf(buffer, "  %d: %s ", ec->slot_no,
+			  ec->type == ECARD_EASI ? "EASI" : "    ");
 
 	if (ec->cid.id == 0) {
 		struct in_chunk_dir incd;
@@ -346,28 +813,57 @@ static int ecard_prints(char *buffer, ecard_t *ec)
 		buffer += sprintf(buffer, "[%04X:%04X] ",
 			ec->cid.manufacturer, ec->cid.product);
 
-		if (!ec->card_desc && ec->cid.is && ec->cid.cd &&
-		    ecard_readchunk(&incd, ec, 0xf5, 0))
-			ec->card_desc = incd.d.string;
+		if (!ec->card_desc && ec->cid.cd &&
+		    ecard_readchunk(&incd, ec, 0xf5, 0)) {
+			ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL);
 
-		if (!ec->card_desc)
-			ec->card_desc = "*unknown*";
+			if (ec->card_desc)
+				strcpy(ec->card_desc, incd.d.string);
+		}
 
-		buffer += sprintf(buffer, "%s", ec->card_desc);
+		buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : unknown);
 	} else
-		buffer += sprintf(buffer, "Simple card %d", ec->cid.id);
+		buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id);
 
 	return buffer - start;
 }
 
-static inline unsigned short ecard_getu16(unsigned char *v)
+int get_ecard_dev_info(char *buf, char **start, off_t pos, int count, int wr)
 {
-	return v[0] | v[1] << 8;
+	ecard_t *ec = cards;
+	off_t at = 0;
+	int len, cnt;
+
+	cnt = 0;
+	while (ec && count > cnt) {
+		len = ecard_prints(buf, ec);
+		at += len;
+		if (at >= pos) {
+			if (!*start) {
+				*start = buf + (pos - (at - len));
+				cnt = at - pos;
+			} else
+				cnt += len;
+			buf += len;
+		}
+		ec = ec->next;
+	}
+	return (count > cnt) ? cnt : count;
 }
 
-static inline signed long ecard_gets24(unsigned char *v)
+static struct proc_dir_entry proc_ecard_devices = {
+	PROC_BUS_ECARD_DEVICES, 7, "devices",
+	S_IFREG | S_IRUGO, 1, 0, 0,
+	0, &proc_array_inode_operations,
+	get_ecard_dev_info
+};
+
+static struct proc_dir_entry *proc_bus_ecard_dir;
+
+static void ecard_proc_init(void)
 {
-	return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0);
+	proc_bus_ecard_dir = create_proc_entry("ecard", S_IFDIR, proc_bus);
+	proc_register(proc_bus_ecard_dir, &proc_ecard_devices);
 }
 
 /*
@@ -376,33 +872,39 @@ static inline signed long ecard_gets24(unsigned char *v)
  * If bit 1 of the first byte of the card is set, then the
  * card does not exist.
  */
-__initfunc(static int ecard_probe(int card, int freeslot, card_type_t type))
+__initfunc(static int
+ecard_probe(int slot, card_type_t type))
 {
-	ecard_t *ec = expcard + freeslot;
+	ecard_t **ecp;
+	ecard_t *ec;
 	struct ex_ecid cid;
 	char buffer[200];
-	int i;
+	int i, rc = -ENOMEM;
+
+	ec = kmalloc(sizeof(ecard_t), GFP_KERNEL);
+
+	if (!ec)
+		goto nodev;
 
-	irqno_to_expcard[card] = -1;
+	memset(ec, 0, sizeof(ecard_t));
 
-	ec->slot_no	= card;
+	ec->slot_no	= slot;
+	ec->type	= type;
 	ec->irq		= NO_IRQ;
 	ec->fiq		= NO_IRQ;
 	ec->dma		= NO_DMA;
 	ec->card_desc	= NULL;
 	ec->ops		= &ecard_default_ops;
 
+	rc = -ENODEV;
 	if ((ec->podaddr = ecard_address(ec, type, ECARD_SYNC)) == 0)
-		return 0;
+		goto nodev;
 
 	cid.r_zero = 1;
 	ecard_readbytes(&cid, ec, 0, 16, 0);
 	if (cid.r_zero)
-		return 0;
+		goto nodev;
 
-	irqno_to_expcard[card] = freeslot;
-
-	ec->type	= type;
 	ec->cid.id	= cid.r_id;
 	ec->cid.cd	= cid.r_cd;
 	ec->cid.is	= cid.r_is;
@@ -415,9 +917,9 @@ __initfunc(static int ecard_probe(int card, int freeslot, card_type_t type))
 	ec->cid.fiqmask = cid.r_fiqmask;
 	ec->cid.fiqoff  = ecard_gets24(cid.r_fiqoff);
 	ec->fiqaddr	=
-	ec->irqaddr	= (unsigned char *)BUS_ADDR(ec->podaddr);
+	ec->irqaddr	= (unsigned char *)ioaddr(ec->podaddr);
 
-	if (ec->cid.cd && ec->cid.is) {
+	if (ec->cid.is) {
 		ec->irqmask = ec->cid.irqmask;
 		ec->irqaddr += ec->cid.irqoff;
 		ec->fiqmask = ec->cid.fiqmask;
@@ -430,88 +932,69 @@ __initfunc(static int ecard_probe(int card, int freeslot, card_type_t type))
 	for (i = 0; i < sizeof(blacklist) / sizeof(*blacklist); i++)
 		if (blacklist[i].manufacturer == ec->cid.manufacturer &&
 		    blacklist[i].product == ec->cid.product) {
-			ec->loader = blacklist[i].loader;
 			ec->card_desc = blacklist[i].type;
 			break;
 		}
 
-	ecard_prints(buffer, ec);
-	printk("%s", buffer);
-
-	ec->irq = 32 + card;
+	ec->irq = 32 + slot;
 #ifdef IO_EC_MEMC8_BASE
-	if (card == 8)
+	if (slot == 8)
 		ec->irq = 11;
 #endif
 #ifdef CONFIG_ARCH_RPC
 	/* On RiscPC, only first two slots have DMA capability */
-	if (card < 2)
-		ec->dma = 2 + card;
+	if (slot < 2)
+		ec->dma = 2 + slot;
 #endif
 #if 0	/* We don't support FIQs on expansion cards at the moment */
-	ec->fiq = 96 + card;
+	ec->fiq = 96 + slot;
 #endif
 
-	return 1;
-}
+	rc = 0;
 
-/*
- * This is called to reset the loaders for each expansion card on reboot.
- *
- * This is required to make sure that the card is in the correct state
- * that RiscOS expects it to be.
- */
-void ecard_reset(int card)
-{
-	extern int ecard_loader_reset(volatile unsigned int pa, loader_t loader);
+	for (ecp = &cards; *ecp; ecp = &(*ecp)->next);
 
-	if (card >= ecard_numcards)
-		return;
-    
-	if (card < 0) {
-		for (card = 0; card < ecard_numcards; card++)
-			if (expcard[card].loader)
-				ecard_loader_reset(BUS_ADDR(expcard[card].podaddr),
-							expcard[card].loader);
-	} else
-		if (expcard[card].loader)
-			ecard_loader_reset(BUS_ADDR(expcard[card].podaddr),
-						expcard[card].loader);
+	*ecp = ec;
 
-#ifdef HAS_EXPMASK
-	if (have_expmask) {
-		have_expmask |= ~0;
-		EXPMASK_ENABLE = have_expmask;
+nodev:
+	if (rc && ec)
+		kfree(ec);
+	else if (!rc) {	/* success only: ec is NULL here if kmalloc failed */
+		slot_to_expcard[slot] = ec;
+
+		ecard_prints(buffer, ec);
+		printk("%s", buffer);
 	}
-#endif
+	return rc;
 }
 
-static unsigned int ecard_startcard;
+static ecard_t *finding_pos;
 
 void ecard_startfind(void)
 {
-	ecard_startcard = 0;
+	finding_pos = NULL;
 }
 
 ecard_t *ecard_find(int cid, const card_ids *cids)
 {
-	int card;
+	if (!finding_pos)
+		finding_pos = cards;
+	else
+		finding_pos = finding_pos->next;
+
+	for (; finding_pos; finding_pos = finding_pos->next) {
+		if (finding_pos->claimed)
+			continue;
 
-	if (!cids) {
-		for (card = ecard_startcard; card < ecard_numcards; card++)
-			if (!expcard[card].claimed &&
-			    (expcard[card].cid.id ^ cid) == 0)
+		if (!cids) {
+			if ((finding_pos->cid.id ^ cid) == 0)
 				break;
-	} else {
-		for (card = ecard_startcard; card < ecard_numcards; card++) {
+		} else {
 			unsigned int manufacturer, product;
 			int i;
 
-			if (expcard[card].claimed)
-				continue;
-
-			manufacturer = expcard[card].cid.manufacturer;
-			product = expcard[card].cid.product;
+			manufacturer = finding_pos->cid.manufacturer;
+			product = finding_pos->cid.product;
 
 			for (i = 0; cids[i].manufacturer != 65535; i++)
 				if (manufacturer == cids[i].manufacturer &&
@@ -523,111 +1006,24 @@ ecard_t *ecard_find(int cid, const card_ids *cids)
 		}
 	}
 
-	ecard_startcard = card + 1;
-
-	return card < ecard_numcards ? &expcard[card] : NULL;
+	return finding_pos;
 }
 
-int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num)
+__initfunc(static void ecard_free_all(void))
 {
-	struct ex_chunk_dir excd;
-	int index = 16;
-	int useld = 0;
+	ecard_t *ec, *ecn;
 
-	if (!ec->cid.is || !ec->cid.cd)
-		return 0;
-
-	while(1) {
-		ecard_readbytes(&excd, ec, index, 8, useld);
-		index += 8;
-		if (c_id(&excd) == 0) {
-			if (!useld && ec->loader) {
-				useld = 1;
-				index = 0;
-				continue;
-			}
-			return 0;
-		}
-		if (c_id(&excd) == 0xf0) { /* link */
-			index = c_start(&excd);
-			continue;
-		}
-		if (c_id(&excd) == 0x80) { /* loader */
-			if (!ec->loader) {
-				ec->loader = (loader_t)kmalloc(c_len(&excd), GFP_KERNEL);
-				ecard_readbytes(ec->loader, ec, (int)c_start(&excd), c_len(&excd), useld);
-			}
-			continue;
-		}
-		if (c_id(&excd) == id && num-- == 0)
-			break;
-	}
+	for (ec = cards; ec; ec = ecn) {
+		ecn = ec->next;
 
-	if (c_id(&excd) & 0x80) {
-		switch (c_id(&excd) & 0x70) {
-		case 0x70:
-			ecard_readbytes((unsigned char *)excd.d.string, ec,
-					(int)c_start(&excd), c_len(&excd), useld);
-			break;
-		case 0x00:
-			break;
-		}
+		kfree(ec);
 	}
-	cd->start_offset = c_start(&excd);
-	memcpy(cd->d.string, excd.d.string, 256);
-	return 1;
-}
-
-unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
-{
-	switch (ec->slot_no) {
-	case 0 ... 3:
-		switch (type) {
-		case ECARD_MEMC:
-			return IO_EC_MEMC_BASE + (ec->slot_no << 12);
-
-		case ECARD_IOC:
-			return IO_EC_IOC_BASE + (speed << 17) + (ec->slot_no << 12);
-
-#ifdef IO_EC_EASI_BASE
-		case ECARD_EASI:
-			return IO_EC_EASI_BASE + (ec->slot_no << 22);
-#endif
-		}
-		break;
 
-	case 4 ... 7:
-		switch (type) {
-#ifdef IO_EC_IOC4_BASE
-		case ECARD_IOC:
-			return IO_EC_IOC4_BASE + (speed << 17) + ((ec->slot_no - 4) << 12);
-#endif
-#ifdef IO_EC_EASI_BASE
-		case ECARD_EASI:
-			return IO_EC_EASI_BASE + (ec->slot_no << 22);
-#endif
-		default:
-			break;
-		}
-		break;
+	cards = NULL;
 
-#ifdef IO_EC_MEMC8_BASE
-	case 8:
-		return IO_EC_MEMC8_BASE;
-#endif
-	}
-	return 0;
+	memset(slot_to_expcard, 0, sizeof(slot_to_expcard));
 }
 
-static struct irqaction irqexpansioncard = {
-	ecard_irq_noexpmask,
-	SA_INTERRUPT,
-	0,
-	"expansion cards",
-	NULL,
-	NULL
-};
-
 /*
  * Initialise the expansion card system.
  * Locate all hardware - interrupt management and
@@ -635,51 +1031,38 @@ static struct irqaction irqexpansioncard = {
  */
 __initfunc(void ecard_init(void))
 {
-	int i, nc = 0;
+	int slot;
 
-	memset(expcard, 0, sizeof(expcard));
+	oldlatch_init();
 
-#ifdef HAS_EXPMASK
-	if (ecard_checkirqhw()) {
-		printk(KERN_DEBUG "Expansion card interrupt management hardware found\n");
-		irqexpansioncard.handler = ecard_irq_expmask;
-		irqexpansioncard.flags |= SA_IRQNOMASK;
-		have_expmask = -1;
-	}
+#ifdef CONFIG_CPU_32
+	init_waitqueue_head(&ecard_wait);
+	init_waitqueue_head(&ecard_done);
 #endif
 
-	printk("Installed expansion cards:");
+	printk("Probing expansion cards: (does not imply support)\n");
 
-	/*
-	 * First of all, probe all cards on the expansion card interrupt line
-	 */
-	for (i = 0; i < 8; i++)
-		if (ecard_probe(i, nc, ECARD_IOC) || ecard_probe(i, nc, ECARD_EASI))
-			nc += 1;
-		else
-			have_expmask &= ~(1<<i);
-
-	ecard_numirqcards = nc;
+	for (slot = 0; slot < 8; slot ++) {
+		if (ecard_probe(slot, ECARD_EASI) == -ENODEV)
+			ecard_probe(slot, ECARD_IOC);
+	}
 
-	/* Now probe other cards with different interrupt lines
-	 */
 #ifdef IO_EC_MEMC8_BASE
-	if (ecard_probe(8, nc, ECARD_IOC))
-		nc += 1;
+	ecard_probe(8, ECARD_IOC);
 #endif
 
-	printk("\n");
-	ecard_numcards = nc;
+	ecard_probeirqhw();
 
-	if (nc && setup_arm_irq(IRQ_EXPANSIONCARD, &irqexpansioncard)) {
-		printk("Could not allocate interrupt for expansion cards\n");
-		return;
+	if (setup_arm_irq(IRQ_EXPANSIONCARD, &irqexpansioncard)) {
+		printk(KERN_ERR "Unable to claim IRQ%d for expansion cards\n",
+		       IRQ_EXPANSIONCARD);
+		ecard_free_all();
 	}
-	
-#ifdef HAS_EXPMASK
-	if (nc && have_expmask)
-		EXPMASK_ENABLE = have_expmask;
-#endif
 
-	oldlatch_init();
+	ecard_proc_init();
 }
+
+EXPORT_SYMBOL(ecard_startfind);
+EXPORT_SYMBOL(ecard_find);
+EXPORT_SYMBOL(ecard_readchunk);
+EXPORT_SYMBOL(ecard_address);
diff --git a/arch/arm/kernel/entry-armo.S b/arch/arm/kernel/entry-armo.S
index 3ca29cd2c..758163f07 100644
--- a/arch/arm/kernel/entry-armo.S
+++ b/arch/arm/kernel/entry-armo.S
@@ -159,8 +159,8 @@ irq_prio_h:	.byte	 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10
 		.macro	restore_user_regs
 		ldmia	sp, {r0 - lr}^
 		mov	r0, r0
-		add	sp, sp, #15*4
-		ldr	lr, [sp], #8
+		ldr	lr, [sp, #15*4]
+		add	sp, sp, #15*4+8
 		movs	pc, lr
 		.endm
 
@@ -226,13 +226,6 @@ irq_prio_h:	.byte	 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10
 		str	r0, [sp, #S_OLD_R0]	;\
 		mov	fp, #0
 
-#define USER_RESTORE_ALL			\
-		ldmia	sp, {r0 - lr}^		;\
-		mov	r0, r0			;\
-		add	sp, sp, #15*4		;\
-		ldr	lr, [sp], #8		;\
-		movs	pc, lr
-
 #define SVC_RESTORE_ALL				\
 		ldmfd	sp, {r0 - pc}^
 		
@@ -253,7 +246,7 @@ _unexp_fiq:	ldr	sp, .LCfiq
 		mov	r0, r0
 		movs	pc, lr
 
-Lfiqmsg:	.ascii	"*** Unexpeced FIQ\n\0"
+Lfiqmsg:	.ascii	"*** Unexpected FIQ\n\0"
 		.align
 
 .LCfiq:		.word	__temp_fiq
@@ -315,14 +308,14 @@ fpe_not_present:
 		and	r4, r10, #255			@ get offset
 		and	r6, r10, #0x000f0000
 		tst	r10, #0x00800000		@ +/-
-		rsbeq	r4, r4, #0
 		ldr	r5, [sp, r6, lsr #14]		@ Load reg
+		rsbeq	r4, r4, #0
 		add	r5, r5, r4, lsl #2
 		str	r5, [sp, r6, lsr #14]		@ Save reg
 		b	ret_from_exception
 
-wfs_mask_data:	.word	0x0e200110			@ WFS
-		.word	0x0fff0fff
+wfs_mask_data:	.word	0x0e200110			@ WFS/RFS
+		.word	0x0fef0fff
 		.word	0x0d0d0100			@ LDF [sp]/STF [sp]
 		.word	0x0d0b0100			@ LDF [fp]/STF [fp]
 		.word	0x0f0f0f00
@@ -341,8 +334,7 @@ vector_prefetch:
 		save_user_regs
 		teqp	pc, #0x00000003		@ NOT a problem - doesnt change mode
 		mask_pc	r0, lr			@ Address of abort
-		mov	r1, #FAULT_CODE_PREFETCH|FAULT_CODE_USER @ Error code
-		mov	r2, sp			@ Tasks registers
+		mov	r1, sp			@ Tasks registers
 		bl	SYMBOL_NAME(do_PrefetchAbort)
 		teq	r0, #0			@ If non-zero, we believe this abort..
 		bne	ret_from_sys_call
@@ -451,6 +443,7 @@ vector_IRQ:	ldr	r13, .LCirq			@ I will leave this one in just in case...
 		adr	lr, 1b
 		orr	lr, lr, #0x08000003		@ Force SVC
 		bne	do_IRQ
+		mov	r4, #0
 		b	ret_with_reschedule
 
 		irq_prio_table
@@ -562,8 +555,8 @@ Ldata_ldrstr_numindex:
 		and	r0, r0, #15 << 2	@ Mask out reg.
 		teq	r0, #15 << 2
 		ldr	r0, [r3, r0]		@ Get register
-		biceq	r0, r0, #PCMASK
 		mov	r1, r4, lsl #20
+		biceq	r0, r0, #PCMASK
 		tst	r4, #1 << 23
 		addne	r0, r0, r1, lsr #20
 		subeq	r0, r0, r1, lsr #20
@@ -578,12 +571,12 @@ Ldata_ldrstr_regindex:
 		and	r0, r0, #15 << 2	@ Mask out reg.
 		teq	r0, #15 << 2
 		ldr	r0, [r3, r0]		@ Get register
-		biceq	r0, r0, #PCMASK
 		and	r7, r4, #15
+		biceq	r0, r0, #PCMASK
 		teq	r7, #15			@ Check for PC
 		ldr	r7, [r3, r7, lsl #2]	@ Get Rm
-		biceq	r7, r7, #PCMASK
 		and	r8, r4, #0x60		@ Get shift types
+		biceq	r7, r7, #PCMASK
 		mov	r9, r4, lsr #7		@ Get shift amount
 		and	r9, r9, #31
 		teq	r8, #0
@@ -645,8 +638,8 @@ Ldata_ldcstc_pre:
 		and	r0, r0, #15 << 2	@ Mask out reg.
 		teq	r0, #15 << 2
 		ldr	r0, [r3, r0]		@ Get register
-		biceq	r0, r0, #PCMASK
 		mov	r1, r4, lsl #24		@ Get offset
+		biceq	r0, r0, #PCMASK
 		tst	r4, #1 << 23
 		addne	r0, r0, r1, lsr #24
 		subeq	r0, r0, r1, lsr #24
@@ -656,9 +649,54 @@ Ldata_ldcstc_pre:
 #endif
 		b	SYMBOL_NAME(do_DataAbort)
 
-#include "entry-common.S"
+/*
+ *=============================================================================
+ *		Low-level interface code
+ *-----------------------------------------------------------------------------
+ *		Trap initialisation
+ *-----------------------------------------------------------------------------
+ *
+ * Note - FIQ code has changed.  The default is a couple of words in 0x1c, 0x20
+ * that call _unexp_fiq.  However, we now copy the FIQ routine to 0x1c (removes
+ * some excess cycles).
+ *
+ * What we need to put into 0-0x1c are branches to branch to the kernel.
+ */
 
-		.data
+		.section ".text.init",#alloc,#execinstr
+
+.Ljump_addresses:
+		swi	SYS_ERROR0			@ word stored unchanged at address 0x00
+		.word	vector_undefinstr	- 12	@ 0x04 slot: target - (0x04 + 8)
+		.word	vector_swi		- 16	@ 0x08 slot: target - (0x08 + 8)
+		.word	vector_prefetch		- 20	@ 0x0c slot: target - (0x0c + 8)
+		.word	vector_data		- 24	@ 0x10 slot: target - (0x10 + 8)
+		.word	vector_addrexcptn	- 28	@ 0x14 slot: target - (0x14 + 8)
+		.word	vector_IRQ		- 32	@ 0x18 slot: target - (0x18 + 8)
+		.word	_unexp_fiq		- 36	@ 0x1c slot: target - (0x1c + 8)
+		b	. + 8				@ branch opcode with a zero offset field, used as template
+/*
+ * initialise the trap system: turn the byte offsets above into branch instructions and store them at 0x00 - 0x1c
+ */
+ENTRY(trap_init)
+		stmfd	sp!, {r4 - r7, lr}
+		adr	r1, .Ljump_addresses
+		ldmia	r1, {r1 - r7, ip, lr}		@ r1 = swi word, r2 - r7/ip = seven offsets, lr = branch template
+		orr	r2, lr, r2, lsr #2		@ byte offset -> word offset, merged into the branch opcode
+		orr	r3, lr, r3, lsr #2
+		orr	r4, lr, r4, lsr #2
+		orr	r5, lr, r5, lsr #2
+		orr	r6, lr, r6, lsr #2
+		orr	r7, lr, r7, lsr #2
+		orr	ip, lr, ip, lsr #2
+		mov	r0, #0				@ destination: address 0
+		stmia	r0, {r1 - r7, ip}		@ write the eight words to 0x00 - 0x1c
+		ldmfd	sp!, {r4 - r7, pc}^		@ restore r4 - r7 and return (^ form reloads the PSR with pc)
+
+		.text
+
+#include "entry-common.S"
 
-__temp_irq:	.word	0				@ saved lr_irq
+		.bss
+__temp_irq:	.space	4				@ saved lr_irq
 __temp_fiq:	.space	128
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index bcc938b32..9456abe33 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -61,8 +61,12 @@
 #define S_R1		4
 #define S_R0		0
 
+#define OFF_CR_ALIGNMENT(x)	cr_alignment - x
+
 #ifdef IOC_BASE
 /* IOC / IOMD based hardware */
+#include <asm/iomd.h>
+
 		.equ	ioc_base_high, IOC_BASE & 0xff000000
 		.equ	ioc_base_low, IOC_BASE & 0x00ff0000
 		.macro	disable_fiq
@@ -186,113 +190,109 @@ irq_prio_ebsa110:
 		.byte	 6, 6, 6, 6, 2, 2, 2, 2, 3, 3, 6, 6, 2, 2, 2, 2
 		.endm
 
-#elif defined(CONFIG_ARCH_EBSA285)
+#elif defined(CONFIG_HOST_FOOTBRIDGE) || defined(CONFIG_ADDIN_FOOTBRIDGE)
+#include <asm/dec21285.h>
 
 		.macro	disable_fiq
 		.endm
 
+		.equ	irq_mask_pci_err_high, IRQ_MASK_PCI_ERR & 0xff000000
+		.equ	irq_mask_pci_err_low,  IRQ_MASK_PCI_ERR & 0x00ffffff
+		.equ	dc21285_high, ARMCSR_BASE & 0xff000000
+		.equ	dc21285_low, ARMCSR_BASE & 0x00ffffff
+
 		.macro	get_irqnr_and_base, irqnr, irqstat, base
-		mov	r4, #0xfe000000
+		mov	r4, #dc21285_high
+		.if	dc21285_low
+		orr	r4, r4, #dc21285_low
+		.endif
 		ldr	\irqstat, [r4, #0x180]		@ get interrupts
-		mov	\irqnr, #0
-1001:		tst	\irqstat, #1
-		addeq	\irqnr, \irqnr, #1
-		moveq	\irqstat, \irqstat, lsr #1
-		tsteq	\irqnr, #32
-		beq	1001b
-		teq	\irqnr, #32
-		.endm
 
-		.macro	irq_prio_table
-		.endm
-
-#elif defined(CONFIG_ARCH_NEXUSPCI)
+		mov	\irqnr, #IRQ_SDRAMPARITY
+		tst	\irqstat, #IRQ_MASK_SDRAMPARITY
+		bne	1001f
 
-		.macro	disable_fiq
-		.endm
+		tst	\irqstat, #IRQ_MASK_UART_RX
+		movne	\irqnr, #IRQ_CONRX
+		bne	1001f
 
-		.macro	get_irqnr_and_base, irqnr, irqstat, base
-		ldr	r4, =0xffe00000
-		ldr	\irqstat, [r4, #0x180]		@ get interrupts
-		mov	\irqnr, #0
-1001:		tst	\irqstat, #1
-		addeq	\irqnr, \irqnr, #1
-		moveq	\irqstat, \irqstat, lsr #1
-		tsteq	\irqnr, #32
-		beq	1001b
-		teq	\irqnr, #32
-		.endm
+		tst	\irqstat, #IRQ_MASK_DMA1
+		movne	\irqnr, #IRQ_DMA1
+		bne	1001f
 
-		.macro	irq_prio_table
-		.endm
+		tst	\irqstat, #IRQ_MASK_DMA2
+		movne	\irqnr, #IRQ_DMA2
+		bne	1001f
 
-#elif defined(CONFIG_ARCH_VNC)
+		tst	\irqstat, #IRQ_MASK_IN0
+		movne	\irqnr, #IRQ_IN0
+		bne	1001f
 
-		.macro	disable_fiq
-		.endm
+		tst	\irqstat, #IRQ_MASK_IN1
+		movne	\irqnr, #IRQ_IN1
+		bne	1001f
 
-		.equ	pci_iack_high, PCI_IACK & 0xff000000
-		.equ	pci_iack_low,  PCI_IACK & 0x00ff0000
+		tst	\irqstat, #IRQ_MASK_IN2
+		movne	\irqnr, #IRQ_IN2
+		bne	1001f
 
-		.macro	get_irqnr_and_base, irqnr, irqstat, base
-		mov	r4, #IO_BASE_ARM_CSR
-		ldr	\irqstat, [r4, #CSR_IRQ_STATUS]	@ just show us the unmasked ones
+		tst	\irqstat, #IRQ_MASK_IN3
+		movne	\irqnr, #IRQ_IN3
+		bne	1001f
 
-		@ run through hard priorities
-		@ timer
-		tst	\irqstat, #IRQ_MASK_TIMER0
-		movne	\irqnr, #IRQ_TIMER0
+		tst	\irqstat, #IRQ_MASK_PCI
+		movne	\irqnr, #IRQ_PCI
 		bne	1001f
 
-		@ ether10
-		tst	\irqstat, #IRQ_MASK_ETHER10
-		movne	\irqnr, #IRQ_ETHER10
+		tst	\irqstat, #IRQ_MASK_I2OINPOST
+		movne	\irqnr, #IRQ_I2OINPOST
 		bne	1001f
 
-		@ ether100
-		tst	\irqstat, #IRQ_MASK_ETHER100
-		movne	\irqnr, #IRQ_ETHER100
+		tst	\irqstat, #IRQ_MASK_TIMER1
+		movne	\irqnr, #IRQ_TIMER1
 		bne	1001f
 
-		@ video compressor
-		tst	\irqstat, #IRQ_MASK_VIDCOMP
-		movne	\irqnr, #IRQ_VIDCOMP
+		tst	\irqstat, #IRQ_MASK_TIMER2
+		movne	\irqnr, #IRQ_TIMER2
 		bne	1001f
 
-		@ now try all the PIC sources
-		@ determine whether we have an irq
-		tst	\irqstat, #IRQ_MASK_EXTERN_IRQ
-		beq	1002f
-		mov	r4, #pci_iack_high
-		orr	r4, r4, #pci_iack_low
-		ldrb	\irqnr, [r4]			@ get the IACK byte
-		b	1001f
-
-1002:		@ PCI errors
-		tst	\irqstat, #IRQ_MASK_PCI_ERR
-		movne	\irqnr, #IRQ_PCI_ERR
+		tst	\irqstat, #IRQ_MASK_TIMER3
+		movne	\irqnr, #IRQ_TIMER3
 		bne	1001f
 
-		@ softint
-		tst	\irqstat, #IRQ_MASK_SOFTIRQ
-		movne	\irqnr, #IRQ_SOFTIRQ
+		tst	\irqstat, #IRQ_MASK_UART_TX
+		movne	\irqnr, #IRQ_CONTX
 		bne	1001f
 
-		@ debug uart
-		tst	\irqstat, #IRQ_MASK_UART_DEBUG
-		movne	\irqnr, #IRQ_CONRX
+		tst	\irqstat, #irq_mask_pci_err_high
+		tsteq	\irqstat, #irq_mask_pci_err_low
+		movne	\irqnr, #IRQ_PCI_ERR
 		bne	1001f
+1001:
+		.endm
 
-		@ watchdog
-		tst	\irqstat, #IRQ_MASK_WATCHDOG
-		movne	\irqnr, #IRQ_WATCHDOG
+		.macro	irq_prio_table
+		.endm
 
-1001:		@ If Z is set, then we will not enter an interrupt
+#elif defined(CONFIG_ARCH_NEXUSPCI)
+
+		.macro	disable_fiq
 		.endm
 
-		.macro	irq_prio_table
+		.macro	get_irqnr_and_base, irqnr, irqstat, base
+		ldr	r4, =0xffe00000
+		ldr	\irqstat, [r4, #0x180]		@ get interrupts
+		mov	\irqnr, #0
+1001:		tst	\irqstat, #1
+		addeq	\irqnr, \irqnr, #1
+		moveq	\irqstat, \irqstat, lsr #1
+		tsteq	\irqnr, #32
+		beq	1001b
+		teq	\irqnr, #32
 		.endm
 
+		.macro	irq_prio_table
+		.endm
 #else
 #error Unknown architecture
 #endif
@@ -306,22 +306,22 @@ irq_prio_ebsa110:
 		stmia	sp, {r0 - r12}			@ Calling r0 - r12
 		add	r8, sp, #S_PC
 		stmdb	r8, {sp, lr}^			@ Calling sp, lr
-		mov	r7, r0
+		str	lr, [r8, #0]			@ Save calling PC
 		mrs	r6, spsr
-		mov	r5, lr
-		stmia	r8, {r5, r6, r7}		@ Save calling PC, CPSR, OLD_R0
+		str	r6, [r8, #4]			@ Save CPSR
+		str	r0, [r8, #8]			@ Save OLD_R0
 		.endm
 
 		.macro	restore_user_regs
-		mrs	r0, cpsr			@ disable IRQs
-		orr	r0, r0, #I_BIT
-		msr	cpsr, r0
+		mrs	r1, cpsr			@ disable IRQs
+		orr	r1, r1, #I_BIT
 		ldr	r0, [sp, #S_PSR]		@ Get calling cpsr
+		msr	cpsr, r1
 		msr	spsr, r0			@ save in spsr_svc
 		ldmia	sp, {r0 - lr}^			@ Get calling r0 - lr
 		mov	r0, r0
-		add	sp, sp, #S_PC
-		ldr	lr, [sp], #S_FRAME_SIZE - S_PC	@ Get PC and jump over PC, PSR, OLD_R0
+		ldr	lr, [sp, #S_PC]			@ Get PC
+		add	sp, sp, #S_FRAME_SIZE
 		movs	pc, lr				@ return & move spsr_svc into cpsr
 		.endm
 
@@ -348,25 +348,6 @@ irq_prio_ebsa110:
 		msr	cpsr, \temp
 		.endm
 
-		.macro	initialise_traps_extra
-		mrs	r0, cpsr
-		bic	r0, r0, #31
-		orr	r0, r0, #0xd3
-		msr	cpsr, r0
-		.endm
-
-
-#ifndef __ARM_ARCH_4__
-.Larm700bug:	str	lr, [r8]
-		ldr	r0, [sp, #S_PSR]		@ Get calling cpsr
-		msr	spsr, r0
-		ldmia	sp, {r0 - lr}^			@ Get calling r0 - lr
-		mov	r0, r0
-		add	sp, sp, #S_PC
-		ldr	lr, [sp], #S_FRAME_SIZE - S_PC	@ Get PC and jump over PC, PSR, OLD_R0
-		movs	pc, lr
-#endif
-
 		.macro	get_current_task, rd
 		mov	\rd, sp, lsr #13
 		mov	\rd, \rd, lsl #13
@@ -379,231 +360,89 @@ irq_prio_ebsa110:
 		adr\cond	\reg, \label
 		.endm
 
-/*=============================================================================
- * Address exception handler
- *-----------------------------------------------------------------------------
- * These aren't too critical.
- * (they're not supposed to happen, and won't happen in 32-bit mode).
- */
-
-vector_addrexcptn:
-		b	vector_addrexcptn
-
-/*=============================================================================
- * Undefined FIQs
- *-----------------------------------------------------------------------------
- * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
- * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg.
- * Basically to switch modes, we *HAVE* to clobber one register...  brain
- * damage alert!  I don't think that we can execute any code in here in any
- * other mode than FIQ...  Ok you can switch to another mode, but you can't
- * get out of that mode without clobbering one register.
- */
-_unexp_fiq:	disable_fiq
-		subs	pc, lr, #4
-
-/*=============================================================================
- * Interrupt entry dispatcher
- *-----------------------------------------------------------------------------
- * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
- */
-vector_IRQ:	@
-		@ save mode specific registers
-		@
-		ldr	r13, .LCirq
-		sub	lr, lr, #4
-		str	lr, [r13]			@ save lr_IRQ
-		mrs	lr, spsr
-		str	lr, [r13, #4]			@ save spsr_IRQ
-		@
-		@ now branch to the relevent MODE handling routine
-		@
-		mrs	sp, cpsr			@ switch to SVC mode
-		bic	sp, sp, #31
-		orr	sp, sp, #0x13
-		msr	spsr, sp
-		and	lr, lr, #15
-		cmp	lr, #4
-		addlts	pc, pc, lr, lsl #2		@ Changes mode and branches
-		b	__irq_invalid			@  4 - 15
-		b	__irq_usr			@  0  (USR_26 / USR_32)
-		b	__irq_invalid			@  1  (FIQ_26 / FIQ_32)
-		b	__irq_invalid			@  2  (IRQ_26 / IRQ_32)
-		b	__irq_svc			@  3  (SVC_26 / SVC_32)
-/*
- *------------------------------------------------------------------------------------------------
- * Undef instr entry dispatcher - dispatches it to the correct handler for the processor mode
- *------------------------------------------------------------------------------------------------
- * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
- */
-.LCirq:		.word	__temp_irq
-.LCund:		.word	__temp_und
-.LCabt:		.word	__temp_abt
-
-vector_undefinstr:
-		@
-		@ save mode specific registers
-		@
-		ldr	r13, [pc, #.LCund - . - 8]
-		str	lr, [r13]
-		mrs	lr, spsr
-		str	lr, [r13, #4]
-		@
-		@ now branch to the relevent MODE handling routine
-		@
-		mrs	sp, cpsr
-		bic	sp, sp, #31
-		orr	sp, sp, #0x13
-		msr	spsr, sp
-		and	lr, lr, #15
-		cmp	lr, #4
-		addlts	pc, pc, lr, lsl #2		@ Changes mode and branches
-		b	__und_invalid			@  4 - 15
-		b	__und_usr			@  0 (USR_26 / USR_32)
-		b	__und_invalid			@  1 (FIQ_26 / FIQ_32)
-		b	__und_invalid			@  2 (IRQ_26 / IRQ_32)
-		b	__und_svc			@  3 (SVC_26 / SVC_32)
-/*
- *------------------------------------------------------------------------------------------------
- * Prefetch abort dispatcher - dispatches it to the correct handler for the processor mode
- *------------------------------------------------------------------------------------------------
- * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
- */
-vector_prefetch:
-		@
-		@ save mode specific registers
-		@
-		sub	lr, lr, #4
-		ldr	r13, .LCabt
-		str	lr, [r13]
-		mrs	lr, spsr
-		str	lr, [r13, #4]
-		@
-		@ now branch to the relevent MODE handling routine
-		@
-		mrs	sp, cpsr
-		bic	sp, sp, #31
-		orr	sp, sp, #0x13
-		msr	spsr, sp
-		and	lr, lr, #15
-		adds	pc, pc, lr, lsl #2		@ Changes mode and branches
-		b	__pabt_invalid			@  4 - 15
-		b	__pabt_usr			@  0  (USR_26 / USR_32)
-		b	__pabt_invalid			@  1  (FIQ_26 / FIQ_32)
-		b	__pabt_invalid			@  2  (IRQ_26 / IRQ_32)
-		b	__pabt_invalid			@  3  (SVC_26 / SVC_32)
 /*
- *------------------------------------------------------------------------------------------------
- * Data abort dispatcher - dispatches it to the correct handler for the processor mode
- *------------------------------------------------------------------------------------------------
- * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
+ * Invalid mode handlers
  */
-vector_data:	@
-		@ save mode specific registers
-		@
-		sub	lr, lr, #8
-		ldr	r13, .LCabt
-		str	lr, [r13]
-		mrs	lr, spsr
-		str	lr, [r13, #4]
-		@
-		@ now branch to the relevent MODE handling routine
-		@
-		mrs	sp, cpsr
-		bic	sp, sp, #31
-		orr	sp, sp, #0x13
-		msr	spsr, sp
-		and	lr, lr, #15
-		cmp	lr, #4
-		addlts	pc, pc, lr, lsl #2		@ Changes mode & branches
-		b	__dabt_invalid			@  4 - 15
-		b	__dabt_usr			@  0  (USR_26 / USR_32)
-		b	__dabt_invalid			@  1  (FIQ_26 / FIQ_32)
-		b	__dabt_invalid			@  2  (IRQ_26 / IRQ_32)
-		b	__dabt_svc			@  3  (SVC_26 / SVC_32)
+__pabt_invalid:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
+		stmia	sp, {r0 - lr}			@ Save XXX r0 - lr
+		ldr	r4, .LCabt
+		mov	r1, #BAD_PREFETCH
+		b	1f
 
-/*=============================================================================
- * Prefetch abort handler
- *-----------------------------------------------------------------------------
- */
-pabtmsg:	.ascii	"Pabt: %08lX\n\0"
-		.align
-__pabt_usr:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
-		stmia	sp, {r0 - r12}			@ Save r0 - r12
-		add	r8, sp, #S_PC
-		stmdb	r8, {sp, lr}^			@ Save sp_usr lr_usr
+__dabt_invalid:	sub	sp, sp, #S_FRAME_SIZE
+		stmia	sp, {r0 - lr}			@ Save SVC r0 - lr [lr *should* be intact]
 		ldr	r4, .LCabt
-		ldmia	r4, {r5 - r7}			@ Get USR pc, cpsr
-		stmia	r8, {r5 - r7}			@ Save USR pc, cpsr, old_r0
+		mov	r1, #BAD_DATA
+		b	1f
 
-		mrs	r7, cpsr			@ Enable interrupts if they were
-		bic	r7, r7, #I_BIT			@ previously
-		msr	cpsr, r7
-		mov	r0, r5				@ address (pc)
-		mov	r1, sp				@ regs
-		bl	SYMBOL_NAME(do_PrefetchAbort)	@ call abort handler
-		teq	r0, #0				@ Does this still apply???
-		bne	ret_from_exception		@ Return from exception
-#ifdef DEBUG_UNDEF
-		adr	r0, t
-		bl	SYMBOL_NAME(printk)
-#endif
-		mov	r0, r5
-		mov	r1, sp
-		and	r2, r6, #31
-		bl	SYMBOL_NAME(do_undefinstr)
-		ldr	lr, [sp, #S_PSR]		@ Get USR cpsr
-		msr	spsr, lr
-		ldmia	sp, {r0 - pc}^			@ Restore USR registers
+__irq_invalid:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate space on stack for frame
+		stmia	sp, {r0 - lr}			@ Save r0 - lr into the frame (stmfd without writeback stored them below sp)
+		ldr	r4, .LCirq			@ r4 = address of __temp_irq (saved pc, psr, old_r0)
+		mov	r1, #BAD_IRQ			@ int reason
+		b	1f				@ shared tail: finish the frame and call bad_mode
 
-__pabt_invalid:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
-		stmia	sp, {r0 - lr}			@ Save XXX r0 - lr
-		mov	r7, r0				@ OLD R0
-		ldr	r4, .LCabt
-		ldmia	r4, {r5 - r7}			@ Get XXX pc, cpsr
+__und_invalid:	sub	sp, sp, #S_FRAME_SIZE
+		stmia	sp, {r0 - lr}
+		ldr	r4, .LCund
+		mov	r1, #BAD_UNDEFINSTR		@ int reason
+
+1:		mov	fp, #0
+		ldmia	r4, {r5 - r7}			@ Get XXX pc, cpsr, old_r0
 		add	r4, sp, #S_PC
 		stmia	r4, {r5 - r7}			@ Save XXX pc, cpsr, old_r0
-		mov	r0, sp				@ Prefetch aborts are definitely *not*
-		mov	r1, #BAD_PREFETCH		@ allowed in non-user modes.  We cant
-		and	r2, r6, #31			@ recover from this problem.
+		mov	r0, sp
+		and	r2, r6, #31			@ int mode
 		b	SYMBOL_NAME(bad_mode)
 
-#ifdef DEBUG_UNDEF
-t:		.ascii "*** undef ***\r\n\0"
-		.align
-#endif
 
-/*=============================================================================
- * Data abort handler code
- *-----------------------------------------------------------------------------
- */
-.LCprocfns:	.word	SYMBOL_NAME(processor)
+wfs_mask_data:	.word	0x0e200110			@ WFS/RFS
+		.word	0x0fef0fff
+		.word	0x0d0d0100			@ LDF [sp]/STF [sp]
+		.word	0x0d0b0100			@ LDF [fp]/STF [fp]
+		.word	0x0f0f0f00
 
-__dabt_usr:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
-		stmia	sp, {r0 - r12}			@ save r0 - r12
-		add	r3, sp, #S_PC
-		stmdb	r3, {sp, lr}^
-		ldr	r0, .LCabt
-		ldmia	r0, {r0 - r2}			@ Get USR pc, cpsr
-		stmia	r3, {r0 - r2}			@ Save USR pc, cpsr, old_r0
-		mov	fp, #0
-		mrs	r2, cpsr			@ Enable interrupts if they were
-		bic	r2, r2, #I_BIT			@ previously
-		msr	cpsr, r2
-		ldr	r2, .LCprocfns
-		mov	lr, pc
-		ldr	pc, [r2, #8]			@ call processor specific code
-		mov	r3, sp
-		bl	SYMBOL_NAME(do_DataAbort)
-		b	ret_from_sys_call
+/* We get here if an undefined instruction happens and the floating
+ * point emulator is not present.  If the offending instruction was
+ * a WFS, we just perform a normal return as if we had emulated the
+ * operation.  This is a hack to allow some basic userland binaries
+ * to run so that the emulator module proper can be loaded. --philb
+ */
+fpe_not_present:
+		adr	r10, wfs_mask_data
+		ldmia	r10, {r4, r5, r6, r7, r8}	@ r4/r5 = WFS/RFS pattern/mask, r6/r7 = LDF/STF patterns, r8 = LDF/STF mask
+		ldr	r10, [sp, #S_PC]		@ Load PC
+		sub	r10, r10, #4			@ saved PC is one word past the undefined instruction
+		mask_pc	r10, r10
+		ldrt	r10, [r10]			@ get instruction
+		and	r5, r10, r5
+		teq	r5, r4				@ Is it WFS/RFS?
+		moveq	pc, r9				@ yes: take the normal FP return
+		and	r5, r10, r8
+		teq	r5, r6				@ Is it LDF/STF on sp or fp?
+		teqne	r5, r7
+		movne	pc, lr				@ neither: take the undefined instr return
+		tst	r10, #0x00200000		@ Does it have WB
+		moveq	pc, r9				@ no writeback: nothing to emulate
+		and	r4, r10, #255			@ get offset
+		and	r6, r10, #0x000f0000		@ base register field (bits 16-19)
+		tst	r10, #0x00800000		@ +/-
+		ldr	r5, [sp, r6, lsr #14]		@ Load reg
+		rsbeq	r4, r4, #0			@ bit 23 clear: negate the offset
+		add	r5, r5, r4, lsl #2		@ offset is in words
+		str	r5, [sp, r6, lsr #14]		@ Save reg
+		mov	pc, r9
 
+/*
+ * SVC mode handlers
+ */
+		.align	5
 __dabt_svc:	sub	sp, sp, #S_FRAME_SIZE
 		stmia	sp, {r0 - r12}			@ save r0 - r12
 		ldr	r2, .LCabt
 		add	r0, sp, #S_FRAME_SIZE
+		ldmia	r2, {r2 - r4}			@ get pc, cpsr
 		add	r5, sp, #S_SP
 		mov	r1, lr
-		ldmia	r2, {r2 - r4}			@ get pc, cpsr
 		stmia	r5, {r0 - r4}			@ save sp_SVC, lr_SVC, pc, cpsr, old_ro
 		tst	r3, #I_BIT
 		mrseq	r0, cpsr			@ Enable interrupts if they were
@@ -619,29 +458,15 @@ __dabt_svc:	sub	sp, sp, #S_FRAME_SIZE
 		msr	spsr, r0
 		ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
 
-__dabt_invalid:	sub	sp, sp, #S_FRAME_SIZE
-		stmia	sp, {r0 - lr}			@ Save SVC r0 - lr [lr *should* be intact]
-		mov	r7, r0
-		ldr	r4, .LCabt
-		ldmia	r4, {r5, r6}			@ Get SVC pc, cpsr
-		add	r4, sp, #S_PC
-		stmia	r4, {r5, r6, r7}		@ Save SVC pc, cpsr, old_r0
-		mov	r0, sp
-		mov	r1, #BAD_DATA
-		and	r2, r6, #31
-		b	SYMBOL_NAME(bad_mode)
-
-/*=============================================================================
- * Interrupt (IRQ) handler
- *-----------------------------------------------------------------------------
- */
-__irq_usr:	sub	sp, sp, #S_FRAME_SIZE
+		.align	5
+__irq_svc:	sub	sp, sp, #S_FRAME_SIZE
 		stmia	sp, {r0 - r12}			@ save r0 - r12
-		add	r8, sp, #S_PC
-		stmdb	r8, {sp, lr}^
-		ldr	r4, .LCirq
-		ldmia	r4, {r5 - r7}			@ get saved PC, SPSR
-		stmia	r8, {r5 - r7}			@ save pc, psr, old_r0
+		ldr	r7, .LCirq
+		add	r5, sp, #S_FRAME_SIZE
+		ldmia	r7, {r7 - r9}
+		add	r4, sp, #S_SP
+		mov	r6, lr
+		stmia	r4, {r5, r6, r7, r8, r9}	@ save sp_SVC, lr_SVC, pc, cpsr, old_ro
 1:		get_irqnr_and_base r0, r6, r5
 		movne	r1, sp
 		@
@@ -649,148 +474,414 @@ __irq_usr:	sub	sp, sp, #S_FRAME_SIZE
 		@
 		adrsvc	ne, lr, 1b
 		bne	do_IRQ
-		b	ret_with_reschedule
-
-		irq_prio_table
+		ldr	r0, [sp, #S_PSR]
+		msr	spsr, r0
+		ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
 
-__irq_svc:	sub	sp, sp, #S_FRAME_SIZE
+		.align	5
+__und_svc:	sub	sp, sp, #S_FRAME_SIZE
 		stmia	sp, {r0 - r12}			@ save r0 - r12
+		ldr	r7, .LCund
 		mov	r6, lr
-		ldr	r7, .LCirq
 		ldmia	r7, {r7 - r9}
 		add	r5, sp, #S_FRAME_SIZE
 		add	r4, sp, #S_SP
-		stmia	r4, {r5, r6, r7, r8, r9}	@ save sp_SVC, lr_SVC, pc, cpsr, old_ro
+		stmia	r4, {r5 - r9}			@ save sp_SVC, lr_SVC, pc, cpsr, old_ro
+
+		adrsvc	al, r9, 1f			@ r9  = normal FP return
+		bl	call_fpe			@ lr  = undefined instr return
+
+		mov	r0, r5				@ unsigned long pc
+		mov	r1, sp				@ struct pt_regs *regs
+		bl	SYMBOL_NAME(do_undefinstr)
+
+1:		ldr	lr, [sp, #S_PSR]		@ Get SVC cpsr
+		msr	spsr, lr
+		ldmia	sp, {r0 - pc}^			@ Restore SVC registers
+
+		.align	5
+.LCirq:		.word	__temp_irq
+.LCund:		.word	__temp_und
+.LCabt:		.word	__temp_abt
+.LCprocfns:	.word	SYMBOL_NAME(processor)
+.LCfp:		.word	SYMBOL_NAME(fp_enter)
+#ifdef CONFIG_ALIGNMENT_TRAP
+.LCswi:		.word	SYMBOL_NAME(cr_alignment)
+#endif
+
+		irq_prio_table
+
+/*
+ * User mode handlers
+ */
+#ifdef DEBUG_UNDEF
+t:		.ascii "Prefetch -> undefined instruction\n\0"
+		.align
+#endif
+		.align	5
+__dabt_usr:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
+		stmia	sp, {r0 - r12}			@ save r0 - r12
+		ldr	r4, .LCabt			@ r4 = address of __temp_abt
+		add	r3, sp, #S_PC			@ r3 = address of the frame's PC slot
+		ldmia	r4, {r0 - r2}			@ Get USR pc, cpsr
+		stmia	r3, {r0 - r2}			@ Save USR pc, cpsr, old_r0
+		stmdb	r3, {sp, lr}^			@ Save user-bank sp, lr in the two slots below PC
+
+#ifdef CONFIG_ALIGNMENT_TRAP
+		ldr	r7, [r4, #OFF_CR_ALIGNMENT(__temp_abt)]	@ load cr_alignment, addressed relative to __temp_abt
+		mcr	p15, 0, r7, c1, c0		@ write it to coprocessor 15 register c1
+#endif
+
+		mov	fp, #0
+		mrs	r2, cpsr			@ Enable interrupts if they were
+		bic	r2, r2, #I_BIT			@ previously
+		msr	cpsr, r2
+		ldr	r2, .LCprocfns			@ r2 = address of the 'processor' symbol
+		mov	lr, pc				@ return address for the indirect call below
+		ldr	pc, [r2, #8]			@ call processor specific code
+		mov	r3, sp				@ NOTE(review): presumably the pt_regs argument for do_DataAbort - confirm
+		adrsvc	al, lr, ret_from_sys_call	@ do_DataAbort returns to ret_from_sys_call
+		b	SYMBOL_NAME(do_DataAbort)
+
+		.align	5
+__irq_usr:	sub	sp, sp, #S_FRAME_SIZE
+		stmia	sp, {r0 - r12}			@ save r0 - r12
+		ldr	r4, .LCirq
+		add	r8, sp, #S_PC
+		ldmia	r4, {r5 - r7}			@ get saved PC, SPSR
+		stmia	r8, {r5 - r7}			@ save pc, psr, old_r0
+		stmdb	r8, {sp, lr}^
+
+#ifdef CONFIG_ALIGNMENT_TRAP
+		ldr	r7, [r4, #OFF_CR_ALIGNMENT(__temp_irq)]
+		mcr	p15, 0, r7, c1, c0
+#endif
+
+		mov	fp, #0
 1:		get_irqnr_and_base r0, r6, r5
 		movne	r1, sp
+		adrsvc	ne, lr, 1b
 		@
 		@ routine called with r0 = irq number, r1 = struct pt_regs *
 		@
-		adrsvc	ne, lr, 1b
 		bne	do_IRQ
-		ldr	r0, [sp, #S_PSR]
-		msr	spsr, r0
-		ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
-
-__irq_invalid:	sub	sp, sp, #S_FRAME_SIZE	@ Allocate space on stack for frame
-		stmfd	sp, {r0 - lr}		@ Save r0 - lr
-		mov	r7, #-1
-		ldr	r4, .LCirq
-		ldmia	r4, {r5, r6}		@ get saved pc, psr
-		add	r4, sp, #S_PC
-		stmia	r4, {r5, r6, r7}
-		mov	fp, #0
-		mov	r0, sp
-		mov	r1, #BAD_IRQ
-		b	SYMBOL_NAME(bad_mode)
-
-/*=============================================================================
- * Undefined instruction handler
- *-----------------------------------------------------------------------------
- * Handles floating point instructions
- */
-.LC2:		.word	SYMBOL_NAME(fp_enter)
+		mov	r4, #0
+		b	ret_with_reschedule
 
+		.align	5
 __und_usr:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
 		stmia	sp, {r0 - r12}			@ Save r0 - r12
-		add	r8, sp, #S_PC
-		stmdb	r8, {sp, lr}^			@ Save user r0 - r12
 		ldr	r4, .LCund
+		add	r8, sp, #S_PC
 		ldmia	r4, {r5 - r7}
 		stmia	r8, {r5 - r7}			@ Save USR pc, cpsr, old_r0
-		mov	fp, #0
+		stmdb	r8, {sp, lr}^			@ Save user r0 - r12
 
-		adrsvc	al, r9, ret_from_exception	@ r9  = normal FP return
+#ifdef CONFIG_ALIGNMENT_TRAP
+		ldr	r7, [r4, #OFF_CR_ALIGNMENT(__temp_und)]
+		mcr	p15, 0, r7, c1, c0
+#endif
+
+		mov	fp, #0
+		adrsvc	al, r9, ret_from_sys_call	@ r9  = normal FP return
 		adrsvc	al, lr, fpundefinstr		@ lr  = undefined instr return
 
-1:		get_current_task r10
+call_fpe:	get_current_task r10
 		mov	r8, #1
 		strb	r8, [r10, #TSK_USED_MATH]	@ set current->used_math
+		ldr	r4, .LCfp
 		add	r10, r10, #TSS_FPESAVE		@ r10 = workspace
-		ldr	r4, .LC2
 		ldr	pc, [r4]			@ Call FP module USR entry point
 
-__und_svc:	sub	sp, sp, #S_FRAME_SIZE
-		stmia	sp, {r0 - r12}			@ save r0 - r12
-		mov	r6, lr
-		ldr	r7, .LCund
-		ldmia	r7, {r7 - r9}
-		add	r5, sp, #S_FRAME_SIZE
-		add	r4, sp, #S_SP
-		stmia	r4, {r5 - r9}			@ save sp_SVC, lr_SVC, pc, cpsr, old_ro
-
-		adrsvc	al, r9, 3f			@ r9  = normal FP return
-		bl	1b				@ lr  = undefined instr return
-
-		mov	r0, r5				@ unsigned long pc
-		mov	r1, sp				@ struct pt_regs *regs
-		bl	SYMBOL_NAME(do_undefinstr)
-
-3:		ldr	lr, [sp, #S_PSR]		@ Get SVC cpsr
-		msr	spsr, lr
-		ldmia	sp, {r0 - pc}^			@ Restore SVC registers
-
 fpundefinstr:	mov	r0, lr
 		mov	r1, sp
 		mrs	r4, cpsr			@ Enable interrupts
 		bic	r4, r4, #I_BIT
 		msr	cpsr, r4
-		adrsvc	al, lr, ret_from_exception
+		adrsvc	al, lr, ret_from_sys_call
 		b	SYMBOL_NAME(do_undefinstr)
 
-__und_invalid:	sub	sp, sp, #S_FRAME_SIZE
-		stmia	sp, {r0 - lr}
-		mov	r7, r0
-		ldr	r4, .LCund
-		ldmia	r4, {r5, r6}			@ Get UND/IRQ/FIQ/ABT pc, cpsr
-		add	r4, sp, #S_PC
-		stmia	r4, {r5, r6, r7}		@ Save UND/IRQ/FIQ/ABT pc, cpsr, old_r0
-		mov	r0, sp				@ struct pt_regs *regs
-		mov	r1, #BAD_UNDEFINSTR		@ int reason
-		and	r2, r6, #31			@ int mode
-		b	SYMBOL_NAME(bad_mode)		@ Does not ever return...
+		.align	5
+__pabt_usr:	sub	sp, sp, #S_FRAME_SIZE		@ Allocate frame size in one go
+		stmia	sp, {r0 - r12}			@ Save r0 - r12
+		ldr	r4, .LCabt
+		add	r8, sp, #S_PC
+		ldmia	r4, {r5 - r7}			@ Get USR pc, cpsr
+		stmia	r8, {r5 - r7}			@ Save USR pc, cpsr, old_r0
+		stmdb	r8, {sp, lr}^			@ Save sp_usr lr_usr
 
-/* We get here if an undefined instruction happens and the floating
- * point emulator is not present.  If the offending instruction was
- * a WFS, we just perform a normal return as if we had emulated the
- * operation.  This is a hack to allow some basic userland binaries
- * to run so that the emulator module proper can be loaded. --philb
- */
-fpe_not_present:
-		adr	r10, wfs_mask_data
-		ldmia	r10, {r4, r5, r6, r7, r8}
-		ldr	r10, [sp, #S_PC]		@ Load PC
-		sub	r10, r10, #4
-		mask_pc	r10, r10
-		ldrt	r10, [r10]			@ get instruction
-		and	r5, r10, r5
-		teq	r5, r4				@ Is it WFS?
-		moveq	pc, r9
-		and	r5, r10, r8
-		teq	r5, r6				@ Is it LDF/STF on sp or fp?
-		teqne	r5, r7
-		movne	pc, lr
-		tst	r10, #0x00200000		@ Does it have WB
-		moveq	pc, r9
-		and	r4, r10, #255			@ get offset
-		and	r6, r10, #0x000f0000
-		tst	r10, #0x00800000		@ +/-
-		rsbeq	r4, r4, #0
-		ldr	r5, [sp, r6, lsr #14]		@ Load reg
-		add	r5, r5, r4, lsl #2
-		str	r5, [sp, r6, lsr #14]		@ Save reg
-		mov	pc, r9
+#ifdef CONFIG_ALIGNMENT_TRAP
+		ldr	r7, [r4, #OFF_CR_ALIGNMENT(__temp_abt)]
+		mcr	p15, 0, r7, c1, c0
+#endif
 
-wfs_mask_data:	.word	0x0e200110			@ WFS
-		.word	0x0fff0fff
-		.word	0x0d0d0100			@ LDF [sp]/STF [sp]
-		.word	0x0d0b0100			@ LDF [fp]/STF [fp]
-		.word	0x0f0f0f00
+		mov	fp, #0
+		mrs	r7, cpsr			@ Enable interrupts if they were
+		bic	r7, r7, #I_BIT			@ previously
+		msr	cpsr, r7
+		mov	r0, r5				@ address (pc)
+		mov	r1, sp				@ regs
+		bl	SYMBOL_NAME(do_PrefetchAbort)	@ call abort handler
+		teq	r0, #0				@ Does this still apply???
+		bne	ret_from_sys_call		@ Return from exception
+#ifdef DEBUG_UNDEF
+		adr	r0, t
+		bl	SYMBOL_NAME(printk)
+#endif
+		mov	r0, r5
+		mov	r1, sp
+		and	r2, r6, #31
+		bl	SYMBOL_NAME(do_undefinstr)
+		ldr	lr, [sp, #S_PSR]		@ Get USR cpsr
+		msr	spsr, lr
+		ldmia	sp, {r0 - pc}^			@ Restore USR registers
 
 #include "entry-common.S"
 
+		.text
+
+#ifndef __ARM_ARCH_4__
+.Larm700bug:	ldr	r0, [sp, #S_PSR]		@ Get calling cpsr
+		str	lr, [r8]			@ NOTE(review): r8 is set up by the code that branches here (not visible) - confirm what it points at
+		msr	spsr, r0			@ calling cpsr goes into spsr for the final movs
+		ldmia	sp, {r0 - lr}^			@ Get calling r0 - lr
+		mov	r0, r0				@ no-op following the user-bank ldm
+		ldr	lr, [sp, #S_PC]			@ Get PC
+		add	sp, sp, #S_FRAME_SIZE		@ drop the whole register frame
+		movs	pc, lr				@ return and move spsr into cpsr
+#endif
+
+		.section ".text.init",#alloc,#execinstr
+/*
+ * Vector stubs.  NOTE that we only align 'vector_IRQ' to a cache line boundary,
+ * and we rely on each stub being exactly 48 (1.5 cache lines) in size.  This
+ * means that we only ever load two cache lines for this code, or one if we're
+ * lucky.  We also copy this code to 0x200 so that we can use branches in the
+ * vectors, rather than ldr's.
+ */
+		.align	5
+__stubs_start:
+/*
+ * Interrupt dispatcher
+ * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
+ */
+vector_IRQ:	@
+		@ save mode specific registers
+		@
+		ldr	r13, .LCsirq
+		sub	lr, lr, #4
+		str	lr, [r13]			@ save lr_IRQ
+		mrs	lr, spsr
+		str	lr, [r13, #4]			@ save spsr_IRQ
+		@
+		@ now branch to the relevant MODE handling routine
+		@
+		bic	r13, lr, #63
+		orr	r13, r13, #0x93
+		msr	spsr, r13			@ switch to SVC_32 mode
+
+		and	lr, lr, #15
+		adr	r13, .LCtab_irq
+		ldr	lr, [r13, lr, lsl #2]
+		movs	pc, lr				@ Changes mode and branches
+/*
+ * Data abort dispatcher - dispatches it to the correct handler for the processor mode
+ * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
+ */
+vector_data:	@
+		@ save mode specific registers
+		@
+		ldr	r13, .LCsabt
+		sub	lr, lr, #8
+		str	lr, [r13]
+		mrs	lr, spsr
+		str	lr, [r13, #4]
+		@
+		@ now branch to the relevant MODE handling routine
+		@
+		bic	r13, lr, #63
+		orr	r13, r13, #0x93
+		msr	spsr, r13			@ switch to SVC_32 mode
+
+		and	lr, lr, #15
+		adr	r13, .LCtab_dabt
+		ldr	lr, [r13, lr, lsl #2]
+		movs	pc, lr				@ Changes mode and branches
+
+/*
+ * Prefetch abort dispatcher - dispatches it to the correct handler for the processor mode
+ * Enter in ABT mode, spsr = USR CPSR, lr = USR PC
+ */
+vector_prefetch:
+		@
+		@ save mode specific registers
+		@
+		ldr	r13, .LCsabt
+		sub	lr, lr, #4
+		str	lr, [r13]			@ save lr_ABT
+		mrs	lr, spsr
+		str	lr, [r13, #4]			@ save spsr_ABT
+		@
+		@ now branch to the relevant MODE handling routine
+		@
+		bic	r13, lr, #63
+		orr	r13, r13, #0x93
+		msr	spsr, r13			@ switch to SVC_32 mode
+
+		ands	lr, lr, #15
+		ldreq	lr, .LCtab_pabt
+		ldrne	lr, .LCtab_pabt + 4
+		movs	pc, lr
+
+/*
+ * Undef instr entry dispatcher - dispatches it to the correct handler for the processor mode
+ * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
+ */
+vector_undefinstr:
+		@
+		@ save mode specific registers
+		@
+		ldr	r13, .LCsund
+		str	lr, [r13]			@ save lr_UND
+		mrs	lr, spsr
+		str	lr, [r13, #4]			@ save spsr_UND
+		@
+		@ now branch to the relevant MODE handling routine
+		@
+		bic	r13, lr, #63
+		orr	r13, r13, #0x93
+		msr	spsr, r13			@ switch to SVC_32 mode
+
+		and	lr, lr, #15
+		adr	r13, .LCtab_und
+		ldr	lr, [r13, lr, lsl #2]
+		movs	pc, lr				@ Changes mode and branches
+
+/*=============================================================================
+ * Undefined FIQs
+ *-----------------------------------------------------------------------------
+ * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
+ * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg.
+ * Basically to switch modes, we *HAVE* to clobber one register...  brain
+ * damage alert!  I don't think that we can execute any code in here in any
+ * other mode than FIQ...  Ok you can switch to another mode, but you can't
+ * get out of that mode without clobbering one register.
+ */
+vector_FIQ:	disable_fiq
+		subs	pc, lr, #4
+
+/*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen, and won't happen in 32-bit data mode).
+ */
+
+vector_addrexcptn:
+		b	vector_addrexcptn
+
+/*
+ * We group all the following data together to optimise
+ * for CPUs with separate I & D caches.
+ */
+		.align	5
+
+.LCtab_irq:	.word	__irq_usr			@  0  (USR_26 / USR_32)
+		.word	__irq_invalid			@  1  (FIQ_26 / FIQ_32)
+		.word	__irq_invalid			@  2  (IRQ_26 / IRQ_32)
+		.word	__irq_svc			@  3  (SVC_26 / SVC_32)
+		.word	__irq_invalid			@  4
+		.word	__irq_invalid			@  5
+		.word	__irq_invalid			@  6
+		.word	__irq_invalid			@  7
+		.word	__irq_invalid			@  8
+		.word	__irq_invalid			@  9
+		.word	__irq_invalid			@  a
+		.word	__irq_invalid			@  b
+		.word	__irq_invalid			@  c
+		.word	__irq_invalid			@  d
+		.word	__irq_invalid			@  e
+		.word	__irq_invalid			@  f
+
+.LCtab_und:	.word	__und_usr			@  0 (USR_26 / USR_32)
+		.word	__und_invalid			@  1 (FIQ_26 / FIQ_32)
+		.word	__und_invalid			@  2 (IRQ_26 / IRQ_32)
+		.word	__und_svc			@  3 (SVC_26 / SVC_32)
+		.word	__und_invalid			@  4
+		.word	__und_invalid			@  5
+		.word	__und_invalid			@  6
+		.word	__und_invalid			@  7
+		.word	__und_invalid			@  8
+		.word	__und_invalid			@  9
+		.word	__und_invalid			@  a
+		.word	__und_invalid			@  b
+		.word	__und_invalid			@  c
+		.word	__und_invalid			@  d
+		.word	__und_invalid			@  e
+		.word	__und_invalid			@  f
+
+.LCtab_dabt:	.word	__dabt_usr			@  0  (USR_26 / USR_32)
+		.word	__dabt_invalid			@  1  (FIQ_26 / FIQ_32)
+		.word	__dabt_invalid			@  2  (IRQ_26 / IRQ_32)
+		.word	__dabt_svc			@  3  (SVC_26 / SVC_32)
+		.word	__dabt_invalid			@  4
+		.word	__dabt_invalid			@  5
+		.word	__dabt_invalid			@  6
+		.word	__dabt_invalid			@  7
+		.word	__dabt_invalid			@  8
+		.word	__dabt_invalid			@  9
+		.word	__dabt_invalid			@  a
+		.word	__dabt_invalid			@  b
+		.word	__dabt_invalid			@  c
+		.word	__dabt_invalid			@  d
+		.word	__dabt_invalid			@  e
+		.word	__dabt_invalid			@  f
+
+.LCtab_pabt:	.word	__pabt_usr
+		.word	__pabt_invalid
+
+.LCvswi:	.word	vector_swi
+
+.LCsirq:	.word	__temp_irq
+.LCsund:	.word	__temp_und
+.LCsabt:	.word	__temp_abt
+
+__stubs_end:
+
+		.equ	__real_stubs_start, .LCvectors + 0x200
+
+.LCvectors:	swi	SYS_ERROR0
+		b	__real_stubs_start + (vector_undefinstr - __stubs_start)
+		ldr	pc, __real_stubs_start + (.LCvswi - __stubs_start)
+		b	__real_stubs_start + (vector_prefetch - __stubs_start)
+		b	__real_stubs_start + (vector_data - __stubs_start)
+		b	__real_stubs_start + (vector_addrexcptn - __stubs_start)
+		b	__real_stubs_start + (vector_IRQ - __stubs_start)
+		b	__real_stubs_start + (vector_FIQ - __stubs_start)
+
+ENTRY(trap_init)
+		stmfd	sp!, {r4 - r6, lr}
+
+		adr	r1, .LCvectors			@ set up the vectors
+		mov	r0, #0
+		ldmia	r1, {r1, r2, r3, r4, r5, r6, ip, lr}
+		stmia	r0, {r1, r2, r3, r4, r5, r6, ip, lr}
+
+		add	r2, r0, #0x200
+		adr	r0, __stubs_start		@ copy stubs to 0x200
+		adr	r1, __stubs_end
+1:		ldr	r3, [r0], #4
+		str	r3, [r2], #4
+		cmp	r0, r1
+		blt	1b
+		LOADREGS(fd, sp!, {r4 - r6, pc})
+
 		.data
 
+/*
+ * Do not reorder these, and do not insert extra data between...
+ */
+
 __temp_irq:	.word	0				@ saved lr_irq
 		.word	0				@ saved spsr_irq
 		.word	-1				@ old_r0
@@ -800,3 +891,10 @@ __temp_und:	.word	0				@ Saved lr_und
 __temp_abt:	.word	0				@ Saved lr_abt
 		.word	0				@ Saved spsr_abt
 		.word	-1				@ old_r0
+
+		.globl	SYMBOL_NAME(cr_alignment)
+		.globl	SYMBOL_NAME(cr_no_alignment)
+SYMBOL_NAME(cr_alignment):
+		.space	4
+SYMBOL_NAME(cr_no_alignment):
+		.space	4
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index c77c0ea51..2fc0fdddc 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -1,51 +1,54 @@
 /*============================================================================
  * All exits to user mode from the kernel go through this code.
  */
-
-#include <linux/config.h>
-
 		.globl	ret_from_sys_call
 
-ret_from_exception:
-		adr	r0, 1f
-		ldmia	r0, {r0, r1}
+		.align	5
+fast_syscall_return:
+		str	r0, [sp, #S_R0 + 4]		@ returned r0
+slow_syscall_return:
+		add	sp, sp, #4
+ret_from_sys_call:
+		adr	r0, bh_data
+		ldmia	r0, {r0, r4}
 		ldr	r0, [r0]
-		ldr	r1, [r1]
+		ldr	r1, [r4]
 		tst	r0, r1
 		blne	SYMBOL_NAME(do_bottom_half)
-ret_from_intr:	ldr	r0, [sp, #S_PSR]
-		tst	r0, #3
-		beq	ret_with_reschedule
-		b	ret_from_all
+ret_with_reschedule:
+		get_current_task r1		@ check for scheduling
+		ldr	r0, [r1, #TSK_NEED_RESCHED]
+		teq	r0, #0
+		bne	ret_reschedule
+		ldr	r1, [r1, #TSK_SIGPENDING]
+		teq	r1, #0			@ check for signals
+		bne	ret_signal
+
+ret_from_all:	restore_user_regs
 
 ret_signal:	mov	r1, sp
 		adrsvc	al, lr, ret_from_all
+		mov	r2, r4
 		b	SYMBOL_NAME(do_signal)
 
-2:		bl	SYMBOL_NAME(schedule)
+ret_reschedule:	adrsvc	al, lr, ret_with_reschedule
+		b	SYMBOL_NAME(schedule)
 
-ret_from_sys_call:
-		adr	r0, 1f
+		.globl	ret_from_exception
+ret_from_exception:
+		adr	r0, bh_data
 		ldmia	r0, {r0, r1}
 		ldr	r0, [r0]
 		ldr	r1, [r1]
+		mov	r4, #0
 		tst	r0, r1
-		adrsvc	ne, lr, ret_from_intr
-		bne	SYMBOL_NAME(do_bottom_half)
-
-ret_with_reschedule:
-		get_current_task r1
-		ldr	r0, [r1, #TSK_NEED_RESCHED]
-		teq	r0, #0
-		bne	2b
-		ldr	r1, [r1, #TSK_SIGPENDING]
-		teq	r1, #0
-		bne	ret_signal
-
-ret_from_all:	restore_user_regs
+		blne	SYMBOL_NAME(do_bottom_half)
+		ldr	r0, [sp, #S_PSR]
+		tst	r0, #3			@ returning to user mode?
+		beq	ret_with_reschedule
+		b	ret_from_all
 
-1:		.word	SYMBOL_NAME(bh_mask)
-		.word	SYMBOL_NAME(bh_active)
+#include "calls.S"
 
 /*=============================================================================
  * SWI handler
@@ -57,84 +60,65 @@ ret_from_all:	restore_user_regs
  * too worried.
  */
 
-#include "calls.S"
-
+		.align	5
 vector_swi:	save_user_regs
-		mov	fp, #0
 		mask_pc	lr, lr
-		ldr	r6, [lr, #-4]!		@ get SWI instruction
+		mov	fp, #0
+		ldr	r6, [lr, #-4]		@ get SWI instruction
 		arm700_bug_check r6, r7
+#ifdef CONFIG_ALIGNMENT_TRAP
+		ldr	r7, .LCswi
+		ldr	r7, [r7]
+		mcr	p15, 0, r7, c1, c0
+#endif
 		enable_irqs r7
-		
+
+		str	r4, [sp, #-4]!		@ new style: (r0 = arg1, r4 = arg5)
+		adrsvc	al, lr, fast_syscall_return
+
 		bic	r6, r6, #0xff000000	@ mask off SWI op-code
 		eor	r6, r6, #OS_NUMBER<<20	@ check OS number
 		cmp	r6, #NR_syscalls	@ check upper syscall limit
 		bcs	2f
 
-		get_current_task r5
-		ldr	ip, [r5, #TSK_FLAGS]	@ check for syscall tracing
+		get_current_task r7
+		ldr	ip, [r7, #TSK_FLAGS]	@ check for syscall tracing
+		adr	r5, SYMBOL_NAME(sys_call_table)
 		tst	ip, #PF_TRACESYS
-		bne	1f
+		ldreq	pc, [r5, r6, lsl #2]	@ call sys routine
 
-		adr	ip, SYMBOL_NAME(sys_call_table)
-		str	r4, [sp, #-4]!		@ new style: (r0 = arg1, r5 = arg5)
-		mov	lr, pc
-		ldr	pc, [ip, r6, lsl #2]	@ call sys routine
-		add	sp, sp, #4
-		str	r0, [sp, #S_R0]		@ returned r0
-		b	ret_from_sys_call
-
-1:		ldr	r7, [sp, #S_IP]		@ save old IP
+		ldr	r7, [sp, #S_IP + 4]	@ save old IP
 		mov	r0, #0
-		str	r0, [sp, #S_IP]		@ trace entry [IP = 0]
+		str	r0, [sp, #S_IP + 4]	@ trace entry [IP = 0]
 		bl	SYMBOL_NAME(syscall_trace)
-		str	r7, [sp, #S_IP]
-		ldmia	sp, {r0 - r3}		@ have to reload r0 - r3
-		adr	ip, SYMBOL_NAME(sys_call_table)
-		str	r4, [sp, #-4]!		@ new style: (r0 = arg1, r5 = arg5)
+		str	r7, [sp, #S_IP + 4]
+
+		ldmib	sp, {r0 - r3}		@ have to reload r0 - r3
 		mov	lr, pc
-		ldr	pc, [ip, r6, lsl #2]	@ call sys routine
-		add	sp, sp, #4
-		str	r0, [sp, #S_R0]		@ returned r0
+		ldr	pc, [r5, r6, lsl #2]	@ call sys routine
+		str	r0, [sp, #S_R0 + 4]	@ returned r0
+
 		mov	r0, #1
-		str	r0, [sp, #S_IP]		@ trace exit [IP = 1]
+		str	r0, [sp, #S_IP + 4]	@ trace exit [IP = 1]
 		bl	SYMBOL_NAME(syscall_trace)
-		str	r7, [sp, #S_IP]
-		b	ret_from_sys_call
+		str	r7, [sp, #S_IP + 4]
+		b	slow_syscall_return
 
-2:		tst	r6, #0x00f00000		@ is it a Unix SWI?
+2:		add	r1, sp, #4
+		tst	r6, #0x00f00000		@ is it a Unix SWI?
 		bne	3f
-		cmp	r6, #(KSWI_SYS_BASE - KSWI_BASE)
-		bcc	4f			@ not private func
-		bic	r0, r6, #0x000f0000
-		mov	r1, sp
-		bl	SYMBOL_NAME(arm_syscall)
-		b	ret_from_sys_call
-
-3:		eor	r0, r6, #OS_NUMBER<<20	@ Put OS number back
-		mov	r1, sp
-		bl	SYMBOL_NAME(deferred)
-		ldmfd	sp, {r0 - r3}
-		b	ret_from_sys_call
-
-4:		bl	SYMBOL_NAME(sys_ni_syscall)
-		str	r0, [sp, #0]		@ returned r0
-		b	ret_from_sys_call
+		subs	r0, r6, #(KSWI_SYS_BASE - KSWI_BASE)
+		bcs	SYMBOL_NAME(arm_syscall)
+		b	SYMBOL_NAME(sys_ni_syscall) @ not private func
 
-@ r0 = syscall number
-@ r1 = syscall r0
-@ r5 = syscall r4
-@ ip = syscall table
-SYMBOL_NAME(sys_syscall):
-		mov	r6, r0
-		eor	r6, r6, #OS_NUMBER << 20
-		cmp	r6, #NR_syscalls		@ check range
-		movgt	r0, #-ENOSYS
-		movgt	pc, lr
-		add	sp, sp, #4			@ take of the save of our r4
-		ldmib	sp, {r0 - r4}			@ get our args
-		str	r4, [sp, #-4]!			@ Put our arg on the stack
-		ldr	pc, [ip, r6, lsl #2]
+3:		eor	r0, r6, #OS_NUMBER <<20	@ Put OS number back
+		adrsvc	al, lr, slow_syscall_return
+		b	SYMBOL_NAME(deferred)
+
+		.align	5
+
+bh_data:	.word	SYMBOL_NAME(bh_mask)
+		.word	SYMBOL_NAME(bh_active)
 
 ENTRY(sys_call_table)
 #include "calls.S"
@@ -142,10 +126,25 @@ ENTRY(sys_call_table)
 /*============================================================================
  * Special system call wrappers
  */
+@ r0 = syscall number (including OS number), r5 = syscall table
+@ Indirect syscall: dispatch to the call named in r0, else return -ENOSYS.
+SYMBOL_NAME(sys_syscall):
+		eor	r6, r0, #OS_NUMBER << 20	@ check OS number
+		cmp	r6, #NR_syscalls	@ unsigned range check (as vector_swi)
+		ldmccib	sp, {r0 - r4}		@ get our args
+		strcc	r4, [sp]		@ Put our arg on the stack
+		ldrcc	pc, [r5, r6, lsl #2]	@ in range: jump to sys routine
+		mov	r0, #-ENOSYS		@ out of range (r6 >= NR_syscalls)
+		mov	pc, lr
+
 sys_fork_wrapper:
 		add	r0, sp, #4
 		b	SYMBOL_NAME(sys_fork)
 
+sys_vfork_wrapper:
+		add	r0, sp, #4
+		b	SYMBOL_NAME(sys_vfork)
+
 sys_execve_wrapper:
 		add	r3, sp, #4
 		b	SYMBOL_NAME(sys_execve)
@@ -192,99 +191,6 @@ sys_sigaltstack_wrapper:
 		ldr	r2, [sp, #4 + S_SP]
 		b	do_sigaltstack
 
-/*
- *=============================================================================
- *		Low-level interface code
- *-----------------------------------------------------------------------------
- *		Trap initialisation
- *-----------------------------------------------------------------------------
- *
- * Note - FIQ code has changed.  The default is a couple of words in 0x1c, 0x20
- * that call _unexp_fiq.  Nowever, we now copy the FIQ routine to 0x1c (removes
- * some excess cycles).
- *
- * What we need to put into 0-0x1c are ldrs to branch to 0xC0000000
- * (the kernel).
- * 0x1c onwards is reserved for FIQ, so I think that I will allocate 0xe0 onwards for
- * the actual address to jump to.
- */
-
-		.section ".text.init",#alloc,#execinstr
-
-#if defined(CONFIG_CPU_32)
-/*
- * these go into 0x00
- */
-.Lbranches:	swi	SYS_ERROR0
-		ldr	pc, .Lbranches + 0xe4
-		ldr	pc, .Lbranches + 0xe8
-		ldr	pc, .Lbranches + 0xec
-		ldr	pc, .Lbranches + 0xf0
-		ldr	pc, .Lbranches + 0xf4
-		ldr	pc, .Lbranches + 0xf8
-		ldr	pc, .Lbranches + 0xfc
-/*
- * this is put into 0xe4 and above
- */
-.Ljump_addresses:
-		.word	vector_undefinstr	@ 0xe4
-		.word	vector_swi		@ 0xe8
-		.word	vector_prefetch		@ 0xec
-		.word	vector_data		@ 0xf0
-		.word	vector_addrexcptn	@ 0xf4
-		.word	vector_IRQ		@ 0xf8
-		.word	_unexp_fiq		@ 0xfc
-/*
- * initialise the trap system
- */
-ENTRY(trap_init)
-		stmfd	sp!, {r4 - r7, lr}
-		initialise_traps_extra
-		mov	r0, #0xe4
-		adr	r1, .Ljump_addresses
-		ldmia	r1, {r1 - r7}
-		stmia	r0, {r1 - r7}
-		mov	r0, #0
-		adr	r1, .Lbranches
-		ldmia	r1, {r1 - r7}
-		stmia	r0, {r1 - r7}
-		LOADREGS(fd, sp!, {r4 - r7, pc})
-#elif defined(CONFIG_CPU_26)
-.Ljump_addresses:
-		swi	SYS_ERROR0
-		.word	vector_undefinstr	- 12
-		.word	vector_swi		- 16
-		.word	vector_prefetch		- 20
-		.word	vector_data		- 24
-		.word	vector_addrexcptn	- 28
-		.word	vector_IRQ		- 32
-		.word	_unexp_fiq		- 36
-		b	. + 8
-/*
- * initialise the trap system
- */
-ENTRY(trap_init)
-		stmfd	sp!, {r4 - r7, lr}
-		adr	r1, .Ljump_addresses
-		ldmia	r1, {r1 - r7, ip, lr}
-		orr	r2, lr, r2, lsr #2
-		orr	r3, lr, r3, lsr #2
-		orr	r4, lr, r4, lsr #2
-		orr	r5, lr, r5, lsr #2
-		orr	r6, lr, r6, lsr #2
-		orr	r7, lr, r7, lsr #2
-		orr	ip, lr, ip, lsr #2
-		mov	r0, #0
-		stmia	r0, {r1 - r7, ip}
-		ldmfd	sp!, {r4 - r7, pc}^
-#endif
-
-		.previous
-
-/*============================================================================
- * FP support
- */
-
 		.data
 
 ENTRY(fp_enter)
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index a5da15c7f..e3e87469f 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -2,6 +2,8 @@
  *  linux/arch/arm/kernel/fiq.c
  *
  *  Copyright (C) 1998 Russell King
+ *  Copyright (C) 1998, 1999 Phil Blundell
+ *
  *  FIQ support written by Philip Blundell <philb@gnu.org>, 1998.
  *
  *  FIQ support re-written by Russell King to be more generic
@@ -78,7 +80,7 @@ int fiq_def_op(void *ref, int relinquish)
 		unprotect_page_0();
 		*(unsigned long *)FIQ_VECTOR = no_fiq_insn;
 		protect_page_0();
-		__flush_entry_to_ram(FIQ_VECTOR);
+		flush_icache_range(FIQ_VECTOR, FIQ_VECTOR + 4);
 	}
 
 	return 0;
@@ -106,28 +108,77 @@ void set_fiq_handler(void *start, unsigned int length)
 	memcpy((void *)FIQ_VECTOR, start, length);
 
 	protect_page_0();
-#ifdef CONFIG_CPU_32
-	processor.u.armv3v4._flush_cache_area(FIQ_VECTOR, FIQ_VECTOR + length, 1);
-#endif
+	flush_icache_range(FIQ_VECTOR, FIQ_VECTOR + length);
 }
 
+/*
+ * Taking an interrupt in FIQ mode is death, so both these functions
+ * disable irqs for the duration. 
+ */
 void set_fiq_regs(struct pt_regs *regs)
 {
-	/* not yet -
-	 * this is temporary to get the floppy working
-	 * again on RiscPC.  It *will* become more
-	 * generic.
-	 */
-#ifdef CONFIG_ARCH_ACORN
-	extern void floppy_fiqsetup(unsigned long len, unsigned long addr,
-					     unsigned long port);
-	floppy_fiqsetup(regs->ARM_r9, regs->ARM_r10, regs->ARM_fp);
+	register unsigned long tmp, tmp2;
+	__asm__ volatile (
+#ifdef CONFIG_CPU_26
+	"mov	%0, pc
+	bic	%1, %0, #0x3
+	orr	%1, %1, #0x0c000001
+	teqp	%1, #0		@ select FIQ mode
+	mov	r0, r0
+	ldmia	%2, {r8 - r14}
+	teqp	%0, #0		@ return to SVC mode
+	mov	r0, r0"
 #endif
+#ifdef CONFIG_CPU_32
+	"mrs	%0, cpsr
+	bic	%1, %0, #0xf
+	orr	%1, %1, #0xc1
+	msr	cpsr, %1	@ select FIQ mode
+	mov	r0, r0
+	ldmia	%2, {r8 - r14}
+	msr	cpsr, %0	@ return to SVC mode
+	mov	r0, r0"
+#endif
+	: "=&r" (tmp), "=&r" (tmp2)	/* early-clobber: written before %2 is read */
+	: "r" (&regs->ARM_r8)
+	/* These registers aren't modified by the above code in a way
+	   visible to the compiler, but we mark them as clobbers anyway
+	   so that GCC won't put any of the input or output operands in
+	   them.  */
+	: "r8", "r9", "r10", "r11", "r12", "r13", "r14", "memory");
 }
 
 void get_fiq_regs(struct pt_regs *regs)
 {
-	/* not yet */
+	register unsigned long tmp, tmp2;
+	__asm__ volatile (
+#ifdef CONFIG_CPU_26
+	"mov	%0, pc
+	bic	%1, %0, #0x3
+	orr	%1, %1, #0x0c000001
+	teqp	%1, #0		@ select FIQ mode
+	mov	r0, r0
+	stmia	%2, {r8 - r14}
+	teqp	%0, #0		@ return to SVC mode
+	mov	r0, r0"
+#endif
+#ifdef CONFIG_CPU_32
+	"mrs	%0, cpsr
+	bic	%1, %0, #0xf
+	orr	%1, %1, #0xc1
+	msr	cpsr, %1	@ select FIQ mode
+	mov	r0, r0
+	stmia	%2, {r8 - r14}
+	msr	cpsr, %0	@ return to SVC mode
+	mov	r0, r0"
+#endif
+	: "=&r" (tmp), "=&r" (tmp2)	/* early-clobber: written before %2 is read */
+	: "r" (&regs->ARM_r8)
+	/* These registers aren't modified by the above code in a way
+	   visible to the compiler, but we mark them as clobbers anyway
+	   so that GCC won't put any of the input or output operands in
+	   them.  */
+	: "r8", "r9", "r10", "r11", "r12", "r13", "r14", "memory");
 }
 
 int claim_fiq(struct fiq_handler *f)
diff --git a/arch/arm/kernel/head-armv.S b/arch/arm/kernel/head-armv.S
index cd4be86cb..2e13f0818 100644
--- a/arch/arm/kernel/head-armv.S
+++ b/arch/arm/kernel/head-armv.S
@@ -7,13 +7,21 @@
  */
 #include <linux/config.h>
 #include <linux/linkage.h>
+#include <asm/hardware.h>
+#include <asm/dec21285.h>
+
+		.globl	SYMBOL_NAME(swapper_pg_dir)
+		.equ	SYMBOL_NAME(swapper_pg_dir),	TEXTADDR - 0x4000
+
+		.section ".text.init",#alloc,#execinstr
+ENTRY(stext)
+ENTRY(_stext)
 
-#ifndef CONFIG_ARCH_VNC
 #if (TEXTADDR & 0xffff) != 0x8000
 #error TEXTADDR must start at 0xXXXX8000
 #endif
-#else
-		.text
+
+#ifdef CONFIG_ARCH_NETWINDER
 		mov	r0, r0
 		mov	r0, r0
 		mov	r0, r0
@@ -22,16 +30,34 @@
 		mov	r0, r0
 		mov	r0, r0
 		mov	r0, r0
+
+		adr	r2, 1f
+		ldmdb	r2, {r7, r8}
+		and	r3, r2, #0x0000c000
+		teq	r3, #0x00008000
+		beq	__entry
+		bic	r3, r2, #0xc000
+		orr	r3, r3, #0x8000
+		mov	r0, r3
+		mov	r4, #32
+		sub	r5, r8, r7
+		b	1f
+
+		.word	_stext
+		.word	_end
+
+1:		ldmia	r2!, {r6, r7, r8, r9}
+		stmia	r3!, {r6, r7, r8, r9}
+		subs	r4, r4, #16
+		bcs	1b
+		movs	r4, r5
+		mov	r5, #0
+		movne	pc, r0
+
 		mov	r0, #0
 		mov	r1, #5
 #endif
 
-#define DEBUG
-
-		.globl	SYMBOL_NAME(swapper_pg_dir)
-		.equ	SYMBOL_NAME(swapper_pg_dir),	TEXTADDR - 0x4000
-
-		.text
 /*
  * Entry point and restart point.  Entry *must* be called with r0 == 0,
  * MMU off.  Note! These should be unique!!! Please read Documentation/ARM-README
@@ -45,16 +71,15 @@
  *  r1 = 5 -> Corel Netwinder
  *  r1 = 6 -> CATS
  *  r1 = 7 -> tbox
+ *  r1 = 8 -> SA110/21285 as co-processor
  */
 
-ENTRY(stext)
-ENTRY(_stext)
 __entry:	teq	r0, #0					@ check for illegal entry...
 		bne	.Lerror					@ loop indefinitely
-		cmp	r1, #8					@ Unknown machine architecture
+		cmp	r1, #9					@ Unknown machine architecture
 		bge	.Lerror
-/* First thing to do is to get the page tables set up so that we can call the kernel
- * in the correct place.  This is relocatable code...
+/* First thing to do is to get the page tables set up so that we can call
+ * the kernel in the correct place.  This is relocatable code...
  * - Read processor ID register (CP#15, CR0).
  */
 		mrc	p15, 0, r9, c0, c0			@ get Processor ID
@@ -74,7 +99,7 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 
 		adr	r4, .LCMachTypes
 		add	r4, r4, r1, lsl #4
-		ldmia	r4, {r4, r5, r6}
+		ldmia	r4, {r4, r5, r6, r7}
 /*
  * r4 = page dir in physical ram
  * r5 = physical address of start of RAM
@@ -99,26 +124,28 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 		add	r3, r3, #1 << 20
 		str	r3, [r0], #4
 		add	r3, r3, #1 << 20
-#ifdef DEBUG
+#ifdef CONFIG_DEBUG_LL
 /* Map in IO space
  * This allows debug messages to be output via a serial
  * before/while paging_init.
  */
-		add	r0, r4, #0x3800
+		add	r0, r4, r7
 		orr	r3, r6, r8
 		add	r2, r0, #0x0800
 1:		str	r3, [r0], #4
 		add	r3, r3, #1 << 20
 		teq	r0, r2
 		bne	1b
-#ifdef CONFIG_ARCH_VNC
-		add	r0, r4, #0x3f00
-		add	r0, r0, #0x00f8
+#ifdef CONFIG_ARCH_NETWINDER
+		teq	r1, #5
+		bne	1f
+		add	r0, r4, #0x3fc0
 		mov	r3, #0x7c000000
 		orr	r3, r3, r8
 		str	r3, [r0], #4
 		add	r3, r3, #1 << 20
 		str	r3, [r0], #4
+1:
 #endif
 #endif
 #ifdef CONFIG_ARCH_RPC
@@ -168,49 +195,55 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 .LCMachTypes:	.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000	@ Address of page tables (physical)
 		.long	0					@ Address of RAM
 		.long	0xe0000000				@ I/O address
-		.long	0
+		.long	0x3800
 
 		@ Acorn RiscPC
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x10000000
 		.long	0x10000000
 		.long	0x03000000
-		.long	0
+		.long	0x3800
 
 		@ EBSIT ???
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000
 		.long	0
 		.long	0xe0000000
-		.long	0
+		.long	0x3800
 
 		@ NexusPCI
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x40000000
 		.long	0x40000000
 		.long	0x10000000
-		.long	0
+		.long	0x3800
 
 		@ DEC EBSA285
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical)
 		.long	0					@ Address of RAM
 		.long	0x24000000				@ I/O base address (0x42000000 -> 0xFE000000)
-		.long	0
+		.long	0x3800
 
 		@ Corel VNC
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical)
 		.long	0					@ Address of RAM
 		.long	0x24000000				@ I/O base address (0x42000000 -> 0xfe000000)
-		.long	0
+		.long	0x3800
 
 		@ CATS
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 @ Address of page tables (physical)
 		.long	0					@ Address of RAM
 		.long	0x24000000				@ I/O base address (0x42000000 -> 0xfe000000)
-		.long	0
+		.long	0x3800
 
 		@ tbox
 		.long	SYMBOL_NAME(swapper_pg_dir) - 0xc0000000 + 0x80000000
 		.long	0x80000000				@ Address of RAM
 		.long	0x00400000				@ Uart
-		.long	0
+		.long	0x3800
+
+		@ DEC EBSA285 as co-processor
+		.long	0x4000					@ Address of page tables (physical)
+		.long	0					@ Address of RAM
+		.long	DC21285_ARMCSR_BASE			@ Physical I/O base address
+		.long	0x7cf00000 >> 18			@ Virtual I/O base address
 
 .LCProcTypes:	@ ARM6 / 610
 		.long	0x41560600
@@ -250,7 +283,11 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 		mcr	p15, 0, r4, c2, c0			@ load page table pointer
 		mov	r0, #0x1f				@ Domains 0, 1 = client
 		mcr	p15, 0, r0, c3, c0			@ load domain access register
+#ifdef CONFIG_ALIGNMENT_TRAP
+		mov	r0, #0x3f				@ ....S..DPWCAM
+#else
 		mov	r0, #0x3d				@ ....S..DPWC.M
+#endif
 		orr	r0, r0, #0x100
 		mov	pc, lr
 
@@ -261,7 +298,11 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 		mcr	p15, 0, r4, c2, c0			@ load page table pointer
 		mov	r0, #0x1f				@ Domains 0, 1 = client
 		mcr	p15, 0, r0, c3, c0			@ load domain access register
+#ifdef CONFIG_ALIGNMENT_TRAP
+		mov	r0, #0x7f				@ ....S.LDPWCAM
+#else
 		mov	r0, #0x7d				@ ....S.LDPWC.M
+#endif
 		orr	r0, r0, #0x100
 		mov	pc, lr
 
@@ -276,32 +317,38 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 		mrc	p15, 0, r0, c1, c0			@ get control register v4
 		bic	r0, r0, #0x0e00
 		bic	r0, r0, #0x0002
+#ifdef CONFIG_ALIGNMENT_TRAP
+		orr	r0, r0, #0x003f				@ I...S..DPWCAM
+#else
 		orr	r0, r0, #0x003d				@ I...S..DPWC.M
+#endif
 		orr	r0, r0, #0x1100				@ v4 supports separate I cache
 		mov	pc, lr
 
-		.section ".text.init",#alloc,#execinstr
-
 .Lsa_fastclock:	mcr	p15, 0, r4, c15, c1, 2			@ Enable clock switching
 		mov	pc, lr
 
 .LC0:		.long	SYMBOL_NAME(__entry)
-		.long	SYMBOL_NAME(machine_type)
+		.long	SYMBOL_NAME(__machine_arch_type)
 		.long	SYMBOL_NAME(__bss_start)
 		.long	SYMBOL_NAME(processor_id)
 		.long	SYMBOL_NAME(_end)
+		.long	SYMBOL_NAME(cr_alignment)
 		.long	SYMBOL_NAME(init_task_union)+8192
 		.align
 
 .Lalready_done_mmap:
 		adr	r4, .LC0
-		ldmia	r4, {r3, r4, r5, r6, r8, sp}		@ Setup stack
+		ldmia	r4, {r3, r4, r5, r6, r7, r8, sp}	@ Setup stack
 		add	r10, r10, r3				@ Add base back in
 		mov	fp, #0
-1:		cmp	r5, r8					@ Clear BSS
+1:		cmp	r5, r7					@ Clear BSS
 		strcc	fp, [r5],#4
 		bcc	1b
 
+		bic	r2, r0, #2				@ Clear 'A' bit
+		stmia	r8, {r0, r2}				@ Save control register values
+
 		str	r1, [r4]				@ Save machine type
 		str	r9, [r6]				@ Save processor ID
 		mov	lr, pc
@@ -310,10 +357,12 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 		b	SYMBOL_NAME(start_kernel)
 
 		.text
-#ifdef DEBUG
+
+#ifdef CONFIG_DEBUG_LL
 /*
  * Some debugging routines (useful if you've got MM problems and
- * printk isn't working).  For DEBUGGING ONLY!!!
+ * printk isn't working).  For DEBUGGING ONLY!!!  Do not leave
+ * references to these in a production kernel!
  */
 #if defined(CONFIG_ARCH_RPC)
 		.macro	addruart,rx
@@ -362,64 +411,71 @@ __entry:	teq	r0, #0					@ check for illegal entry...
 		beq	1001b
 		.endm
 
-#elif defined(CONFIG_ARCH_EBSA285)
+#elif defined(CONFIG_HOST_FOOTBRIDGE) || defined(CONFIG_ADDIN_FOOTBRIDGE)
+#ifndef CONFIG_DEBUG_DC21285_PORT
+	/* For NetWinder debugging */
 		.macro	addruart,rx
-		mov	\rx, #0xfe000000
+		mov	\rx, #0xff000000
+		orr	\rx, \rx, #0x000003f8
 		.endm
 
 		.macro	senduart,rd,rx
-		str	\rd, [\rx, #0x160]	@ UARTDR
+		strb	\rd, [\rx]
 		.endm
 
 		.macro	busyuart,rd,rx
-1001:		ldr	\rd, [\rx, #0x178]	@ UARTFLG
-		tst	\rd, #1 << 3
-		bne	1001b
+1002:		ldrb	\rd, [\rx, #0x5]
+		and	\rd, \rd, #0x60
+		teq	\rd, #0x60
+		bne	1002b
 		.endm
 
 		.macro	waituart,rd,rx
+1001:		ldrb	\rd, [\rx, #0x6]
+		tst	\rd, #0x10
+		beq	1001b
 		.endm
+#else
+	/* For EBSA285 debugging */
+		.equ	dc21285_high, ARMCSR_BASE & 0xff000000
+		.equ	dc21285_low,  ARMCSR_BASE & 0x00ffffff
 
-#elif defined(CONFIG_ARCH_NEXUSPCI)
 		.macro	addruart,rx
-		ldr	\rx, =0xfff00000
+		mov	\rx, #dc21285_high
+		.if	dc21285_low
+		orr	\rx, \rx, #dc21285_low
+		.endif
 		.endm
 
 		.macro	senduart,rd,rx
-		str	\rd, [\rx, #0xc]
+		str	\rd, [\rx, #0x160]	@ UARTDR
 		.endm
 
 		.macro	busyuart,rd,rx
-1001:		ldr	\rd, [\rx, #0x4]
-		tst	\rd, #1 << 0
+1001:		ldr	\rd, [\rx, #0x178]	@ UARTFLG
+		tst	\rd, #1 << 3
 		bne	1001b
 		.endm
 
 		.macro	waituart,rd,rx
 		.endm
-
-#elif defined(CONFIG_ARCH_VNC)
+#endif
+#elif defined(CONFIG_ARCH_NEXUSPCI)
 		.macro	addruart,rx
-		mov	\rx, #0xff000000
-		orr	\rx, \rx, #0x00e00000
-		orr	\rx, \rx, #0x000003f8
+		ldr	\rx, =0xfff00000
 		.endm
 
 		.macro	senduart,rd,rx
-		strb	\rd, [\rx]
+		str	\rd, [\rx, #0xc]
 		.endm
 
 		.macro	busyuart,rd,rx
-1002:		ldrb	\rd, [\rx, #0x5]
-		and	\rd, \rd, #0x60
-		teq	\rd, #0x60
-		bne	1002b
+1001:		ldr	\rd, [\rx, #0x4]
+		tst	\rd, #1 << 0
+		bne	1001b
 		.endm
 
 		.macro	waituart,rd,rx
-1001:		ldrb	\rd, [\rx, #0x6]
-		tst	\rd, #0x10
-		beq	1001b
 		.endm
 #else
 #error Unknown architecture
@@ -476,8 +532,6 @@ ENTRY(printch)
 		mov	r0, #0
 		b	1b
 
-		.ltorg
-
 		.bss
 hexbuf:		.space 16
 
diff --git a/arch/arm/kernel/hw-ebsa285.c b/arch/arm/kernel/hw-ebsa285.c
deleted file mode 100644
index e3385696b..000000000
--- a/arch/arm/kernel/hw-ebsa285.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * arch/arm/kernel/hw-ebsa286.c
- *
- * EBSA285 hardware specific functions
- *
- * Copyright (C) 1998 Russell King, Phil Blundel
- */
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/ptrace.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-
-#include <asm/irq.h>
-#include <asm/system.h>
-
-extern int setup_arm_irq(int, struct irqaction *);
-
-extern void pci_set_cmd(struct pci_dev *dev, unsigned short clear, unsigned short set);
-extern void pci_set_base_addr(struct pci_dev *dev, int idx, unsigned int addr);
-extern void pci_set_irq_line(struct pci_dev *dev, unsigned int irq);
-
-static int irqmap_ebsa[] __initdata = { 9, 8, 18, 11 };
-static int irqmap_cats[] __initdata = { 18, 8, 9, 11 };
-
-__initfunc(static int ebsa_irqval(struct pci_dev *dev))
-{
-	unsigned char pin;
-	
-	pcibios_read_config_byte(dev->bus->number,
-				 dev->devfn,
-				 PCI_INTERRUPT_PIN,
-				 &pin);
-	
-	return irqmap_ebsa[(PCI_SLOT(dev->devfn) + pin) & 3];
-}
-
-__initfunc(static int cats_irqval(struct pci_dev *dev))
-{
-	if (dev->irq >= 128)
-		return 32 + (dev->irq & 0x1f);
-
-	switch (dev->irq) {
-	case 1:
-	case 2:
-	case 3:
-	case 4:
-		return irqmap_cats[dev->irq - 1];
-	case 0:
-		return 0;
-	}
-
-	printk("PCI: device %02x:%02x has unknown irq line %x\n",
-	       dev->bus->number, dev->devfn, dev->irq);
-	return 0;
-}
-
-__initfunc(void pcibios_fixup_ebsa285(struct pci_dev *dev))
-{
-	char cmd;
-
-	/* sort out the irq mapping for this device */
-	switch (machine_type) {
-	case MACH_TYPE_EBSA285:
-		dev->irq = ebsa_irqval(dev);
-		break;
-	case MACH_TYPE_CATS:
-		dev->irq = cats_irqval(dev);
-		break;
-	}
-
-	/* Turn on bus mastering - boot loader doesn't
-	 * - perhaps it should! - dag
-	 */
-	pci_read_config_byte(dev, PCI_COMMAND, &cmd);
-	pci_write_config_byte(dev, PCI_COMMAND, cmd | PCI_COMMAND_MASTER);
-}
-
-static void irq_pci_err(int irq, void *dev_id, struct pt_regs *regs)
-{
-	const char *err = "unknown";
-	unsigned long cmd = *(unsigned long *)0xfe000004 & 0xffff;
-	unsigned long ctrl = *(unsigned long *)0xfe00013c & 0xffffde07;
-	static unsigned long next_warn[7];
-	int idx = 6;
-
-	switch(irq) {
-	case IRQ_PCIPARITY:
-		*(unsigned long *)0xfe000004 = cmd | 1 << 31;
-		idx = 0;
-		err = "parity";
-		break;
-
-	case IRQ_PCITARGETABORT:
-		*(unsigned long *)0xfe000004 = cmd | 1 << 28;
-		idx = 1;
-		err = "target abort";
-		break;
-
-	case IRQ_PCIMASTERABORT:
-		*(unsigned long *)0xfe000004 = cmd | 1 << 29;
-		idx = 2;
-		err = "master abort";
-		break;
-
-	case IRQ_PCIDATAPARITY:
-		*(unsigned long *)0xfe000004 = cmd | 1 << 24;
-		idx = 3;
-		err = "data parity";
-		break;
-
-	case IRQ_DISCARDTIMER:
-		*(unsigned long *)0xfe00013c = ctrl | 1 << 8;
-		idx = 4;
-		err = "discard timer";
-		break;
-
-	case IRQ_SERR:
-		*(unsigned long *)0xfe00013c = ctrl | 1 << 3;
-		idx = 5;
-		err = "system";
-		break;
-	}
-	if (time_after_eq(jiffies, next_warn[idx])) {
-		next_warn[idx] = jiffies + 3 * HZ / 100;
-		printk(KERN_ERR "PCI %s error detected\n", err);
-	}
-}
-
-static struct irqaction irq_pci_error = {
-	irq_pci_err, SA_INTERRUPT, 0, "PCI error", NULL, NULL
-};
-
-__initfunc(void pcibios_init_ebsa285(void))
-{
-	setup_arm_irq(IRQ_PCIPARITY, &irq_pci_error);
-	setup_arm_irq(IRQ_PCITARGETABORT, &irq_pci_error);
-	setup_arm_irq(IRQ_PCIMASTERABORT, &irq_pci_error);
-	setup_arm_irq(IRQ_PCIDATAPARITY, &irq_pci_error);
-	setup_arm_irq(IRQ_DISCARDTIMER, &irq_pci_error);
-	setup_arm_irq(IRQ_SERR, &irq_pci_error);
-
-	/*
-	 * Map our SDRAM at a known address in PCI space, just in case
-	 * the firmware had other ideas.  Using a nonzero base is slightly
-	 * bizarre but apparently necessary to avoid problems with some
-	 * video cards.
-	 *
-	 * We should really only do this if the central function is enabled.
-	 */
-	*(unsigned long *)0xfe000010 = 0;
-	*(unsigned long *)0xfe000018 = 0xe0000000;
-	*(unsigned long *)0xfe0000f8 = 0;
-	*(unsigned long *)0xfe0000fc = 0;
-	*(unsigned long *)0xfe000100 = 0x01fc0000;
-	*(unsigned long *)0xfe000104 = 0;
-	*(unsigned long *)0xfe000108 = 0x80000000;
-	*(unsigned long *)0xfe000004 = 0x17;
-}
diff --git a/arch/arm/kernel/hw-footbridge.c b/arch/arm/kernel/hw-footbridge.c
new file mode 100644
index 000000000..857f120e1
--- /dev/null
+++ b/arch/arm/kernel/hw-footbridge.c
@@ -0,0 +1,893 @@
+/*
+ * arch/arm/kernel/hw-footbridge.c
+ *
+ * Footbridge-dependent machine fixup
+ *
+ * Copyright (C) 1998, 1999 Russell King, Phil Blundell
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/dec21285.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/leds.h>
+#include <asm/system.h>
+
+#define IRDA_IO_BASE		0x180
+#define ETHER10_IO_BASE		0x301
+#define GP1_IO_BASE		0x338
+#define GP2_IO_BASE		0x33a
+#define DEC21143_IO_BASE	0x401
+#define DEC21143_MEM_BASE	0x00800000
+#define CYBER2000_MEM_BASE	0x01000000
+
+int	have_isa_bridge;
+
+extern int setup_arm_irq(int, struct irqaction *);
+extern void pci_set_cmd(struct pci_dev *dev, unsigned short clear, unsigned short set);
+extern void pci_set_base_addr(struct pci_dev *dev, int idx, unsigned int addr);
+extern void pci_set_irq_line(struct pci_dev *dev, unsigned int irq);
+extern void (*kd_mksound)(unsigned int hz, unsigned int ticks);
+
+#ifdef CONFIG_PCI
+
+/* EBSA285 interrupt routing table, indexed by (slot + interrupt pin) & 3 */
+static int irqmap_ebsa[] __initdata = { IRQ_IN1, IRQ_IN0, IRQ_PCI, IRQ_IN3 };
+
+/* Read the device's interrupt pin and map (slot + pin) onto an IRQ number */
+__initfunc(static int ebsa_irqval(struct pci_dev *dev))
+{
+	unsigned char int_pin;
+
+	pcibios_read_config_byte(dev->bus->number, dev->devfn,
+				 PCI_INTERRUPT_PIN, &int_pin);
+
+	return irqmap_ebsa[(PCI_SLOT(dev->devfn) + int_pin) & 3];
+}
+
+#ifdef CONFIG_CATS
+/* CATS interrupt routing table for dev->irq values 1..4 */
+static int irqmap_cats[] __initdata = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 };
+
+/*
+ * Translate the value already held in dev->irq into a kernel IRQ number.
+ * Unrecognised values are reported with printk and yield 0, as does a
+ * dev->irq of 0 (silently).
+ */
+__initfunc(static int cats_irqval(struct pci_dev *dev))
+{
+	/* 128 and above: the result is 16 plus the low five bits */
+	if (dev->irq >= 128)
+		return 16 + (dev->irq & 0x1f);
+	/* 1..4: look the line up in the routing table */
+	if (dev->irq >= 1 && dev->irq <= 4)
+		return irqmap_cats[dev->irq - 1];
+	if (dev->irq != 0)
+		printk("PCI: device %02x:%02x has unknown irq line %x\n",
+		       dev->bus->number, dev->devfn, dev->irq);
+	return 0;
+}
+#endif
+/* Per-device PCI fixup: common config-space setup, then machine-specific IRQ/BAR assignment */
+__initfunc(void pcibios_fixup_ebsa285(struct pci_dev *dev))
+{
+	/* Latency timer of 32 */
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 32);
+
+	/* 32-byte cache line size (the register counts 32-bit words, hence 8) */
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 8);
+
+	/* Set fast back-to-back, SysErr enable, Parity enable (nothing is cleared) */
+	pci_set_cmd(dev, 0, PCI_COMMAND_FAST_BACK | PCI_COMMAND_SERR | PCI_COMMAND_PARITY);
+
+	/* If this device is an ISA bridge, set the
+	 * have_isa_bridge flag.  We will then go looking
+	 * for things like keyboard, etc
+	 */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA ||
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_EISA)
+		have_isa_bridge = !0;
+
+	/* sort out the irq mapping for this device */
+	switch (machine_arch_type) {
+	case MACH_TYPE_EBSA285:
+		dev->irq = ebsa_irqval(dev);
+		/* Turn on bus mastering - boot loader doesn't
+		 * - perhaps it should! - dag
+		 */
+		pci_set_cmd(dev, 0, PCI_COMMAND_MASTER);
+		break;
+
+#ifdef CONFIG_CATS
+	case MACH_TYPE_CATS:
+		dev->irq = cats_irqval(dev);
+		/* Turn on bus mastering - boot loader doesn't
+		 * - perhaps it should! - dag
+		 */
+		pci_set_cmd(dev, 0, PCI_COMMAND_MASTER);
+		break;
+#endif
+#ifdef CONFIG_ARCH_NETWINDER
+	case MACH_TYPE_NETWINDER:
+		/* disable ROM by zeroing its base address register */
+		pci_write_config_dword(dev, PCI_ROM_ADDRESS, 0);
+
+#define DEV(v,d) ((v)<<16|(d))
+		switch (DEV(dev->vendor, dev->device)) {
+		/* Ether 100: fixed I/O and memory BARs, all three command bits set */
+		case DEV(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21142):
+			pci_set_base_addr(dev, 0, DEC21143_IO_BASE);
+			pci_set_base_addr(dev, 1, DEC21143_MEM_BASE);
+			pci_set_cmd(dev, 0, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
+			/* Put the chip to sleep in case the driver isn't loaded */
+			pci_write_config_dword(dev, 0x40, 0x80000000);
+			dev->irq = IRQ_NETWINDER_ETHER100;
+			break;
+
+		/* Ether 10: I/O decode set; bus-master and memory bits cleared */
+		case DEV(PCI_VENDOR_ID_WINBOND2,0x5a5a):
+			pci_set_base_addr(dev, 0, ETHER10_IO_BASE);
+			pci_set_cmd(dev, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY, PCI_COMMAND_IO);
+			dev->irq = IRQ_NETWINDER_ETHER10;
+			break;
+
+		/* ISA bridge: I/O decode set; bus-master and memory bits cleared */
+		case DEV(PCI_VENDOR_ID_WINBOND,PCI_DEVICE_ID_WINBOND_83C553):
+			pci_set_base_addr(dev, 0, 0);
+			pci_set_cmd(dev, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY, PCI_COMMAND_IO);
+			/*
+			 * Enable all memory requests from ISA to be channeled to PCI
+			 */
+			pci_write_config_byte(dev, 0x48, 255);
+			/*
+			 * Disable ping-pong (as per errata)
+			 */
+			pci_write_config_byte(dev, 0x42, 0);
+			/*
+			 * Enable PCI packet retry
+			 */
+			pci_write_config_byte(dev, 0x40, 0x22);
+			/*
+			 * Do not use PCI CPU park enable, park on
+			 * last master, disable GAT bit
+			 */
+			pci_write_config_byte(dev, 0x83, 0x02);
+			/*
+			 * Default rotating priorities
+			 */
+			pci_write_config_byte(dev, 0x80, 0xe0);
+			/*
+			 * Rotate bank 4
+			 */
+			pci_write_config_byte(dev, 0x81, 0x01);
+			break;
+
+		/* IDE: five fixed BARs; bus-master and I/O set, memory bit cleared */
+		case DEV(PCI_VENDOR_ID_WINBOND,PCI_DEVICE_ID_WINBOND_82C105):
+			pci_set_base_addr(dev, 0, 0x1f1);
+			pci_set_base_addr(dev, 1, 0x3f5);
+			pci_set_base_addr(dev, 2, 0x171);
+			pci_set_base_addr(dev, 3, 0x375);
+			pci_set_base_addr(dev, 4, 0xe801);
+			pci_set_cmd(dev, PCI_COMMAND_MEMORY, PCI_COMMAND_MASTER | PCI_COMMAND_IO);
+			dev->irq = IRQ_ISA_HARDDISK1;
+			break;
+
+		/* VGA: I/O and memory decode set, bus-master bit cleared */
+		case DEV(PCI_VENDOR_ID_INTERG,0x2000):
+			pci_set_base_addr(dev, 0, CYBER2000_MEM_BASE);
+			pci_set_cmd(dev, PCI_COMMAND_MASTER, PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+			dev->irq = IRQ_NETWINDER_VGA;
+			break;
+		}
+#endif
+	}
+}
+
+/* Walk every PCI device; log and write back any set 0xf900 status bits */
+static inline void report_pci_dev_error(void)
+{
+	struct pci_dev *pdev;
+
+	for (pdev = pci_devices; pdev != NULL; pdev = pdev->next) {
+		unsigned short stat, errs;
+
+		pci_read_config_word(pdev, PCI_STATUS, &stat);
+		errs = stat & 0xf900;
+		if (!errs)
+			continue;
+		printk(KERN_DEBUG "PCI: [%04X:%04X] status = %X\n",
+			pdev->vendor, pdev->device, stat);
+		pci_write_config_word(pdev, PCI_STATUS, errs);
+	}
+}
+#else
+#define report_pci_dev_error()
+#endif
+
+/*
+ * Warn on PCI errors (printed at most once per 3*HZ/100 jiffies).  Please report any occurrences!
+ */
+static void
+irq_pci_err(int irq, void *dev_id, struct pt_regs *regs)
+{
+	static unsigned long next_warn;	/* earliest jiffies value at which we print again */
+	unsigned long cmd       = *CSR_PCICMD & 0x0000ffff;	/* keep only the low 16 bits */
+	unsigned long ctrl      = (*CSR_SA110_CNTL) & 0xffffde07;
+	unsigned long irqstatus = *CSR_IRQ_RAWSTATUS;
+	int warn = time_after_eq(jiffies, next_warn);
+
+	ctrl |= SA110_CNTL_DISCARDTIMER;
+
+	if (warn) {
+		next_warn = jiffies + 3 * HZ / 100;
+		printk(KERN_DEBUG "PCI: ");
+	}
+
+	if (irqstatus & (1 << 31)) {
+		if (warn)
+			printk("parity error ");
+		cmd |= 1 << 31;	/* NOTE(review): presumably a write-1-to-clear status bit - confirm */
+	}
+
+	if (irqstatus & (1 << 30)) {
+		if (warn)
+			printk("target abort ");
+		cmd |= 1 << 28;
+	}
+
+	if (irqstatus & (1 << 29)) {
+		if (warn)
+			printk("master abort ");
+		cmd |= 1 << 29;
+	}
+
+	if (irqstatus & (1 << 28)) {
+		if (warn)
+			printk("data parity error ");
+		cmd |= 1 << 24;
+	}
+
+	if (irqstatus & (1 << 27)) {
+		if (warn)
+			printk("discard timer expired ");
+		ctrl &= ~SA110_CNTL_DISCARDTIMER;	/* undoes the unconditional set above */
+	}
+
+	if (irqstatus & (1 << 23)) {
+		if (warn)
+			printk("system error ");
+		ctrl |= SA110_CNTL_RXSERR;
+	}
+
+	if (warn)
+		printk("pc=[<%08lX>]\n", instruction_pointer(regs));
+
+	report_pci_dev_error();
+	/* Write the accumulated bits back to the command and control registers */
+	*CSR_PCICMD = cmd;
+	*CSR_SA110_CNTL = ctrl;
+}
+
+static struct irqaction irq_pci_error = {
+	irq_pci_err, SA_INTERRUPT, 0, "PCI error", NULL, NULL	/* positional initialiser */
+};
+/* Install irq_pci_error (handler irq_pci_err) on the IRQ_PCI_ERR interrupt */
+__initfunc(void pcibios_init_ebsa285(void))
+{
+	setup_arm_irq(IRQ_PCI_ERR, &irq_pci_error);
+}
+
+/*
+ * Netwinder stuff
+ */
+#ifdef CONFIG_ARCH_NETWINDER
+
+/*
+ * Winbond WB83977F accessibility stuff
+ */
+static inline void wb977_open(void)
+{
+	outb(0x87, 0x370);	/* 0x87 is written twice to port 0x370 ... */
+	outb(0x87, 0x370);	/* ... which callers use to open the chip for configuration */
+}
+
+static inline void wb977_close(void)
+{
+	outb(0xaa, 0x370);	/* counterpart of wb977_open(): a single 0xaa to port 0x370 */
+}
+
+static inline void wb977_wb(int reg, int val)
+{
+	outb(reg, 0x370);	/* register number goes to port 0x370 */
+	outb(val, 0x371);	/* its byte value goes to port 0x371 */
+}
+
+/* Write a 16-bit value as two byte writes: high byte to reg, low to reg + 1 */
+static inline void wb977_ww(int reg, int val)
+{
+	/* each wb977_wb() is a register write to 0x370 plus a data write to 0x371 */
+	wb977_wb(reg, val >> 8);
+	wb977_wb(reg + 1, val);
+}
+
+#define wb977_device_select(dev)	wb977_wb(0x07, dev)
+#define wb977_device_disable()		wb977_wb(0x30, 0x00)
+#define wb977_device_enable()		wb977_wb(0x30, 0x01)
+
+/*
+ * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE
+ */
+spinlock_t __netwinder_data gpio_lock = SPIN_LOCK_UNLOCKED;
+
+static unsigned int __netwinder_data current_gpio_op = 0;
+static unsigned int __netwinder_data current_gpio_io = 0;
+static unsigned int __netwinder_data current_cpld = 0;
+
+/* Update the shadowed GPIO output word; only write ports whose bits changed */
+void __netwinder_text gpio_modify_op(int mask, int set)
+{
+	unsigned int prev = current_gpio_op;
+	unsigned int next = (prev & ~mask) | set;
+	unsigned int diff = prev ^ next;
+
+	current_gpio_op = next;
+	if (diff & 0xff)		/* low byte lives at GP1_IO_BASE */
+		outb(next, GP1_IO_BASE);
+	if (diff & 0xff00)		/* next byte lives at GP2_IO_BASE */
+		outb(next >> 8, GP2_IO_BASE);
+}
+
+static inline void __gpio_modify_io(int mask, int in)
+{
+	unsigned int new_gpio, changed;
+	int port;
+	/* Merge the request into the shadow copy and note which bits differ */
+	new_gpio = (current_gpio_io & ~mask) | in;
+	changed = new_gpio ^ current_gpio_io;
+	current_gpio_io = new_gpio;
+	/* Bit 0 is not written by the loops below (they start at register 0xe1) */
+	changed >>= 1;
+	new_gpio >>= 1;
+
+	wb977_device_select(7);
+	/* Device 7, registers 0xe1-0xe7: written in order until no changed bit remains */
+	for (port = 0xe1; changed && port < 0xe8; changed >>= 1) {
+		wb977_wb(port, new_gpio & 1);
+
+		port += 1;
+		new_gpio >>= 1;
+	}
+
+	wb977_device_select(8);
+	/* Device 8, registers 0xe8-0xeb: continues with whatever is left in 'changed' */
+	for (port = 0xe8; changed && port < 0xec; changed >>= 1) {
+		wb977_wb(port, new_gpio & 1);
+
+		port += 1;
+		new_gpio >>= 1;
+	}
+}
+
+void __netwinder_text gpio_modify_io(int mask, int in)
+{
+	/* Open up the SuperIO chip */
+	wb977_open();
+	/* Update the shadow copy and the chip registers for the changed bits */
+	__gpio_modify_io(mask, in);
+
+	/* Close up the EFER gate */
+	wb977_close();
+}
+
+int __netwinder_text gpio_read(void)
+{
+	return inb(GP1_IO_BASE) | inb(GP2_IO_BASE) << 8;	/* GP1 in bits 0-7, GP2 in bits 8-15 */
+}
+
+/*
+ * Initialise the Winbond W83977F global registers (wb977_init() opens the chip first)
+ */
+static inline void wb977_init_global(void)
+{
+	/*
+	 * Enable R/W config registers
+	 */
+	wb977_wb(0x26, 0x40);
+
+	/*
+	 * Power down FDC (not used)
+	 */
+	wb977_wb(0x22, 0xfe);
+
+	/*
+	 * Pin selection: GP12, GP11, CIRRX, IRRXH, GP10
+	 */
+	wb977_wb(0x2a, 0xc1);
+
+	/*
+	 * Pin selection: GP23, GP22, GP21, GP20, GP13
+	 */
+	wb977_wb(0x2b, 0x6b);
+
+	/*
+	 * Pin selection: GP17, GP16, GP15, GP14
+	 */
+	wb977_wb(0x2c, 0x55);
+}
+
+/*
+ * Initialise the Winbond W83977F printer port (the device is not enabled here)
+ */
+static inline void wb977_init_printer(void)
+{
+	wb977_device_select(1);	/* writes 1 to register 0x07 */
+
+	/*
+	 * mode 1 == EPP
+	 */
+	wb977_wb(0xf0, 0x01);
+}
+
+/*
+ * Initialise the Winbond W83977F keyboard controller: addresses, IRQs, clock, then enable
+ */
+static inline void wb977_init_keyboard(void)
+{
+	wb977_device_select(5);	/* writes 5 to register 0x07 */
+
+	/*
+	 * Keyboard controller address (0x60 and 0x64)
+	 */
+	wb977_ww(0x60, 0x0060);
+	wb977_ww(0x62, 0x0064);
+
+	/*
+	 * Keyboard IRQ 1, active high, edge trigger
+	 */
+	wb977_wb(0x70, 1);
+	wb977_wb(0x71, 0x02);
+
+	/*
+	 * Mouse IRQ 5, active high, edge trigger
+	 */
+	wb977_wb(0x72, 5);
+	wb977_wb(0x73, 0x02);
+
+	/*
+	 * KBC 8MHz
+	 */
+	wb977_wb(0xf0, 0x40);
+
+	/*
+	 * Enable device (register 0x30 = 1)
+	 */
+	wb977_device_enable();
+}
+
+/*
+ * Initialise the Winbond W83977F Infra-Red device: base, IRQ, DMA, options, then enable
+ */
+static inline void wb977_init_irda(void)
+{
+	wb977_device_select(6);	/* writes 6 to register 0x07 */
+
+	/*
+	 * IR base address (IRDA_IO_BASE)
+	 */
+	wb977_ww(0x60, IRDA_IO_BASE);
+
+	/*
+	 * IRDA IRQ 6, active high, edge trigger
+	 */
+	wb977_wb(0x70, 6);
+	wb977_wb(0x71, 0x02);
+
+	/*
+	 * RX DMA - ISA DMA 0
+	 */
+	wb977_wb(0x74, 0x00);
+
+	/*
+	 * TX DMA - Disable Tx DMA
+	 */
+	wb977_wb(0x75, 0x04);
+
+	/*
+	 * Append CRC, Enable bank selection
+	 */
+	wb977_wb(0xf0, 0x03);
+
+	/*
+	 * Enable device (register 0x30 = 1)
+	 */
+	wb977_device_enable();
+}
+
+/*
+ * Initialise Winbond W83977F general purpose IO: pin directions, group 1/2 ports, watchdog, outputs
+ */
+static inline void wb977_init_gpio(void)
+{
+	unsigned long flags;
+
+	/*
+	 * Set up initial I/O definitions.  Starting from an all-ones shadow
+	 */
+	current_gpio_io = -1;	/* makes __gpio_modify_io() write every register */
+	__gpio_modify_io(-1, GPIO_DONE | GPIO_WDTIMER);
+
+	wb977_device_select(7);
+
+	/*
+	 * Group1 base address
+	 */
+	wb977_ww(0x60, GP1_IO_BASE);
+	wb977_ww(0x62, 0);
+	wb977_ww(0x64, 0);
+
+	/*
+	 * GP10 (Orange button) IRQ 10, active high, edge trigger
+	 */
+	wb977_wb(0x70, 10);
+	wb977_wb(0x71, 0x02);
+
+	/*
+	 * GP10: Debounce filter enabled, IRQ, input
+	 */
+	wb977_wb(0xe0, 0x19);
+
+	/*
+	 * Enable Group1
+	 */
+	wb977_device_enable();
+
+	wb977_device_select(8);
+
+	/*
+	 * Group2 base address
+	 */
+	wb977_ww(0x60, GP2_IO_BASE);
+
+	/*
+	 * Clear watchdog timer regs
+	 *  - timer disable
+	 */
+	wb977_wb(0xf2, 0x00);
+
+	/*
+	 *  - disable LED, no mouse nor keyboard IRQ
+	 */
+	wb977_wb(0xf3, 0x00);
+
+	/*
+	 *  - timer counting, disable power LED, disable timeout
+	 */
+	wb977_wb(0xf4, 0x00);
+
+	/*
+	 * Enable group2
+	 */
+	wb977_device_enable();
+
+	/*
+	 * Set Group1/Group2 outputs; gpio_lock guards the GP1/GP2 ports
+	 */
+	spin_lock_irqsave(&gpio_lock, flags);
+	gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN);
+	spin_unlock_irqrestore(&gpio_lock, flags);	/* must release the lock taken above */
+}
+
+/*
+ * Initialise the Winbond W83977F chip: claim its config ports, open, program, close.
+ */
+__initfunc(static void wb977_init(void))
+{
+	request_region(0x370, 2, "W83977AF configuration");	/* ports 0x370 and 0x371 */
+
+	/*
+	 * Open up the SuperIO chip
+	 */
+	wb977_open();
+
+	/*
+	 * Initialise the global registers
+	 */
+	wb977_init_global();
+
+	/*
+	 * Initialise the various devices in
+	 * the multi-IO chip.
+	 */
+	wb977_init_printer();
+	wb977_init_keyboard();
+	wb977_init_irda();
+	wb977_init_gpio();
+
+	/*
+	 * Close up the EFER gate
+	 */
+	wb977_close();
+}
+
+void __netwinder_text cpld_modify(int mask, int set)
+{
+	int msk;
+	/* Update the shadow copy, then shift its low four bits out via GPIO */
+	current_cpld = (current_cpld & ~mask) | set;
+
+	gpio_modify_io(GPIO_DATA, 0);
+	gpio_modify_op(GPIO_IOLOAD, 0);
+	/* Bits 3..0 in that order: present data with IOCLK low, then raise IOCLK */
+	for (msk = 8; msk; msk >>= 1) {
+		int bit = current_cpld & msk;
+
+		gpio_modify_op(GPIO_DATA | GPIO_IOCLK, bit ? GPIO_DATA : 0);
+		gpio_modify_op(GPIO_IOCLK, GPIO_IOCLK);
+	}
+	/* Drop clock and data, pulse IOLOAD (DSCLK is set with it and left set) */
+	gpio_modify_op(GPIO_IOCLK|GPIO_DATA, 0);
+	gpio_modify_op(GPIO_IOLOAD|GPIO_DSCLK, GPIO_IOLOAD|GPIO_DSCLK);
+	gpio_modify_op(GPIO_IOLOAD, 0);
+}
+
+__initfunc(static void cpld_init(void))
+{
+	unsigned long flags;
+	/* Load the initial CPLD value (CPLD_UNMUTE | 4) while holding gpio_lock */
+	spin_lock_irqsave(&gpio_lock, flags);
+	cpld_modify(-1, CPLD_UNMUTE | 4);
+	spin_unlock_irqrestore(&gpio_lock, flags);
+}
+
+static unsigned char rwa_unlock[] __initdata =
+{ 0x00, 0x00, 0x6a, 0xb5, 0xda, 0xed, 0xf6, 0xfb, 0x7d, 0xbe, 0xdf, 0x6f, 0x37, 0x1b,
+  0x0d, 0x86, 0xc3, 0x61, 0xb0, 0x58, 0x2c, 0x16, 0x8b, 0x45, 0xa2, 0xd1, 0xe8, 0x74,
+  0x3a, 0x9d, 0xce, 0xe7, 0x73, 0x39 };
+
+#ifndef DEBUG
+#define dprintk if (0) printk
+#else
+#define dprintk printk
+#endif
+
+#define WRITE_RWA(r,v) do { outb((r), 0x279); outb((v), 0xa79); } while (0)
+
+static inline void rwa010_unlock(void)
+{
+	int i;
+
+	WRITE_RWA(2, 2);	/* register 2 via port 0x279, value 2 via port 0xa79 */
+	mdelay(10);
+	/* Send every byte of the rwa_unlock table to port 0x279 */
+	for (i = 0; i < sizeof(rwa_unlock); i++)
+		outb(rwa_unlock[i], 0x279);
+}
+
+static inline void rwa010_read_ident(void)
+{
+	unsigned char si[9];	/* only ever consumed by the dprintk() calls below */
+	int i, j;
+
+	WRITE_RWA(3, 0);
+	WRITE_RWA(0, 128);
+
+	outb(1, 0x279);
+
+	mdelay(10);
+	/* Read 9 bytes of 8 "bits"; each bit takes two reads of port 0x203 */
+	dprintk("Identifier: ");
+	for (i = 0; i < 9; i++) {
+		si[i] = 0;
+		for (j = 0; j < 8; j++) {
+			int bit;
+			mdelay(1);
+			inb(0x203);	/* first read is discarded */
+			mdelay(1);
+			bit = inb(0x203);
+			dprintk("%02X ", bit);
+			si[i] |= bit << j;	/* NOTE(review): whole byte is shifted in, not masked to one bit - confirm */
+		}
+		mdelay(10);
+		dprintk("%02X ", si[i]);
+	}
+	dprintk("\n");
+}
+
+static inline void rwa010_global_init(void)
+{
+	WRITE_RWA(6, 2);	// Assign a card no = 2
+	/* Without DEBUG, dprintk is "if (0) printk", so this inb() is never executed */
+	dprintk("Card no = %d\n", inb(0x203));
+	/* NOTE(review): presumably selects devices 3, 4, 2 (reg 7) and disables each (reg 0x30 = 0) - confirm */
+	WRITE_RWA(7, 3);
+	WRITE_RWA(0x30, 0);
+
+	WRITE_RWA(7, 4);
+	WRITE_RWA(0x30, 0);
+
+	WRITE_RWA(7, 2);
+	WRITE_RWA(0x30, 0);
+}
+
+static inline void rwa010_game_port_init(void)
+{
+	int i;
+
+	WRITE_RWA(7, 5);
+	/* Registers 0x61/0x60 get 0x01/0x02, matching the "(201)" in the debug text */
+	dprintk("Slider base: ");
+	WRITE_RWA(0x61, 1);
+	i = inb(0x203);	/* read back; the value is only used by the dprintk below */
+
+	WRITE_RWA(0x60, 2);
+	dprintk("%02X%02X (201)\n", inb(0x203), i);
+
+	WRITE_RWA(0x30, 1);
+}
+
+static inline void rwa010_waveartist_init(int base, int irq, int dma)
+{
+	int i;
+
+	WRITE_RWA(7, 0);
+	/* base goes out as low byte to register 0x61, high byte to register 0x60 */
+	dprintk("WaveArtist base: ");
+	WRITE_RWA(0x61, base);
+	i = inb(0x203);	/* read back; the value is only used by the dprintk below */
+
+	WRITE_RWA(0x60, base >> 8);
+	dprintk("%02X%02X (%X),", inb(0x203), i, base);
+
+	WRITE_RWA(0x70, irq);
+	dprintk(" irq: %d (%d),", inb(0x203), irq);
+
+	WRITE_RWA(0x74, dma);
+	dprintk(" dma: %d (%d)\n", inb(0x203), dma);
+
+	WRITE_RWA(0x30, 1);
+}
+
+static inline void rwa010_soundblaster_init(int sb_base, int al_base, int irq, int dma)
+{
+	int i;
+
+	WRITE_RWA(7, 1);
+	/* sb_base: low byte to register 0x61, high byte to register 0x60 */
+	dprintk("SoundBlaster base: ");
+	WRITE_RWA(0x61, sb_base);
+	i = inb(0x203);	/* read back; the value is only used by the dprintk below */
+
+	WRITE_RWA(0x60, sb_base >> 8);
+	dprintk("%02X%02X (%X),", inb(0x203), i, sb_base);
+
+	dprintk(" irq: ");
+	WRITE_RWA(0x70, irq);
+	dprintk("%d (%d),", inb(0x203), irq);
+
+	dprintk(" 8-bit DMA: ");
+	WRITE_RWA(0x74, dma);
+	dprintk("%d (%d)\n", inb(0x203), dma);
+	/* al_base: low byte to register 0x63, high byte to register 0x62 */
+	dprintk("AdLib base: ");
+	WRITE_RWA(0x63, al_base);
+	i = inb(0x203);
+
+	WRITE_RWA(0x62, al_base >> 8);
+	dprintk("%02X%02X (%X)\n", inb(0x203), i, al_base);
+
+	WRITE_RWA(0x30, 1);
+}
+
+static void rwa010_soundblaster_reset(void)
+{
+	int i;
+	/* Pulse port 0x226: write 1, wait 3us, write 0 */
+	outb(1, 0x226);
+	udelay(3);
+	outb(0, 0x226);
+	/* Poll bit 7 of port 0x22e up to five times, 1ms apart */
+	for (i = 0; i < 5; i++) {
+		if (inb(0x22e) & 0x80)
+			break;
+		mdelay(1);
+	}
+	if (i == 5)
+		printk("SoundBlaster: DSP reset failed\n");
+	/* Without DEBUG, dprintk is "if (0) printk", so port 0x22a is not read here */
+	dprintk("SoundBlaster DSP reset: %02X (AA)\n", inb(0x22a));
+
+	for (i = 0; i < 5; i++) {
+		if ((inb(0x22c) & 0x80) == 0)	/* wait for bit 7 of 0x22c to clear */
+			break;
+		mdelay(1);
+	}
+
+	if (i == 5)
+		printk("SoundBlaster: DSP not ready\n");
+	else {
+		outb(0xe1, 0x22c);
+
+		dprintk("SoundBlaster DSP id: ");
+		i = inb(0x22a);
+		udelay(1);
+		i |= inb(0x22a) << 8;
+		dprintk("%04X\n", i);
+
+		for (i = 0; i < 5; i++) {
+			if ((inb(0x22c) & 0x80) == 0)
+				break;
+			mdelay(1);
+		}
+
+		if (i == 5)
+			printk("SoundBlaster: could not turn speaker off\n");
+		/* 0xd3 is written even when the wait above timed out */
+		outb(0xd3, 0x22c);
+	}
+
+	/* turn on OPL3 */
+	outb(5, 0x38a);
+	outb(1, 0x38b);
+}
+
+__initfunc(static void rwa010_init(void))
+{
+	rwa010_unlock();
+	rwa010_read_ident();
+	rwa010_global_init();
+	rwa010_game_port_init();
+	rwa010_waveartist_init(0x250, 3, 7);	/* base 0x250, irq 3, dma 7 */
+	rwa010_soundblaster_init(0x220, 0x388, 3, 1);	/* SB 0x220, AdLib 0x388, irq 3, dma 1 */
+	rwa010_soundblaster_reset();
+}
+
+EXPORT_SYMBOL(gpio_lock);
+EXPORT_SYMBOL(gpio_modify_op);
+EXPORT_SYMBOL(gpio_modify_io);
+EXPORT_SYMBOL(cpld_modify);
+
+#endif
+
+#ifdef CONFIG_LEDS
+#define DEFAULT_LEDS	0
+#else
+#define DEFAULT_LEDS	GPIO_GREEN_LED
+#endif
+
+__initfunc(void hw_init(void))
+{
+#ifdef CONFIG_ARCH_NETWINDER
+	/*
+	 * this ought to have a better home...
+	 * Since this calls the above routines, which are
+	 * compiled only if CONFIG_ARCH_NETWINDER is set,
+	 * these should only be parsed by the compiler
+	 * in the same circumstance.
+	 */
+	if (machine_is_netwinder()) {
+		unsigned long flags;
+		/* Bring up the W83977F, the CPLD and the RWA010, in that order */
+		wb977_init();
+		cpld_init();
+		rwa010_init();
+		/* Clear the red LED bit; set green only when CONFIG_LEDS is not defined */
+		spin_lock_irqsave(&gpio_lock, flags);
+		gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS);
+		spin_unlock_irqrestore(&gpio_lock, flags);
+	}
+#endif
+
+	leds_event(led_start);	/* runs on every machine type, not just NetWinder */
+}
diff --git a/arch/arm/kernel/iic.c b/arch/arm/kernel/iic.c
index 6eb0122e8..c9a672a32 100644
--- a/arch/arm/kernel/iic.c
+++ b/arch/arm/kernel/iic.c
@@ -7,20 +7,24 @@
  */
 
 #include <linux/delay.h>
+#include <linux/errno.h>
 
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/ioc.h>
+
+#define FORCE_ONES	0xdc
 
 /*
  * if delay loop has been calibrated then us that,
  * else use IOC timer 1.
  */
-static void iic_delay (void)
+static void iic_delay(void)
 {
 	extern unsigned long loops_per_sec;
 	if (loops_per_sec != (1 << 12)) {
-		udelay(10);
+		udelay(100); /* was 10 */
 		return;
 	} else {
 		unsigned long flags;
@@ -30,7 +34,7 @@ static void iic_delay (void)
 		outb(255,  IOC_T1LTCHH);
 		outb(0,    IOC_T1GO);
 		outb(1<<6, IOC_IRQCLRA);			/* clear T1 irq */
-		outb(4,    IOC_T1LTCHL);
+		outb(10,   IOC_T1LTCHL); /* was 4 */
 		outb(0,    IOC_T1LTCHH);
 		outb(0,    IOC_T1GO);
 		while ((inb(IOC_IRQSTATA) & (1<<6)) == 0);
@@ -38,124 +42,207 @@ static void iic_delay (void)
 	}
 }
 
-static inline void iic_start (void)
+#define IIC_INIT()		dat = (inb(IOC_CONTROL) | FORCE_ONES) & ~3
+#define IIC_SET_DAT		outb(dat|=1, IOC_CONTROL);
+#define IIC_CLR_DAT		outb(dat&=~1, IOC_CONTROL);
+#define IIC_SET_CLK		outb(dat|=2, IOC_CONTROL);
+#define IIC_CLR_CLK		outb(dat&=~2, IOC_CONTROL);
+#define IIC_DELAY		iic_delay();
+#define IIC_READ_DATA()		(inb(IOC_CONTROL) & 1)
+
+static inline void iic_set_lines(int clk, int dat)
 {
-	unsigned char out;
+	int old;
 
-	out = inb(IOC_CONTROL) | 0xc2;
+	old = inb(IOC_CONTROL) | FORCE_ONES;
 
-	outb(out, IOC_CONTROL);
-	iic_delay();
+	old &= ~3;
+
+	if (clk)
+		old |= 2;
+	if (dat)
+		old |= 1;
+
+	outb(old, IOC_CONTROL);
 
-	outb(out ^ 1, IOC_CONTROL);
 	iic_delay();
 }
 
-static inline void iic_stop (void)
+static inline unsigned int iic_read_data(void)
 {
-	unsigned char out;
+	return inb(IOC_CONTROL) & 1;
+}
 
-	out = inb(IOC_CONTROL) | 0xc3;
+/*
+ * C: ==~~_
+ * D: =~~__
+ */
+static inline void iic_start(void)
+{
+	unsigned int dat;
 
-	iic_delay();
-	outb(out ^ 1, IOC_CONTROL);
+	IIC_INIT();
 
-	iic_delay();
-	outb(out, IOC_CONTROL);
+	IIC_SET_DAT
+	IIC_DELAY
+	IIC_SET_CLK
+	IIC_DELAY
+
+	IIC_CLR_DAT
+	IIC_DELAY
+	IIC_CLR_CLK
+	IIC_DELAY
 }
 
-static int iic_sendbyte (unsigned char b)
+/*
+ * C: __~~
+ * D: =__~
+ */
+static inline void iic_stop(void)
 {
-	unsigned char out, in;
-	int i;
+	unsigned int dat;
 
-	out = (inb(IOC_CONTROL) & 0xfc) | 0xc0;
+	IIC_INIT();
 
-	outb(out, IOC_CONTROL);
-	for (i = 7; i >= 0; i--) {
-		unsigned char c;
-		c = out | ((b & (1 << i)) ? 1 : 0);
+	IIC_CLR_DAT
+	IIC_DELAY
+	IIC_SET_CLK
+	IIC_DELAY
+	IIC_SET_DAT
+	IIC_DELAY
+}
 
-		outb(c, IOC_CONTROL);
-		iic_delay();
+/*
+ * C: __~_
+ * D: =___
+ */
+static inline void iic_acknowledge(void)
+{
+	unsigned int dat;
 
-		outb(c | 2, IOC_CONTROL);
-		iic_delay();
+	IIC_INIT();
 
-		outb(c, IOC_CONTROL);
-	}
-	outb(out | 1, IOC_CONTROL);
-	iic_delay();
+	IIC_CLR_DAT
+	IIC_DELAY
+	IIC_SET_CLK
+	IIC_DELAY
+	IIC_CLR_CLK
+	IIC_DELAY
+}
 
-	outb(out | 3, IOC_CONTROL);
-	iic_delay();
+/*
+ * C: __~_
+ * D: =~H~
+ */
+static inline int iic_is_acknowledged(void)
+{
+	unsigned int dat, ack_bit;
 
-	in = inb(IOC_CONTROL) & 1;
+	IIC_INIT();
 
-	outb(out | 1, IOC_CONTROL);
-	iic_delay();
+	IIC_SET_DAT
+	IIC_DELAY
+	IIC_SET_CLK
+	IIC_DELAY
 
-	outb(out, IOC_CONTROL);
-	iic_delay();
+	ack_bit = IIC_READ_DATA();
+
+	IIC_CLR_CLK
+	IIC_DELAY
+
+	return ack_bit == 0;
+}
+
+/*
+ * C: _~__~__~__~__~__~__~__~_
+ * D: =DDXDDXDDXDDXDDXDDXDDXDD
+ */
+static void iic_sendbyte(unsigned int b)
+{
+	unsigned int dat, i;
+
+	IIC_INIT();
+
+	for (i = 0; i < 8; i++) {
+		if (b & 128)
+			IIC_SET_DAT
+		else
+			IIC_CLR_DAT
+		IIC_DELAY
+
+		IIC_SET_CLK
+		IIC_DELAY
+		IIC_CLR_CLK
+		IIC_DELAY
 
-	if(in) {
-		printk("No acknowledge from RTC\n");
-		return 1;
-	} else
-		return 0;
+		b <<= 1;
+	}
 }
 
-static unsigned char iic_recvbyte (void)
+/*
+ * C: __~_~_~_~_~_~_~_~_
+ * D: =~HHHHHHHHHHHHHHHH
+ */
+static unsigned char iic_recvbyte(void)
 {
-	unsigned char out, in;
-	int i;
+	unsigned int dat, i, in;
 
-	out = (inb(IOC_CONTROL) & 0xfc) | 0xc0;
+	IIC_INIT();
+
+	IIC_SET_DAT
+	IIC_DELAY
 
-	outb(out, IOC_CONTROL);
 	in = 0;
-	for (i = 7; i >= 0; i--) {
-		outb(out | 1, IOC_CONTROL);
-		iic_delay();
-		outb(out | 3, IOC_CONTROL);
-		iic_delay();
-		in = (in << 1) | (inb(IOC_CONTROL) & 1);
-		outb(out | 1, IOC_CONTROL);
-		iic_delay();
+	for (i = 0; i < 8; i++) {
+		IIC_SET_CLK
+		IIC_DELAY
+
+		in = (in << 1) | IIC_READ_DATA();
+
+		IIC_CLR_CLK
+		IIC_DELAY
 	}
-	outb(out, IOC_CONTROL);
-	iic_delay();
-	outb(out | 2, IOC_CONTROL);
-	iic_delay();
 
 	return in;
 }
 
-void iic_control (unsigned char addr, unsigned char loc, unsigned char *buf, int len)
+int iic_control (unsigned char addr, unsigned char loc, unsigned char *buf, int len)
 {
-	iic_start();
+	int i, err = -EIO;
 
-	if (iic_sendbyte(addr & 0xfe))
+	iic_start();
+	iic_sendbyte(addr & 0xfe);
+	if (!iic_is_acknowledged())
 		goto error;
 
-	if (iic_sendbyte(loc))
+	iic_sendbyte(loc);
+	if (!iic_is_acknowledged())
 		goto error;
 
 	if (addr & 1) {
-		int i;
-
-		for (i = 0; i < len; i++)
-			if (iic_sendbyte (buf[i]))
-				break;
-	} else {
-		int i;
-
 		iic_stop();
 		iic_start();
 		iic_sendbyte(addr|1);
-		for (i = 0; i < len; i++)
-			buf[i] = iic_recvbyte ();
+		if (!iic_is_acknowledged())
+			goto error;
+
+		for (i = 0; i < len - 1; i++) {
+			buf[i] = iic_recvbyte();
+			iic_acknowledge();
+		}
+		buf[i] = iic_recvbyte();
+	} else {
+		for (i = 0; i < len; i++) {
+			iic_sendbyte(buf[i]);
+
+			if (!iic_is_acknowledged())
+				goto error;
+		}
 	}
+
+	err = 0;
 error:
 	iic_stop();
+
+	return err;
 }
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 99577f1b7..5d09ea540 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -6,9 +6,10 @@
 
 static struct vm_area_struct init_mmap = INIT_MMAP;
 static struct fs_struct init_fs = INIT_FS;
+static struct file * init_fd_array[NR_OPEN] = { NULL, };
 static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS;
-struct mm_struct init_mm = INIT_MM;
+struct mm_struct init_mm = INIT_MM(init_mm);
 
 /*
  * Initial task structure.
@@ -20,4 +21,5 @@ struct mm_struct init_mm = INIT_MM;
  *
  * The things we do for performance..
  */
-union task_union init_task_union __attribute__((__section__(".init.task"))) = { INIT_TASK };
+union task_union init_task_union __attribute__((__section__(".init.task"))) =
+		{ INIT_TASK(init_task_union.task) };
diff --git a/arch/arm/kernel/ioport.c b/arch/arm/kernel/ioport.c
deleted file mode 100644
index d375dcbdd..000000000
--- a/arch/arm/kernel/ioport.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * linux/arch/arm/kernel/ioport.c
- *
- * Io-port support is not used for ARM
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-/*asmlinkage void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value)
-{
-}*/
-
-asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
-	return -ENOSYS;
-}
-
-asmlinkage int sys_iopl(long ebx,long ecx,long edx,
-	     long esi, long edi, long ebp, long eax, long ds,
-	     long es, long fs, long gs, long orig_eax,
-	     long eip,long cs,long eflags,long esp,long ss)
-{
-	return -ENOSYS;
-}
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 332e8940d..ee6e07c6c 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -23,7 +23,6 @@
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
-#include <linux/timex.h>
 #include <linux/malloc.h>
 #include <linux/random.h>
 #include <linux/smp.h>
@@ -32,7 +31,6 @@
 
 #include <asm/hardware.h>
 #include <asm/io.h>
-#include <asm/pgtable.h>
 #include <asm/system.h>
 
 #ifndef SMP
@@ -46,10 +44,22 @@
 #define cliIF()
 #endif
 
+/*
+ * Maximum IRQ count.  Currently, this is arbitrary.
+ * However, it should not be set too low to prevent
+ * false triggering.  Conversely, if it is set too
+ * high, then you could miss a stuck IRQ.
+ *
+ * Maybe we ought to set a timer and re-enable the
+ * IRQ at a later time?
+ */
+#define MAX_IRQ_CNT	100000
+
 unsigned int local_bh_count[NR_CPUS];
 unsigned int local_irq_count[NR_CPUS];
 spinlock_t irq_controller_lock;
 
+int setup_arm_irq(int, struct irqaction *);
 extern int get_fiq_list(char *);
 extern void init_FIQ(void);
 
@@ -60,17 +70,29 @@ struct irqdesc {
 	unsigned int	 probing  : 1;		/* IRQ in use for a probe     */
 	unsigned int	 probe_ok : 1;		/* IRQ can be used for probe  */
 	unsigned int	 valid    : 1;		/* IRQ claimable	      */
-	unsigned int	 unused   :26;
+	unsigned int	 noautoenable : 1;	/* don't automatically enable IRQ */
+	unsigned int	 unused   :25;
 	void (*mask_ack)(unsigned int irq);	/* Mask and acknowledge IRQ   */
 	void (*mask)(unsigned int irq);		/* Mask IRQ		      */
 	void (*unmask)(unsigned int irq);	/* Unmask IRQ		      */
 	struct irqaction *action;
-	unsigned int	 unused2[3];
+	/*
+	 * IRQ lock detection
+	 */
+	unsigned int	 lck_cnt;
+	unsigned int	 lck_pc;
+	unsigned int	 lck_jif;
 };
 
 static struct irqdesc irq_desc[NR_IRQS];
 
 /*
+ * Get architecture specific interrupt handlers
+ * and interrupt initialisation.
+ */
+#include <asm/arch/irq.h>
+
+/*
  * Dummy mask/unmask handler
  */
 static void dummy_mask_unmask_irq(unsigned int irq)
@@ -94,10 +116,12 @@ void enable_irq(unsigned int irq)
 
 	spin_lock_irqsave(&irq_controller_lock, flags);
 	cliIF();
-	irq_desc[irq].enabled = 1;
 	irq_desc[irq].probing = 0;
 	irq_desc[irq].triggered = 0;
-	irq_desc[irq].unmask(irq);
+	if (!irq_desc[irq].noautoenable) {
+		irq_desc[irq].enabled = 1;
+		irq_desc[irq].unmask(irq);
+	}
 	spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
@@ -119,21 +143,52 @@ int get_irq_list(char *buf)
 		*p++ = '\n';
 	}
 
-#ifdef CONFIG_ACORN
+#ifdef CONFIG_ARCH_ACORN
 	p += get_fiq_list(p);
 #endif
 	return p - buf;
 }
 
 /*
+ * IRQ lock detection.
+ *
+ * Hopefully, this should get us out of a few locked situations.
+ * However, it may take a while for this to happen, since we need
+ * a large number of IRQs to appear in the same jiffy with the
+ * same instruction pointer (or within 2 instructions).
+ */
+static void check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs)
+{
+	unsigned long pc = instruction_pointer(regs);
+
+	/* New jiffy, or recorded PC outside the 8-byte window: restart the count */
+	if (desc->lck_jif != jiffies ||
+	    desc->lck_pc < pc || desc->lck_pc >= pc + 8) {
+		desc->lck_cnt = 0;
+		desc->lck_pc  = instruction_pointer(regs);
+		desc->lck_jif = jiffies;
+		return;
+	}
+
+	if (++desc->lck_cnt > MAX_IRQ_CNT) {
+		printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq);
+		disable_irq(irq);
+	}
+}
+
+/*
  * do_IRQ handles all normal device IRQ's
  */
 asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
 {
-	struct irqdesc * desc = irq_desc + irq;
+	struct irqdesc * desc;
 	struct irqaction * action;
 	int status, cpu;
 
+	irq = fixup_irq(irq);
+
+	desc = irq_desc + irq;
+
 	spin_lock(&irq_controller_lock);
 	desc->mask_ack(irq);
 	spin_unlock(&irq_controller_lock);
@@ -174,6 +229,12 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
 		}
 	}
 
+	/*
+	 * Debug measure - hopefully we can continue if an
+	 * IRQ lockup problem occurs...
+	 */
+	check_irq_lock(desc, irq, regs);
+
 	irq_exit(cpu, irq);
 
 	/*
@@ -181,15 +242,10 @@ asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
 	 * a return code from the irq handler to tell us
 	 * whether the handler wants us to do software bottom
 	 * half handling or not..
-	 *
-	 * ** IMPORTANT NOTE: do_bottom_half() ENABLES IRQS!!! **
-	 * **  WE MUST DISABLE THEM AGAIN, ELSE IDE DISKS GO   **
-	 * **                       AWOL                       **
 	 */
 	if (1) {
 		if (bh_active & bh_mask)
 			do_bottom_half();
-		__cli();
 	}
 }
 
@@ -227,11 +283,27 @@ int setup_arm_irq(int irq, struct irqaction * new)
 	struct irqaction *old, **p;
 	unsigned long flags;
 
-	if (new->flags & SA_SAMPLE_RANDOM)
+	/*
+	 * Some drivers like serial.c use request_irq() heavily,
+	 * so we have to be careful not to interfere with a
+	 * running system.
+	 */
+	if (new->flags & SA_SAMPLE_RANDOM) {
+		/*
+		 * This function might sleep, we want to call it first,
+		 * outside of the atomic block.
+		 * Yes, this might clear the entropy pool if the wrong
+		 * driver is attempted to be loaded, without actually
+		 * installing a new handler, but is this really a problem,
+		 * only the sysadmin is able to do this.
+		 */
 	        rand_initialize_irq(irq);
+	}
 
+	/*
+	 * The following block of code has to be executed atomically
+	 */
 	spin_lock_irqsave(&irq_controller_lock, flags);
-
 	p = &irq_desc[irq].action;
 	if ((old = *p) != NULL) {
 		/* Can't share interrupts unless both agree to */
@@ -252,28 +324,24 @@ int setup_arm_irq(int irq, struct irqaction * new)
 
 	if (!shared) {
 		irq_desc[irq].nomask = (new->flags & SA_IRQNOMASK) ? 1 : 0;
-		irq_desc[irq].enabled = 1;
 		irq_desc[irq].probing = 0;
-		irq_desc[irq].unmask(irq);
+		if (!irq_desc[irq].noautoenable) {
+			irq_desc[irq].enabled = 1;
+			irq_desc[irq].unmask(irq);
+		}
 	}
 
 	spin_unlock_irqrestore(&irq_controller_lock, flags);
 	return 0;
 }
 
-/*
- * Using "struct sigaction" is slightly silly, but there
- * are historical reasons and it works well, so..
- */
 int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *),
 		 unsigned long irq_flags, const char * devname, void *dev_id)
 {
 	unsigned long retval;
 	struct irqaction *action;
 
-	if (!irq_desc[irq].valid)
-		return -EINVAL;
-	if (!handler)
+	if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler)
 		return -EINVAL;
 
 	action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL);
@@ -299,28 +367,30 @@ void free_irq(unsigned int irq, void *dev_id)
 	struct irqaction * action, **p;
 	unsigned long flags;
 
-	if (!irq_desc[irq].valid) {
+	if (irq >= NR_IRQS || !irq_desc[irq].valid) {
 		printk(KERN_ERR "Trying to free IRQ%d\n",irq);
 #ifdef CONFIG_DEBUG_ERRORS
 		__backtrace();
 #endif
 		return;
 	}
+
+	spin_lock_irqsave(&irq_controller_lock, flags);
 	for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
 		if (action->dev_id != dev_id)
 			continue;
 
 	    	/* Found it - now free it */
-		save_flags_cli (flags);
 		*p = action->next;
-		restore_flags (flags);
 		kfree(action);
-		return;
+		goto out;
 	}
 	printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
 #ifdef CONFIG_DEBUG_ERRORS
 	__backtrace();
 #endif
+out:
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 /* Start the interrupt probing.  Unlike other architectures,
@@ -346,7 +416,6 @@ unsigned long probe_irq_on(void)
 			continue;
 
 		irq_desc[i].probing = 1;
-		irq_desc[i].enabled = 1;
 		irq_desc[i].triggered = 0;
 		irq_desc[i].unmask(i);
 		irqs += 1;
@@ -364,7 +433,8 @@ unsigned long probe_irq_on(void)
 	 */
 	spin_lock_irq(&irq_controller_lock);
 	for (i = 0; i < NR_IRQS; i++) {
-		if (irq_desc[i].probing && irq_desc[i].triggered) {
+		if (irq_desc[i].probing &&
+		    irq_desc[i].triggered) {
 			irq_desc[i].probing = 0;
 			irqs -= 1;
 		}
@@ -383,7 +453,7 @@ unsigned long probe_irq_on(void)
 int probe_irq_off(unsigned long irqs)
 {
 	unsigned int i;
-	int irq_found = -1;
+	int irq_found = NO_IRQ;
 
 	/*
 	 * look at the interrupts, and find exactly one
@@ -393,7 +463,7 @@ int probe_irq_off(unsigned long irqs)
 	for (i = 0; i < NR_IRQS; i++) {
 		if (irq_desc[i].probing &&
 		    irq_desc[i].triggered) {
-			if (irq_found != -1) {
+			if (irq_found != NO_IRQ) {
 				irq_found = NO_IRQ;
 				goto out;
 			}
@@ -405,21 +475,19 @@ int probe_irq_off(unsigned long irqs)
 		irq_found = NO_IRQ;
 out:
 	spin_unlock_irq(&irq_controller_lock);
+
 	return irq_found;
 }
 
-/*
- * Get architecture specific interrupt handlers
- * and interrupt initialisation.
- */
-#include <asm/arch/irq.h>
-
 __initfunc(void init_IRQ(void))
 {
 	extern void init_dma(void);
 	int irq;
 
 	for (irq = 0; irq < NR_IRQS; irq++) {
+		irq_desc[irq].probe_ok = 0;
+		irq_desc[irq].valid    = 0;
+		irq_desc[irq].noautoenable = 0;
 		irq_desc[irq].mask_ack = dummy_mask_unmask_irq;
 		irq_desc[irq].mask     = dummy_mask_unmask_irq;
 		irq_desc[irq].unmask   = dummy_mask_unmask_irq;
diff --git a/arch/arm/kernel/leds-ebsa110.c b/arch/arm/kernel/leds-ebsa110.c
index cc2f7a91d..eb286347b 100644
--- a/arch/arm/kernel/leds-ebsa110.c
+++ b/arch/arm/kernel/leds-ebsa110.c
@@ -7,11 +7,13 @@
  *
  *  - Red - toggles state every 50 timer interrupts
  */
+#include <linux/module.h>
+
 #include <asm/hardware.h>
 #include <asm/leds.h>
 #include <asm/system.h>
 
-void leds_event(led_event_t ledevt)
+void ebsa110_leds_event(led_event_t ledevt)
 {
 	unsigned long flags;
 
@@ -28,3 +30,7 @@ void leds_event(led_event_t ledevt)
 
 	restore_flags(flags);
 }
+
+void (*leds_event)(led_event_t) = ebsa110_leds_event;
+
+EXPORT_SYMBOL(leds_event);
diff --git a/arch/arm/kernel/leds-ebsa285.c b/arch/arm/kernel/leds-ebsa285.c
deleted file mode 100644
index a8cf2e775..000000000
--- a/arch/arm/kernel/leds-ebsa285.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * arch/arm/kernel/leds-ebsa285.c
- *
- * Copyright (C) 1998 Russell King
- *
- * EBSA-285 LED control routines.  We use the leds as follows:
- *
- *  - Green - toggles state every 50 timer interrupts
- *  - Amber - On if system is not idle
- *  - Red   - currently unused
- */
-#include <asm/hardware.h>
-#include <asm/leds.h>
-#include <asm/system.h>
-
-static char led_state = XBUS_LED_RED | XBUS_LED_GREEN;
-
-void leds_event(led_event_t ledevt)
-{
-	unsigned long flags;
-
-	save_flags_cli(flags);
-
-	switch(ledevt) {
-	case led_idle_start:
-		led_state |= XBUS_LED_AMBER;
-		break;
-
-	case led_idle_end:
-		led_state &= ~XBUS_LED_AMBER;
-		break;
-
-	case led_timer:
-		led_state ^= XBUS_LED_GREEN;
-		break;
-
-	default:
-		break;
-	}
-
-	restore_flags(flags);
-
-	*XBUS_LEDS = led_state;
-}
diff --git a/arch/arm/kernel/leds-footbridge.c b/arch/arm/kernel/leds-footbridge.c
new file mode 100644
index 000000000..cb6c7f4b4
--- /dev/null
+++ b/arch/arm/kernel/leds-footbridge.c
@@ -0,0 +1,249 @@
+/*
+ * arch/arm/kernel/leds-footbridge.c
+ *
+ * Copyright (C) 1998-1999 Russell King
+ *
+ * EBSA-285 and NetWinder LED control routines.
+ *
+ * The EBSA-285 uses the leds as follows:
+ *  - Green - toggles state every 50 timer interrupts
+ *  - Amber - On if system is not idle
+ *  - Red   - currently unused
+ *
+ * The Netwinder uses the leds as follows:
+ *  - Green - toggles state every 50 timer interrupts
+ *  - Red   - On if the system is not idle
+ *
+ * Changelog:
+ *   02-05-1999	RMK	Various cleanups
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/hardware.h>
+#include <asm/leds.h>
+#include <asm/spinlock.h>
+#include <asm/system.h>
+
+#define LED_STATE_ENABLED	1
+#define LED_STATE_CLAIMED	2
+static char led_state;
+static char hw_led_state;
+
+static spinlock_t leds_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_ARCH_EBSA285
+
+static void __ebsa285_text ebsa285_leds_event(led_event_t evt)
+{	/* EBSA-285 handler: fold evt into hw_led_state, then write it to *XBUS_LEDS while enabled */
+	unsigned long flags;
+
+	spin_lock_irqsave(&leds_lock, flags);	/* led_state and hw_led_state are only modified under this lock */
+
+	switch (evt) {
+	case led_start:
+		hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN;	/* a set bit means LED off: the *_on events below clear bits */
+#ifndef CONFIG_LEDS_IDLE	/* NOTE(review): idle handling below is gated by CONFIG_LEDS_CPU - confirm which symbol is intended */
+		hw_led_state |= XBUS_LED_AMBER;
+#endif
+		led_state |= LED_STATE_ENABLED;
+		break;
+
+	case led_stop:
+		led_state &= ~LED_STATE_ENABLED;	/* state is still tracked, but no longer written to the hardware */
+		break;
+
+	case led_claim:
+		led_state |= LED_STATE_CLAIMED;
+		hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN | XBUS_LED_AMBER;	/* all three off */
+		break;
+
+	case led_release:
+		led_state &= ~LED_STATE_CLAIMED;
+		hw_led_state = XBUS_LED_RED | XBUS_LED_GREEN | XBUS_LED_AMBER;	/* all three off */
+		break;
+
+#ifdef CONFIG_LEDS_TIMER
+	case led_timer:
+		if (!(led_state & LED_STATE_CLAIMED))	/* automatic events are ignored while claimed */
+			hw_led_state ^= XBUS_LED_GREEN;
+		break;
+#endif
+
+#ifdef CONFIG_LEDS_CPU
+	case led_idle_start:
+		if (!(led_state & LED_STATE_CLAIMED))
+			hw_led_state |= XBUS_LED_AMBER;	/* amber off while idle (amber, not red, is the idle LED on EBSA-285) */
+		break;
+
+	case led_idle_end:
+		if (!(led_state & LED_STATE_CLAIMED))
+			hw_led_state &= ~XBUS_LED_AMBER;	/* amber on while the system is not idle */
+		break;
+#endif
+
+	case led_green_on:	/* explicit colour events only take effect while claimed */
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state &= ~XBUS_LED_GREEN;
+		break;
+
+	case led_green_off:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state |= XBUS_LED_GREEN;
+		break;
+
+	case led_amber_on:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state &= ~XBUS_LED_AMBER;
+		break;
+
+	case led_amber_off:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state |= XBUS_LED_AMBER;
+		break;
+
+	case led_red_on:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state &= ~XBUS_LED_RED;
+		break;
+
+	case led_red_off:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state |= XBUS_LED_RED;
+		break;
+
+	default:
+		break;
+	}
+
+	if  (led_state & LED_STATE_ENABLED)	/* only touch the hardware between led_start and led_stop */
+		*XBUS_LEDS = hw_led_state;
+
+	spin_unlock_irqrestore(&leds_lock, flags);
+}
+
+#endif
+
+#ifdef CONFIG_ARCH_NETWINDER
+
+static void __netwinder_text netwinder_leds_event(led_event_t evt)
+{	/* NetWinder handler: fold evt into hw_led_state, then push it out through gpio_modify_op() while enabled */
+	unsigned long flags;
+
+	spin_lock_irqsave(&leds_lock, flags);	/* led_state and hw_led_state are only modified under this lock */
+
+	switch (evt) {
+	case led_start:
+		led_state |= LED_STATE_ENABLED;
+		hw_led_state = 0;	/* here a set bit means LED on: the *_on events below set bits */
+		break;
+
+	case led_stop:
+		led_state &= ~LED_STATE_ENABLED;	/* state is still tracked, but no longer pushed to the GPIOs */
+		break;
+
+	case led_claim:
+		led_state |= LED_STATE_CLAIMED;
+		hw_led_state = 0;
+		break;
+
+	case led_release:
+		led_state &= ~LED_STATE_CLAIMED;
+		hw_led_state = 0;
+		break;
+
+#ifdef CONFIG_LEDS_TIMER
+	case led_timer:
+		if (!(led_state & LED_STATE_CLAIMED))	/* automatic events are ignored while claimed */
+			hw_led_state ^= GPIO_GREEN_LED;
+		break;
+#endif
+
+#ifdef CONFIG_LEDS_CPU
+	case led_idle_start:
+		if (!(led_state & LED_STATE_CLAIMED))
+			hw_led_state &= ~GPIO_RED_LED;	/* red off while idle */
+		break;
+
+	case led_idle_end:
+		if (!(led_state & LED_STATE_CLAIMED))
+			hw_led_state |= GPIO_RED_LED;	/* red on while the system is not idle */
+		break;
+#endif
+
+	case led_green_on:	/* explicit colour events only take effect while claimed */
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state |= GPIO_GREEN_LED;
+		break;
+
+	case led_green_off:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state &= ~GPIO_GREEN_LED;
+		break;
+
+	case led_amber_on:	/* amber is produced by driving green and red together */
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state |= GPIO_GREEN_LED | GPIO_RED_LED;
+		break;
+
+	case led_amber_off:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state &= ~(GPIO_GREEN_LED | GPIO_RED_LED);
+		break;
+
+	case led_red_on:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state |= GPIO_RED_LED;
+		break;
+
+	case led_red_off:
+		if (led_state & LED_STATE_CLAIMED)
+			hw_led_state &= ~GPIO_RED_LED;
+		break;
+
+	default:
+		break;
+	}
+
+	spin_unlock_irqrestore(&leds_lock, flags);	/* dropped before gpio_lock is taken, so the two locks are never nested */
+
+	if  (led_state & LED_STATE_ENABLED) {	/* NOTE(review): led_state/hw_led_state are read here without leds_lock - confirm this is acceptable */
+		spin_lock_irqsave(&gpio_lock, flags);
+		gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state);	/* presumably (mask, new value); helper is not visible here */
+		spin_unlock_irqrestore(&gpio_lock, flags);
+	}
+}
+
+#endif
+
+static void dummy_leds_event(led_event_t evt)
+{
+}
+
+__initfunc(void
+init_leds_event(led_event_t evt))
+{	/* First-call resolver: pick the handler for this machine, install it in leds_event, then forward evt */
+	switch (machine_arch_type) {
+#ifdef CONFIG_ARCH_EBSA285
+	case MACH_TYPE_EBSA285:
+		leds_event = ebsa285_leds_event;
+		break;
+#endif
+#ifdef CONFIG_ARCH_NETWINDER
+	case MACH_TYPE_NETWINDER:
+		leds_event = netwinder_leds_event;
+		break;
+#endif
+
+	default:	/* any other machine type gets the no-op handler */
+		leds_event = dummy_leds_event;
+	}
+
+	leds_event(evt);	/* NOTE(review): leds_event starts out pointing at this __initfunc routine - confirm it always runs before init code is discarded */
+}
+
+void (*leds_event)(led_event_t) = init_leds_event;
+
+EXPORT_SYMBOL(leds_event);
diff --git a/arch/arm/kernel/oldlatches.c b/arch/arm/kernel/oldlatches.c
index c4674cd35..a908241d2 100644
--- a/arch/arm/kernel/oldlatches.c
+++ b/arch/arm/kernel/oldlatches.c
@@ -4,6 +4,7 @@
  * (c) David Alan Gilbert 1995/1996
  */
 #include <linux/kernel.h>
+#include <linux/init.h>
 
 #include <asm/io.h>
 #include <asm/hardware.h>
@@ -40,7 +41,7 @@ void oldlatch_bupdate(unsigned char mask,unsigned char newdata)
 }
 #endif
 
-void oldlatch_init(void)
+void __init oldlatch_init(void)
 {
     printk("oldlatch: init\n");
 #ifdef LATCHAADDR
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 6ea02d891..68bf5aa1f 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -34,7 +34,6 @@
 #include <linux/init.h>
 
 #include <asm/uaccess.h>
-#include <asm/pgtable.h>
 #include <asm/system.h>
 #include <asm/io.h>
 
@@ -55,46 +54,37 @@ void enable_hlt(void)
 }
 
 /*
- * The idle loop on an arm..
+ * The idle loop on an ARM...
  */
 asmlinkage int sys_idle(void)
 {
-	int ret = -EPERM;
-
-	lock_kernel();
 	if (current->pid != 0)
-		goto out;
+		return -EPERM;
+
 	/* endless idle loop with no priority at all */
-	current->priority = -100;
-	for (;;)
-	{
+	while (1) {
+		if (!current->need_resched && !hlt_counter)
+			proc_idle();
+		current->policy = SCHED_YIELD;
+		schedule();
+#ifndef CONFIG_NO_PGT_CACHE
 		check_pgt_cache();
-#if 0 //def ARCH_IDLE_OK
-		if (!hlt_counter && !current->need_resched)
-			proc_idle ();
 #endif
-		run_task_queue(&tq_scheduler);
-		schedule();
 	}
-	ret = 0;
-out:
-	unlock_kernel();
-	return ret;
 }
 
+static char reboot_mode = 'h';
+
 __initfunc(void reboot_setup(char *str, int *ints))
 {
+	reboot_mode = str[0];
 }
 
-/*
- * This routine reboots the machine by resetting the expansion cards via
- * their loaders, turning off the processor cache (if ARM3), copying the
- * first instruction of the ROM to 0, and executing it there.
- */
 void machine_restart(char * __unused)
 {
-	proc_hard_reset ();
-	arch_hard_reset ();
+	arch_reset(reboot_mode);
+	panic("Reboot failed\n");
+	while (1);
 }
 
 void machine_halt(void)
@@ -150,6 +140,67 @@ void show_regs(struct pt_regs * regs)
 }
 
 /*
+ * Task structure and kernel stack allocation.
+ *
+ * Taken from the i386 version.
+ */
+#ifdef CONFIG_CPU_32
+#define EXTRA_TASK_STRUCT	8
+static struct task_struct *task_struct_stack[EXTRA_TASK_STRUCT];
+static int task_struct_stack_ptr = -1;
+#endif
+
+struct task_struct *alloc_task_struct(void)
+{	/* Returns a task struct from ll_alloc_task_struct() or from the small private cache; NULL if both fail */
+	struct task_struct *tsk;
+
+#ifndef EXTRA_TASK_STRUCT	/* no private cache compiled in: always use the low-level allocator */
+	tsk = ll_alloc_task_struct();
+#else
+	int index;
+
+	index = task_struct_stack_ptr;	/* index of the top cached entry, -1 when the cache is empty */
+	if (index >= EXTRA_TASK_STRUCT/2)	/* cache holds more than half its capacity: reuse an entry instead of allocating */
+		goto use_cache;
+
+	tsk = ll_alloc_task_struct();
+
+	if (!tsk) {	/* allocator failed: fall back to the cache if it holds anything */
+		index = task_struct_stack_ptr;
+
+		if (index >= 0) {
+use_cache:		tsk = task_struct_stack[index];
+			task_struct_stack_ptr = index - 1;	/* pop the entry just handed out */
+		}
+	}
+#endif
+#ifdef CONFIG_SYSRQ
+	/* You need this if you want SYSRQ-T to give sensible stack
+	 * usage information
+	 */
+	if (tsk) {
+		char *p = (char *)tsk;
+		memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE);	/* zeroes KERNEL_STACK_SIZE bytes starting KERNEL_STACK_SIZE into the block */
+	}
+#endif
+
+	return tsk;
+}
+
+void free_task_struct(struct task_struct *p)
+{	/* Releases p: parked in the private cache while there is room, otherwise handed to ll_free_task_struct() */
+#ifdef EXTRA_TASK_STRUCT
+	int index = task_struct_stack_ptr + 1;	/* next free cache slot; NOTE(review): no lock is taken here - confirm callers serialise */
+
+	if (index < EXTRA_TASK_STRUCT) {	/* room left: keep p for reuse rather than freeing it */
+		task_struct_stack[index] = p;
+		task_struct_stack_ptr = index;
+	} else
+#endif
+		ll_free_task_struct(p);	/* cache full, or no cache compiled in */
+}
+
+/*
  * Free current thread data structures etc..
  */
 void exit_thread(void)
@@ -179,9 +230,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 	childregs = ((struct pt_regs *)((unsigned long)p + 8192)) - 1;
 	*childregs = *regs;
 	childregs->ARM_r0 = 0;
+	childregs->ARM_sp = esp;
 
 	save = ((struct context_save_struct *)(childregs)) - 1;
-	copy_thread_css(save);
+	init_thread_css(save);
 	p->tss.save = save;
 
 	return 0;
@@ -224,3 +276,29 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
 	dump->regs = *regs;
 	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
 }
+
+/*
+ * This is the mechanism for creating a new kernel thread.
+ *
+ * NOTE! Only a kernel-only process(ie the swapper or direct descendants
+ * who haven't done an "execve()") should use this: it will work within
+ * a system call from a "real" process, but the process memory space will
+ * not be free'd until both the parent and the child have exited.
+ */
+pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	extern int sys_exit(int) __attribute__((noreturn));
+	pid_t __ret;
+
+	__asm__ __volatile__(
+	"mov	r0, %1		@ kernel_thread sys_clone\n"
+"	mov	r1, #0\n"
+	__syscall(clone)"\n"
+"	mov	%0, r0"
+        : "=r" (__ret)
+        : "Ir" (flags | CLONE_VM) : "r0", "r1");
+	if (__ret == 0)
+		sys_exit((fn)(arg));
+	return __ret;
+}
+
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 5e3bdfe3b..fbc3a2187 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -34,11 +34,11 @@
  */
 static inline long get_stack_long(struct task_struct *task, int offset)
 {
-	unsigned char *stack;
+	struct pt_regs *regs;
 
-	stack = (unsigned char *)((unsigned long)task + 8192 - sizeof(struct pt_regs));
-	stack += offset << 2;
-	return *(unsigned long *)stack;
+	regs = (struct pt_regs *)((unsigned long)task + 8192 - sizeof(struct pt_regs));
+
+	return regs->uregs[offset];
 }
 
 /*
@@ -50,11 +50,12 @@ static inline long get_stack_long(struct task_struct *task, int offset)
 static inline long put_stack_long(struct task_struct *task, int offset,
 	unsigned long data)
 {
-	unsigned char *stack;
+	struct pt_regs *regs;
+
+	regs = (struct pt_regs *)((unsigned long)task + 8192 - sizeof(struct pt_regs));
+
+	regs->uregs[offset] = data;
 
-	stack = (unsigned char *)((unsigned long)task + 8192 - sizeof(struct pt_regs));
-	stack += offset << 2;
-	*(unsigned long *) stack = data;
 	return 0;
 }
 
@@ -157,11 +158,16 @@ repeat:
 	
 	if (MAP_NR(page) < max_mapnr) {
 		page += addr & ~PAGE_MASK;
+
+		flush_cache_range(vma->vm_mm, addr, addr + sizeof(unsigned long));
+
 		*(unsigned long *)page = data;
-		__flush_entry_to_ram(page);
+
+		clean_cache_area(page, sizeof(unsigned long));
+
+		set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+		flush_tlb_page(vma, addr & PAGE_MASK);
 	}
-	set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-	flush_tlb();
 }
 
 /*
@@ -343,8 +349,7 @@ printk ("op2=r%02ldsh%dx%d", insn & 15, shift, type);
 printk ("=%08lX ", val);
 	return val;
 }
-#undef pc_pointer
-#define pc_pointer(x) ((x) & 0x03fffffc)
+
 int ptrace_set_bpt (struct task_struct *child)
 {
 	unsigned long insn, pc, alt;
@@ -651,7 +656,6 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 				return 0;
 			wake_up_process (child);
 			child->exit_code = SIGKILL;
-			ptrace_cancel_bpt (child);
 			/* make sure single-step breakpoint is gone. */
 			ptrace_cancel_bpt (child);
 			ret = 0;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index cddc3fab3..0b0a70087 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -56,12 +56,17 @@
 #define SUPPORT_CPU_SA110
 #endif
 
-#ifndef CONFIG_CMDLINE
-#define CONFIG_CMDLINE	"root=/dev/nfs rw"
-#endif
 #define MEM_SIZE	(16*1024*1024)
 #define COMMAND_LINE_SIZE 256
 
+#ifndef CONFIG_CMDLINE
+#define CONFIG_CMDLINE ""
+#endif
+
+extern void reboot_setup(char *str, int *ints);
+extern void fpe_init(void);
+extern void disable_hlt(void);
+
 struct drive_info_struct { char dummy[32]; } drive_info;
 struct screen_info screen_info = {
  orig_video_lines:	30,
@@ -87,20 +92,26 @@ const struct armversions armidlist[] = {
   /*-- Match -- --- Mask -- -- Manu --  Processor  uname -m   --- ELF STUFF ---
 	--- processor asm funcs --- */
 #if defined(CONFIG_CPU_26)
+  /* ARM2 fake ident */
   { 0x41560200, 0xfffffff0, "ARM/VLSI",	"arm2"	 , "armv1"  , "v1", 0,
 	&arm2_processor_functions   },
+  /* ARM250 fake ident */
   { 0x41560250, 0xfffffff0, "ARM/VLSI",	"arm250" , "armv2"  , "v2", HWCAP_SWP,
 	&arm250_processor_functions },
+  /* ARM3 processors */
   { 0x41560300, 0xfffffff0, "ARM/VLSI",	"arm3"	 , "armv2"  , "v2", HWCAP_SWP,
 	&arm3_processor_functions   },
 #elif defined(CONFIG_CPU_32)
 #ifdef SUPPORT_CPU_ARM6
+  /* ARM6 */
   { 0x41560600, 0xfffffff0, "ARM/VLSI",	"arm6"	 , "armv3"  , "v3", HWCAP_SWP,
 	&arm6_processor_functions   },
+  /* ARM610 */
   { 0x41560610, 0xfffffff0, "ARM/VLSI",	"arm610" , "armv3"  , "v3", HWCAP_SWP,
 	&arm6_processor_functions   },
 #endif
 #ifdef SUPPORT_CPU_ARM7
+  /* ARM7's have a strange numbering */
   { 0x41007000, 0xffffff00, "ARM/VLSI",	"arm7"	 , "armv3"  , "v3", HWCAP_SWP,
 	&arm7_processor_functions   },
   /* ARM710 IDs are non-standard */
@@ -108,10 +119,16 @@ const struct armversions armidlist[] = {
 	&arm7_processor_functions   },
 #endif
 #ifdef SUPPORT_CPU_SA110
-  { 0x4401a100, 0xfffffff0, "DEC",	"sa110"	 , "armv4"  , "v3", HWCAP_SWP|HWCAP_HALF,
+#ifdef CONFIG_ARCH_RPC
+  /* Acorn RiscPC's can't handle ARMv4 half-word instructions */
+  { 0x4401a100, 0xfffffff0, "Intel",	"sa110"	 , "armv4"  , "v4", HWCAP_SWP,
+	&sa110_processor_functions  },
+#else
+  { 0x4401a100, 0xfffffff0, "Intel",	"sa110"	 , "armv4"  , "v4", HWCAP_SWP|HWCAP_HALF,
 	&sa110_processor_functions  },
 #endif
 #endif
+#endif
   { 0x00000000, 0x00000000, "***", "unknown", "unknown", "**", 0, NULL }
 };
 
@@ -119,7 +136,7 @@ const struct armversions armidlist[] = {
  * From head-armv.S
  */
 unsigned int processor_id;
-unsigned int machine_type;
+unsigned int __machine_arch_type;
 int armidindex;
 
 extern int root_mountflags;
@@ -132,139 +149,10 @@ extern int _etext, _edata, _end;
  */
 
 /*
- * Risc-PC specific initialisation
- */
-#ifdef CONFIG_ARCH_RPC
-
-#include <asm/arch/mmu.h>
-
-unsigned int vram_half_sam;
-
-static void
-setup_rpc(struct param_struct *params)
-{
-	extern void init_dram_banks(const struct param_struct *params);
-
-	init_dram_banks(params);
-
-	switch (params->u1.s.pages_in_vram) {
-	case 256:
-		vram_half_sam = 1024;
-		break;
-	case 512:
-	default:
-		vram_half_sam = 2048;
-	}
-}
-#else
-#define setup_rpc(x)
-#endif
-
-#ifdef PARAMS_BASE
-
-#ifdef CONFIG_ARCH_ACORN
-int memc_ctrl_reg;
-int number_ide_drives;
-int number_mfm_drives;
-#endif
-
-static struct param_struct *params = (struct param_struct *)PARAMS_BASE;
-
-__initfunc(static char *
-setup_params(unsigned long *mem_end_p))
-{
-	ROOT_DEV	  = to_kdev_t(params->u1.s.rootdev);
-	ORIG_X		  = params->u1.s.video_x;
-	ORIG_Y		  = params->u1.s.video_y;
-	ORIG_VIDEO_COLS	  = params->u1.s.video_num_cols;
-	ORIG_VIDEO_LINES  = params->u1.s.video_num_rows;
-
-#ifdef CONFIG_ARCH_ACORN
-#ifndef CONFIG_FB
-	{
-		extern int bytes_per_char_h;
-		extern int bytes_per_char_v;
-
-		bytes_per_char_h  = params->u1.s.bytes_per_char_h;
-		bytes_per_char_v  = params->u1.s.bytes_per_char_v;
-	}
-#endif
-	memc_ctrl_reg	  = params->u1.s.memc_control_reg;
-	number_ide_drives = (params->u1.s.adfsdrives >> 6) & 3;
-	number_mfm_drives = (params->u1.s.adfsdrives >> 3) & 3;
-
-	setup_rpc(params);
-
-	if (!(params->u1.s.flags & FLAG_READONLY))
-		root_mountflags &= ~MS_RDONLY;
-#endif
-#ifdef CONFIG_BLK_DEV_RAM
-	{
-		extern int rd_doload;
-		extern int rd_prompt;
-		extern int rd_image_start;
-
-		rd_image_start = params->u1.s.rd_start;
-		rd_prompt = (params->u1.s.flags & FLAG_RDPROMPT) == 0;
-		rd_doload = (params->u1.s.flags & FLAG_RDLOAD) == 0;
-	}
-#endif
-
-#ifdef CONFIG_ARCH_ACORN
-	*mem_end_p = GET_MEMORY_END(params);
-#elif defined(CONFIG_ARCH_EBSA285)
-	*mem_end_p = PAGE_OFFSET + params->u1.s.page_size * params->u1.s.nr_pages;
-#else
-	*mem_end_p = PAGE_OFFSET + MEM_SIZE;
-#endif
-
-	return params->commandline;
-}
-
-#else
-
-static char default_command_line[] __initdata = CONFIG_CMDLINE;
-
-__initfunc(static char *
-setup_params(unsigned long *mem_end_p))
-{
-	ROOT_DEV	  = 0x00ff;
-
-#ifdef CONFIG_BLK_DEV_RAM
-	{
-		extern int rd_doload;
-		extern int rd_prompt;
-		extern int rd_image_start;
-
-		rd_image_start = 0;
-		rd_prompt = 1;
-		rd_doload = 1;
-	}
-#endif
-
-	*mem_end_p = PAGE_OFFSET + MEM_SIZE;
-
-	return default_command_line;
-}
-#endif
-
-/*
  * initial ram disk
  */
 #ifdef CONFIG_BLK_DEV_INITRD
 __initfunc(static void
-setup_initrd(const struct param_struct *params))
-{
-	if (params->u1.s.initrd_start) {
-		initrd_start = params->u1.s.initrd_start;
-		initrd_end   = initrd_start + params->u1.s.initrd_size;
-	} else {
-		initrd_start = 0;
-		initrd_end   = 0;
-	}
-}
-
-__initfunc(static void
 check_initrd(unsigned long mem_start, unsigned long mem_end))
 {
 	if (initrd_end > mem_end) {
@@ -276,7 +164,6 @@ check_initrd(unsigned long mem_start, unsigned long mem_end))
 }
 
 #else
-#define setup_initrd(p)
 #define check_initrd(ms,me)
 #endif
 
@@ -289,48 +176,47 @@ setup_processor(void))
 	       armidlist[armidindex].mask)
 		armidindex += 1;
 
-	if (armidlist[armidindex].id == 0) {
-#ifdef CONFIG_ARCH_ACORN
-		int i;
-
-		for (i = 0; i < 3200; i++)
-			((unsigned long *)SCREEN2_BASE)[i] = 0x77113322;
-#endif
+	if (armidlist[armidindex].id == 0)
 		while (1);
-	}
 
 	processor = *armidlist[armidindex].proc;
 	processor._proc_init();
 }
 
+static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
 static char command_line[COMMAND_LINE_SIZE] = { 0, };
        char saved_command_line[COMMAND_LINE_SIZE];
 
 __initfunc(static void
-setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_end))
+setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_sz))
 {
-	char c, *to = command_line;
+	char c = ' ', *to = command_line;
 	int len = 0;
 
 	*mem_start = (unsigned long)&_end;
 
 	for (;;) {
-		if (cmd_line[0] == ' ' &&
-		    cmd_line[1] == 'm' &&
-		    cmd_line[2] == 'e' &&
-		    cmd_line[3] == 'm' &&
-		    cmd_line[4] == '=') {
-			*mem_end = simple_strtoul(cmd_line+5, &cmd_line, 0);
-			switch(*cmd_line) {
-			case 'M':
-			case 'm':
-				*mem_end <<= 10;
-			case 'K':
-			case 'k':
-				*mem_end <<= 10;
+		if (c == ' ') {
+			if (cmd_line[0] == 'm' &&
+			    cmd_line[1] == 'e' &&
+			    cmd_line[2] == 'm' &&
+			    cmd_line[3] == '=') {
+				*mem_sz = simple_strtoul(cmd_line+4, &cmd_line, 0);
+				switch(*cmd_line) {
+				case 'M':
+				case 'm':
+					*mem_sz <<= 10;
+				case 'K':
+				case 'k':
+					*mem_sz <<= 10;
+					cmd_line++;
+				}
+			}
+			/* if there are two spaces, remove one */
+			if (*cmd_line == ' ') {
 				cmd_line++;
+				continue;
 			}
-			*mem_end = *mem_end + PAGE_OFFSET;
 		}
 		c = *cmd_line++;
 		if (!c)
@@ -341,42 +227,222 @@ setup_mem(char *cmd_line, unsigned long *mem_start, unsigned long *mem_end))
 	}
 
 	*to = '\0';
+
+	/* remove trailing spaces; test the bound first so an empty line never touches command_line[-1] */
+	while (to != command_line && *--to == ' ')
+		*to = '\0';
+}
+
+__initfunc(static void
+setup_ram(int doload, int prompt, int image_start))
+{	/* Copies the three ramdisk boot settings into the rd_* variables declared below */
+#ifdef CONFIG_BLK_DEV_RAM	/* without this option the function body is empty */
+	extern int rd_doload;
+	extern int rd_prompt;
+	extern int rd_image_start;
+
+	rd_image_start = image_start;
+	rd_prompt = prompt;
+	rd_doload = doload;
+#endif
+}
 
+/*
+ * Initial ram disk: record start and start + size in initrd_start/initrd_end, or zero both when start is 0
+ */
+__initfunc(static void
+setup_initrd(unsigned int start, unsigned int size))
+{
+#ifdef CONFIG_BLK_DEV_INITRD	/* without this option the function body is empty */
+	if (start) {
+		initrd_start = start;
+		initrd_end   = start + size;
+	} else {
+		initrd_start = 0;
+		initrd_end   = 0;
+	}
+#endif
+}
+
+#ifdef CONFIG_ARCH_ACORN
+int memc_ctrl_reg;
+int number_mfm_drives;
+unsigned int vram_size;
+#endif
+
+#ifndef PARAMS_BASE
+#define PARAMS_BASE NULL
+#endif
+
+static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } };
+#define ENDIANNESS ((char)endian_test.l)
+
 __initfunc(void
 setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p))
 {
+	struct param_struct *params = (struct param_struct *)PARAMS_BASE;
 	static unsigned char smptrap;
-	unsigned long memory_end;
-	char endian = 'l';
-	char *from;
+	unsigned long memory_end = 0;
+	char *from = NULL;
 
 	if (smptrap == 1)
 		return;
 	smptrap = 1;
 
+#if defined(CONFIG_ARCH_ARC)
+	__machine_arch_type = MACH_TYPE_ARCHIMEDES;
+#elif defined(CONFIG_ARCH_A5K)
+	__machine_arch_type = MACH_TYPE_A5K;
+#endif
+
 	setup_processor();
 
-	from = setup_params(&memory_end);
-	setup_initrd(params);
+	init_task.mm->start_code = TASK_SIZE;
+	init_task.mm->end_code	 = TASK_SIZE + (unsigned long) &_etext;
+	init_task.mm->end_data	 = TASK_SIZE + (unsigned long) &_edata;
+	init_task.mm->brk	 = TASK_SIZE + (unsigned long) &_end;
+
+	/*
+	 * Add your machine dependencies here
+	 */
+	switch (machine_arch_type) {
+	case MACH_TYPE_EBSA110:
+		/* EBSA110 locks if we execute 'wait for interrupt' */
+		disable_hlt();
+		params = NULL;
+		break;
+
+	case MACH_TYPE_EBSA285:
+		if (params) {
+			ORIG_X		 = params->u1.s.video_x;
+			ORIG_Y		 = params->u1.s.video_y;
+			ORIG_VIDEO_COLS  = params->u1.s.video_num_cols;
+			ORIG_VIDEO_LINES = params->u1.s.video_num_rows;
+		}
+		break;
+
+	case MACH_TYPE_CO285:
+		{
+#if 0
+			extern unsigned long boot_memory_end;
+			extern char boot_command_line[];
+
+			from = boot_command_line;
+			memory_end = boot_memory_end;
+#endif
+			params = NULL;
+		}
+		break;
+
+	case MACH_TYPE_CATS:
+		/* CATS must use soft-reboot */
+		reboot_setup("s", NULL);
+		break;
+
+	case MACH_TYPE_NETWINDER:
+		/*
+		 * to be fixed in a future NeTTrom
+		 */
+		if (params->u1.s.page_size == 4096) {
+			if (params->u1.s.nr_pages != 0x2000 &&
+			    params->u1.s.nr_pages != 0x4000) {
+				printk("Warning: bad NeTTrom parameters detected, using defaults\n");
+			    	/*
+			    	 * This stuff doesn't appear to be initialised
+			    	 * properly by NeTTrom 2.0.6 and 2.0.7
+			    	 */
+				params->u1.s.nr_pages = 0x2000;	/* 32MB */
+				params->u1.s.ramdisk_size = 0;
+				params->u1.s.flags = FLAG_READONLY;
+				params->u1.s.initrd_start = 0;
+				params->u1.s.initrd_size = 0;
+				params->u1.s.rd_start = 0;
+				params->u1.s.video_x = 0;
+				params->u1.s.video_y = 0;
+				params->u1.s.video_num_cols = 80;
+				params->u1.s.video_num_rows = 30;
+			}
+		} else {
+			printk("Warning: no NeTTrom parameter page detected, using "
+			       "compiled-in settings\n");
+			params = NULL;
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	if (params) {
+		memory_end	  = params->u1.s.page_size *
+				    params->u1.s.nr_pages;
+
+		ROOT_DEV	  = to_kdev_t(params->u1.s.rootdev);
+
+		setup_ram((params->u1.s.flags & FLAG_RDLOAD) == 0,
+			  (params->u1.s.flags & FLAG_RDPROMPT) == 0,
+			  params->u1.s.rd_start);
+
+		setup_initrd(params->u1.s.initrd_start,
+			     params->u1.s.initrd_size);
+
+		if (!(params->u1.s.flags & FLAG_READONLY))
+			root_mountflags &= ~MS_RDONLY;
+
+#ifdef CONFIG_ARCH_ACORN
+#ifdef CONFIG_ARCH_RPC
+		{
+			extern void init_dram_banks(struct param_struct *);
+			init_dram_banks(params);
+		}
+#endif
+
+		memc_ctrl_reg	  = params->u1.s.memc_control_reg;
+		number_mfm_drives = (params->u1.s.adfsdrives >> 3) & 3;
+		vram_size	  = 0;
+
+		switch (params->u1.s.pages_in_vram) {
+		case 512:
+			vram_size += PAGE_SIZE * 256;
+		case 256:
+			vram_size += PAGE_SIZE * 256;
+		default:
+			break;
+		}
+
+		memory_end -= vram_size;
+#endif
+
+		from = params->commandline;
+	} else {
+		ROOT_DEV	  = 0x00ff;
+
+		setup_ram(1, 1, 0);
+		setup_initrd(0, 0);
+	}
+
+	if (!memory_end)
+		memory_end = MEM_SIZE;
+
+	if (!from)
+		from = default_command_line;
+
+#ifdef CONFIG_NWFPE
+	fpe_init();
+#endif
 
 	/* Save unparsed command line copy for /proc/cmdline */
 	memcpy(saved_command_line, from, COMMAND_LINE_SIZE);
 	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
 
 	setup_mem(from, memory_start_p, &memory_end);
-	check_initrd(*memory_start_p, memory_end);
 
-	init_task.mm->start_code = TASK_SIZE;
-	init_task.mm->end_code	 = TASK_SIZE + (unsigned long) &_etext;
-	init_task.mm->end_data	 = TASK_SIZE + (unsigned long) &_edata;
-	init_task.mm->brk	 = TASK_SIZE + (unsigned long) &_end;
+	memory_end += PAGE_OFFSET;
 
-	*cmdline_p = command_line;
-	*memory_end_p = memory_end;
+	check_initrd(*memory_start_p, memory_end);
 
-	sprintf(system_utsname.machine, "%s%c", armidlist[armidindex].arch_vsn, endian);
-	sprintf(elf_platform, "%s%c", armidlist[armidindex].elf_vsn, endian);
+	sprintf(system_utsname.machine, "%s%c", armidlist[armidindex].arch_vsn, ENDIANNESS);
+	sprintf(elf_platform, "%s%c", armidlist[armidindex].elf_vsn, ENDIANNESS);
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
@@ -385,43 +451,26 @@ setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * mem
 	conswitchp = &dummy_con;
 #endif
 #endif
+
+	*cmdline_p = command_line;
+	*memory_end_p = memory_end;
 }
 
-static const struct {
-	char *machine_name;
-	char *bus_name;
-} machine_desc[] = {
-	{ "DEC-EBSA110",	"DEC"		},
-	{ "Acorn-RiscPC",	"Acorn"		},
-	{ "Nexus-NexusPCI",	"PCI"		},
-	{ "DEC-EBSA285",	"PCI"		},
-	{ "Corel-Netwinder",	"PCI/ISA"	},
-	{ "Chalice-CATS",	"PCI"		},
-	{ "unknown-TBOX",	"PCI"		}
+static const char *machine_desc[] = {
+	"EBSA110",
+	"Acorn-RiscPC",
+	"unknown",
+	"Nexus-FTV/PCI",
+	"EBSA285",
+	"Corel-NetWinder",
+	"Chalice-CATS",
+	"unknown-TBOX",
+	"co-EBSA285",
+	"CL-PS7110",
+	"Acorn-Archimedes",
+	"Acorn-A5000"
 };
 
-#if defined(CONFIG_ARCH_ARC)
-#define HARDWARE "Acorn-Archimedes"
-#define IO_BUS	 "Acorn"
-#elif defined(CONFIG_ARCH_A5K)
-#define HARDWARE "Acorn-A5000"
-#define IO_BUS	 "Acorn"
-#endif
-
-#if defined(CONFIG_CPU_ARM2)
-#define OPTIMISATION "ARM2"
-#elif defined(CONFIG_CPU_ARM3)
-#define OPTIMISATION "ARM3"
-#elif defined(CONFIG_CPU_ARM6)
-#define OPTIMISATION "ARM6"
-#elif defined(CONFIG_CPU_ARM7)
-#define OPTIMISATION "ARM7"
-#elif defined(CONFIG_CPU_SA110)
-#define OPTIMISATION "StrongARM"
-#else
-#define OPTIMISATION "unknown"
-#endif
-
 int get_cpuinfo(char * buffer)
 {
 	int len;
@@ -429,25 +478,12 @@ int get_cpuinfo(char * buffer)
 	len = sprintf(buffer,
 		"Processor\t: %s %s rev %d\n"
 		"BogoMips\t: %lu.%02lu\n"
-		"Hardware\t: %s\n"
-		"Optimisation\t: %s\n"
-		"IO Bus\t\t: %s\n",
+		"Hardware\t: %s\n",
 		armidlist[armidindex].manu,
 		armidlist[armidindex].name,
 		(int)processor_id & 15,
 		(loops_per_sec+2500) / 500000,
 		((loops_per_sec+2500) / 5000) % 100,
-#ifdef HARDWARE
-		HARDWARE,
-#else
-		machine_desc[machine_type].machine_name,
-#endif
-		OPTIMISATION,
-#ifdef IO_BUS
-		IO_BUS
-#else
-		machine_desc[machine_type].bus_name
-#endif
-		);
+		machine_desc[machine_arch_type]);
 	return len;
 }
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 51e6bcb17..5ec48f752 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -28,7 +28,7 @@
 
 asmlinkage int sys_wait4(pid_t pid, unsigned long * stat_addr,
 			 int options, unsigned long *ru);
-asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs);
+asmlinkage int do_signal(sigset_t *oldset, struct pt_regs * regs, int syscall);
 extern int ptrace_cancel_bpt (struct task_struct *);
 extern int ptrace_set_bpt (struct task_struct *);
 
@@ -50,7 +50,7 @@ asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t m
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(&saveset, regs))
+		if (do_signal(&saveset, regs, 0))
 			return regs->ARM_r0;
 	}
 }
@@ -78,7 +78,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs)
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(&saveset, regs))
+		if (do_signal(&saveset, regs, 0))
 			return regs->ARM_r0;
 	}
 }
@@ -158,12 +158,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
 #ifdef CONFIG_CPU_32
 	err |= __get_user(regs->ARM_cpsr, &sc->arm_cpsr);
 #endif
-	if (!valid_user_regs(regs))
-		return 1;
 
-	/* send SIGTRAP if we're single-stepping */
-	if (ptrace_cancel_bpt (current))
-		send_sig (SIGTRAP, current, 1);
+	err |= !valid_user_regs(regs);
 
 	return err;
 }
@@ -173,6 +169,14 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs)
 	struct sigframe *frame;
 	sigset_t set;
 
+	/*
+	 * Since we stacked the signal on a word boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->ARM_sp & 3)
+		goto badframe;
+
 	frame = (struct sigframe *)regs->ARM_sp;
 
 	if (verify_area(VERIFY_READ, frame, sizeof (*frame)))
@@ -192,6 +196,10 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->sc))
 		goto badframe;
 
+	/* Send SIGTRAP if we're single-stepping */
+	if (ptrace_cancel_bpt (current))
+		send_sig(SIGTRAP, current, 1);
+
 	return regs->ARM_r0;
 
 badframe:
@@ -204,6 +212,14 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
 	struct rt_sigframe *frame;
 	sigset_t set;
 
+	/*
+	 * Since we stacked the signal on a word boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->ARM_sp & 3)
+		goto badframe;
+
 	frame = (struct rt_sigframe *)regs->ARM_sp;
 
 	if (verify_area(VERIFY_READ, frame, sizeof (*frame)))
@@ -220,6 +236,10 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
+	/* Send SIGTRAP if we're single-stepping */
+	if (ptrace_cancel_bpt (current))
+		send_sig(SIGTRAP, current, 1);
+
 	return regs->ARM_r0;
 
 badframe:
@@ -260,6 +280,26 @@ setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/
 	return err;
 }
 
+static inline void *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+				 unsigned long framesize)
+{
+	unsigned long sp = regs->ARM_sp;
+
+	/*
+	 * This is the X/Open sanctioned signal stack switching.
+	 */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && ! on_sig_stack(sp))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	/*
+	 * No matter what happens, 'sp' must be word
+	 * aligned otherwise nasty things could happen
+	 */
+	sp &= ~3;
+
+	return (void *)(sp - framesize);
+}
+
 static void setup_frame(int sig, struct k_sigaction *ka,
 			sigset_t *set, struct pt_regs *regs)
 {
@@ -267,9 +307,9 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 	unsigned long retcode;
 	int err = 0;
 
-	frame = (struct sigframe *)regs->ARM_sp - 1;
+	frame = get_sigframe(ka, regs, sizeof(*frame));
 
-	if (!access_ok(VERIFT_WRITE, frame, sizeof (*frame)))
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto segv_and_exit;
 
 	err |= setup_sigcontext(&frame->sc, /*&frame->fpstate,*/ regs, set->sig[0]);
@@ -286,7 +326,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 	} else {
 		retcode = (unsigned long)&frame->retcode;
 		err |= __put_user(SWI_SYS_SIGRETURN, &frame->retcode);
-		__flush_entry_to_ram (&frame->retcode);
+		flush_icache_range(retcode, retcode + 4);
 	}
 
 	if (err)
@@ -299,6 +339,11 @@ static void setup_frame(int sig, struct k_sigaction *ka,
 	regs->ARM_sp = (unsigned long)frame;
 	regs->ARM_lr = retcode;
 	regs->ARM_pc = (unsigned long)ka->sa.sa_handler;
+#if defined(CONFIG_CPU_32)
+	/* Maybe we need to deliver a 32-bit signal to a 26-bit task. */
+	if (ka->sa.sa_flags & SA_THIRTYTWO)
+		regs->ARM_cpsr = USR_MODE;
+#endif
 	if (valid_user_regs(regs))
 		return;
 
@@ -315,7 +360,8 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	unsigned long retcode;
 	int err = 0;
 
-	frame = (struct rt_sigframe *)regs->ARM_sp - 1;
+	frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe));
+
 	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto segv_and_exit;
 
@@ -337,7 +383,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	} else {
 		retcode = (unsigned long)&frame->retcode;
 		err |= __put_user(SWI_SYS_RT_SIGRETURN, &frame->retcode);
-		__flush_entry_to_ram (&frame->retcode);
+		flush_icache_range(retcode, retcode + 4);
 	}
 
 	if (err)
@@ -350,6 +396,11 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->ARM_sp = (unsigned long)frame;
 	regs->ARM_lr = retcode;
 	regs->ARM_pc = (unsigned long)ka->sa.sa_handler;
+#if defined(CONFIG_CPU_32)
+	/* Maybe we need to deliver a 32-bit signal to a 26-bit task. */
+	if (ka->sa.sa_flags & SA_THIRTYTWO)
+		regs->ARM_cpsr = USR_MODE;
+#endif
 	if (valid_user_regs(regs))
 		return;
 
@@ -393,18 +444,25 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
  * the kernel can handle, and then we build all the user-level signal handling
  * stack-frames in one go after that.
  */
-asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs)
+asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall)
 {
-	unsigned long instr, *pc = (unsigned long *)(instruction_pointer(regs)-4);
 	struct k_sigaction *ka;
 	siginfo_t info;
-	int single_stepping, swi_instr;
+	int single_stepping;
+
+	/*
+	 * We want the common case to go fast, which
+	 * is why we may in certain cases get here from
+	 * kernel mode. Just return without doing anything
+	 * if so.
+	 */
+	if (!user_mode(regs))
+		return 0;
 
 	if (!oldset)
 		oldset = &current->blocked;
 
 	single_stepping = ptrace_cancel_bpt (current);
-	swi_instr = (!get_user (instr, pc) && (instr & 0x0f000000) == 0x0f000000);
 
 	for (;;) {
 		unsigned long signr;
@@ -503,7 +561,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs)
 		}
 
 		/* Are we from a system call? */
-		if (swi_instr) {
+		if (syscall) {
 			switch (regs->ARM_r0) {
 			case -ERESTARTNOHAND:
 				regs->ARM_r0 = -EINTR;
@@ -527,7 +585,7 @@ asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs)
 		return 1;
 	}
 
-	if (swi_instr &&
+	if (syscall &&
 	    (regs->ARM_r0 == -ERESTARTNOHAND ||
 	     regs->ARM_r0 == -ERESTARTSYS ||
 	     regs->ARM_r0 == -ERESTARTNOINTR)) {
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index d50b90f8d..9da64aad0 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -223,13 +223,7 @@ out:
  */
 asmlinkage int sys_fork(struct pt_regs *regs)
 {
-	int ret;
-
-	lock_kernel();
-	ret = do_fork(SIGCHLD, regs->ARM_sp, regs);
-	unlock_kernel();
-
-	return ret;
+	return do_fork(SIGCHLD, regs->ARM_sp, regs);
 }
 
 /* Clone a task - this clones the calling program thread.
@@ -237,14 +231,14 @@ asmlinkage int sys_fork(struct pt_regs *regs)
  */
 asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs)
 {
-	int ret;
-
-	lock_kernel();
 	if (!newsp)
 		newsp = regs->ARM_sp;
-	ret = do_fork(clone_flags, newsp, regs);
-	unlock_kernel();
-	return ret;
+	return do_fork(clone_flags, newsp, regs);
+}
+
+asmlinkage int sys_vfork(struct pt_regs *regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs);
 }
 
 /* sys_execve() executes a new program.
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index b6448e942..c874a1ba8 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -129,27 +129,12 @@ void do_settimeofday(struct timeval *tv)
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
-	sti ();
+	sti();
 }
 
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick.
- */
-static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-	if (reset_timer ())
-		do_timer(regs);
-
-	update_rtc ();
-}
-
-static struct irqaction irqtimer = { timer_interrupt, 0, 0, "timer", NULL, NULL};
-
 __initfunc(void time_init(void))
 {
-	xtime.tv_sec = setup_timer();
 	xtime.tv_usec = 0;
 
-	setup_arm_irq(IRQ_TIMER, &irqtimer);
+	setup_timer();
 }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5d04f325b..9267fec09 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -24,7 +24,6 @@
 #include <asm/atomic.h>
 #include <asm/pgtable.h>
 
-extern void die_if_kernel(char *str, struct pt_regs *regs, int err, int ret);
 extern void c_backtrace (unsigned long fp, int pmode);
 extern int ptrace_cancel_bpt (struct task_struct *);
 
@@ -45,16 +44,17 @@ static inline void console_verbose(void)
 
 int kstack_depth_to_print = 200;
 
-static int verify_stack_pointer (unsigned long stackptr, int size)
+/*
+ * Stack pointers should always be within the kernel's view of
+ * physical memory.  If it is not there, then we can't dump
+ * out any information relating to the stack.
+ */
+static int verify_stack(unsigned long sp)
 {
-#ifdef CONFIG_CPU_26
-	if (stackptr < 0x02048000 || stackptr + size > 0x03000000)
-        	return -EFAULT;
-#else
-	if (stackptr < PAGE_OFFSET || stackptr + size > (unsigned long)high_memory)
+	if (sp < PAGE_OFFSET || sp > (unsigned long)high_memory)
 		return -EFAULT;
-#endif
-		return 0;
+
+	return 0;
 }
 
 /*
@@ -90,22 +90,26 @@ void dump_mem(unsigned long bottom, unsigned long top)
 
 static void dump_instr(unsigned long pc, int user)
 {
-	unsigned long module_start, module_end;
 	int pmin = -2, pmax = 3, ok = 0;
 	extern char start_kernel, _etext;
 
 	if (!user) {
+		unsigned long module_start, module_end;
+		unsigned long kernel_start, kernel_end;
+
 		module_start = VMALLOC_START;
 		module_end   = module_start + MODULE_RANGE;
 
-		if ((pc >= (unsigned long) &start_kernel) &&
-		    (pc <= (unsigned long) &_etext)) {
-			if (pc + pmin < (unsigned long) &start_kernel)
-				pmin = ((unsigned long) &start_kernel) - pc;
-			if (pc + pmax > (unsigned long) &_etext)
-				pmax = ((unsigned long) &_etext) - pc;
+		kernel_start = (unsigned long)&start_kernel;
+		kernel_end   = (unsigned long)&_etext;
+
+		if (pc >= kernel_start && pc < kernel_end) {
+			if (pc + pmin < kernel_start)
+				pmin = kernel_start - pc;
+			if (pc + pmax > kernel_end)
+				pmax = kernel_end - pc;
 			ok = 1;
-		} else if (pc >= module_start && pc <= module_end) {
+		} else if (pc >= module_start && pc < module_end) {
 			if (pc + pmin < module_start)
 				pmin = module_start - pc;
 			if (pc + pmax > module_end)
@@ -125,119 +129,138 @@ static void dump_instr(unsigned long pc, int user)
 		printk ("pc not in code space\n");
 }
 
-static void dump_state(char *str, struct pt_regs *regs, int err)
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Print an oops report.  Protected against re-entrancy by die_lock.
+ */
+void die(const char *str, struct pt_regs *regs, int err)
 {
+	struct task_struct *tsk = current;
+
+	spin_lock_irq(&die_lock);
+
 	console_verbose();
 	printk("Internal error: %s: %x\n", str, err);
 	printk("CPU: %d\n", smp_processor_id());
 	show_regs(regs);
 	printk("Process %s (pid: %d, stackpage=%08lx)\n",
-		current->comm, current->pid, 4096+(unsigned long)current);
+		tsk->comm, tsk->pid, 4096+(unsigned long)tsk);
+
+	if (!user_mode(regs)) {
+		unsigned long sp = (unsigned long)(regs + 1);	/* first address past the saved regs */
+		unsigned long fp;
+		int dump_info = 1;
+
+		printk("Stack: ");
+		if (verify_stack(sp)) {
+			printk("invalid kernel stack pointer %08lx", sp);
+			dump_info = 0;
+		} else if (sp < 4096+(unsigned long)tsk)
+			printk("kernel stack pointer underflow");
+		printk("\n");
+
+		if (dump_info)
+			dump_mem(sp - 16, 8192+(unsigned long)tsk);
+
+		dump_info = 1;
+
+		printk("Backtrace: ");
+		fp = regs->ARM_fp;
+		if (!fp) {
+			printk("no frame pointer");
+			dump_info = 0;
+		} else if (verify_stack(fp)) {
+			printk("invalid frame pointer %08lx", fp);
+			dump_info = 0;
+		} else if (fp < 4096+(unsigned long)tsk)
+			printk("frame pointer underflow");
+		printk("\n");
+
+		if (dump_info)
+			c_backtrace(fp, processor_mode(regs));
+
+		dump_instr(instruction_pointer(regs), 0);
+	}
+
+	spin_unlock_irq(&die_lock);
 }
 
-/*
- * This function is protected against kernel-mode re-entrancy.  If it
- * is re-entered it will hang the system since we can't guarantee in
- * this case that any of the functions that it calls are safe any more.
- * Even the panic function could be a problem, but we'll give it a go.
- */
-void die_if_kernel(char *str, struct pt_regs *regs, int err, int ret)
+static void die_if_kernel(const char *str, struct pt_regs *regs, int err)
 {
-	static int died = 0;
-	unsigned long cstack, sstack, frameptr;
-	
 	if (user_mode(regs))
     		return;
 
-	switch (died) {
-	case 2:
-		while (1);
-	case 1:
-		died ++;
-		panic ("die_if_kernel re-entered.  Major kernel corruption.  Please reboot me!");
-		break;
-	case 0:
-		died ++;
-		break;
-	}
-
-	dump_state(str, regs, err);
-
-	cstack = (unsigned long)(regs + 1);
-	sstack = 4096+(unsigned long)current;
-
-	printk("Stack: ");
-	if (verify_stack_pointer(cstack, 4))
-		printk("invalid kernel stack pointer %08lx", cstack);
-	else if(cstack > sstack + 4096)
-		printk("(sp overflow)");
-	else if(cstack < sstack)
-		printk("(sp underflow)");
-	printk("\n");
-
-	dump_mem(cstack - 16, sstack + 4096);
-
-	frameptr = regs->ARM_fp;
-	if (frameptr) {
-		if (verify_stack_pointer (frameptr, 4))
-			printk ("Backtrace: invalid frame pointer\n");
-		else {
-			printk("Backtrace: \n");
-			c_backtrace (frameptr, processor_mode(regs));
-		}
-	}
-
-	dump_instr(instruction_pointer(regs), 0);
-	died = 0;
-	if (ret != -1)
-		do_exit (ret);
-	else {
-		cli ();
-		while (1);
-	}
+    	die(str, regs, err);
 }
 
-void bad_user_access_alignment (const void *ptr)
+void bad_user_access_alignment(const void *ptr)
 {
-	void *pc;
-	__asm__("mov %0, lr\n": "=r" (pc));
-	printk (KERN_ERR "bad_user_access_alignment called: ptr = %p, pc = %p\n", ptr, pc);
+	printk(KERN_ERR "bad user access alignment: ptr = %p, pc = %p\n", ptr, 
+		__builtin_return_address(0));
 	current->tss.error_code = 0;
 	current->tss.trap_no = 11;
-	force_sig (SIGBUS, current);
-/*	die_if_kernel("Oops - bad user access alignment", regs, mode, SIGBUS);*/
+	force_sig(SIGBUS, current);
+/*	die_if_kernel("Oops - bad user access alignment", regs, mode);*/
 }
 
-asmlinkage void do_undefinstr (int address, struct pt_regs *regs, int mode)
+asmlinkage void do_undefinstr(int address, struct pt_regs *regs, int mode)
 {
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_INFO "%s (%d): undefined instruction: pc=%08lx\n",
+		current->comm, current->pid, instruction_pointer(regs));
+#endif
 	current->tss.error_code = 0;
 	current->tss.trap_no = 6;
-	force_sig (SIGILL, current);
-	die_if_kernel("Oops - undefined instruction", regs, mode, SIGILL);
+	force_sig(SIGILL, current);
+	die_if_kernel("Oops - undefined instruction", regs, mode);
 }
 
-asmlinkage void do_excpt (int address, struct pt_regs *regs, int mode)
+asmlinkage void do_excpt(int address, struct pt_regs *regs, int mode)
 {
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_INFO "%s (%d): address exception: pc=%08lx\n",
+		current->comm, current->pid, instruction_pointer(regs));
+#endif
 	current->tss.error_code = 0;
 	current->tss.trap_no = 11;
-	force_sig (SIGBUS, current);
-	die_if_kernel("Oops - address exception", regs, mode, SIGBUS);
+	force_sig(SIGBUS, current);
+	die_if_kernel("Oops - address exception", regs, mode);
 }
 
 asmlinkage void do_unexp_fiq (struct pt_regs *regs)
 {
 #ifndef CONFIG_IGNORE_FIQ
-	printk ("Hmm.  Unexpected FIQ received, but trying to continue\n");
-	printk ("You may have a hardware problem...\n");
+	printk("Hmm.  Unexpected FIQ received, but trying to continue\n");
+	printk("You may have a hardware problem...\n");
 #endif
 }
 
+/*
+ * bad_mode handles the impossible case in the vectors.
+ * If you see one of these, then it's extremely serious,
+ * and could mean you have buggy hardware.  It never
+ * returns, and never tries to sync.  We hope that we
+ * can dump out some state information...
+ */
 asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode)
 {
-	printk (KERN_CRIT "Bad mode in %s handler detected: mode %s\n",
-		handler[reason],
-		processor_modes[proc_mode]);
-	die_if_kernel ("Oops", regs, 0, -1);
+	console_verbose();
+
+	printk(KERN_CRIT "Bad mode in %s handler detected: mode %s\n",
+		handler[reason], processor_modes[proc_mode]);
+
+	/*
+	 * Dump out the vectors and stub routines
+	 */
+	printk(KERN_CRIT "Vectors:\n");
+	dump_mem(0, 0x40);
+	printk(KERN_CRIT "Stubs:\n");
+	dump_mem(0x200, 0x4b8);
+
+	die("Oops", regs, 0);
+	cli();
+	while(1);
 }
 
 /*
@@ -249,54 +272,85 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode)
  */
 asmlinkage void math_state_restore (void)
 {
-    	current->used_math = 1;
+	current->used_math = 1;
 }
 
-asmlinkage void arm_syscall (int no, struct pt_regs *regs)
+asmlinkage int arm_syscall (int no, struct pt_regs *regs)
 {
 	switch (no) {
 	case 0: /* branch through 0 */
 		force_sig(SIGSEGV, current);
-//		if (user_mode(regs)) {
-//			dump_state("branch through zero", regs, 0);
-//			if (regs->ARM_fp)
-//				c_backtrace (regs->ARM_fp, processor_mode(regs));
-//		}
-		die_if_kernel ("branch through zero", regs, 0, SIGSEGV);
+		die_if_kernel("branch through zero", regs, 0);
 		break;
 
 	case 1: /* SWI_BREAK_POINT */
 		regs->ARM_pc -= 4; /* Decrement PC by one instruction */
-		ptrace_cancel_bpt (current);
-		force_sig (SIGTRAP, current);
+		ptrace_cancel_bpt(current);
+		force_sig(SIGTRAP, current);
+		return regs->ARM_r0;
+
+	case 2:	/* sys_cacheflush */
+#ifdef CONFIG_CPU_32
+		/* r0 = start, r1 = length, r2 = flags */
+		processor.u.armv3v4._flush_cache_area(regs->ARM_r0,
+						      regs->ARM_r1,
+						      1);
+#endif
 		break;
 
 	default:
-		printk ("[%d] %s: arm syscall %d\n", current->pid, current->comm, no);
-		force_sig (SIGILL, current);
+		/* Calls 9f00xx..9f07ff are defined to return -ENOSYS
+		   if not implemented, rather than raising SIGILL.  This
+		   way the calling program can gracefully determine whether
+		   a feature is supported.  */
+		if (no <= 0x7ff)
+			return -ENOSYS;
+#ifdef CONFIG_DEBUG_USER
+		/* experience shows that these seem to indicate that
+		 * something catastrophic has happened
+		 */
+		printk("[%d] %s: arm syscall %d\n", current->pid, current->comm, no);
 		if (user_mode(regs)) {
-			show_regs (regs);
-			c_backtrace (regs->ARM_fp, processor_mode(regs));
+			show_regs(regs);
+			c_backtrace(regs->ARM_fp, processor_mode(regs));
 		}
-		die_if_kernel ("Oops", regs, no, SIGILL);
+#endif
+		force_sig(SIGILL, current);
+		die_if_kernel("Oops", regs, no);
 		break;
 	}
+	return 0;
 }
 
 asmlinkage void deferred(int n, struct pt_regs *regs)
 {
-	dump_state("old system call", regs, n);
-	force_sig (SIGILL, current);
+	/* You might think just testing `handler' would be enough, but PER_LINUX
+	 * points it to no_lcall7 to catch undercover SVr4 binaries.  Gutted.
+	 */
+	if (current->personality != PER_LINUX && current->exec_domain->handler) {
+		/* Hand it off to iBCS.  The extra parameter and consequent type 
+		 * forcing is necessary because of the weird ARM calling convention.
+		 */
+		void (*handler)(int nr, struct pt_regs *regs) = (void *)current->exec_domain->handler;
+		(*handler)(n, regs);
+		return;
+	}
+
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_ERR "[%d] %s: old system call.\n", current->pid, 
+	       current->comm);
+#endif
+	force_sig(SIGILL, current);
 }
 
 asmlinkage void arm_malalignedptr(const char *str, void *pc, volatile void *ptr)
 {
-	printk ("Mal-aligned pointer in %s: %p (PC=%p)\n", str, ptr, pc);
+	printk("Mal-aligned pointer in %s: %p (PC=%p)\n", str, ptr, pc);
 }
 
-asmlinkage void arm_invalidptr (const char *function, int size)
+asmlinkage void arm_invalidptr(const char *function, int size)
 {
-	printk ("Invalid pointer size in %s (PC=%p) size %d\n",
+	printk("Invalid pointer size in %s (pc=%p) size %d\n",
 		function, __builtin_return_address(0), size);
 }
 
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 684db2a47..0b241d333 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,14 +6,14 @@
 
 L_TARGET := lib.a
 L_OBJS   := backtrace.o bitops.o checksum.o delay.o io.o memcpy.o \
-	    system.o string.o uaccess.o
+	    semaphore.o string.o system.o uaccess.o
 
 ifeq ($(PROCESSOR),armo)
   L_OBJS += uaccess-armo.o
 endif
 
 ifdef CONFIG_ARCH_ACORN
-  L_OBJS += loaders.o ll_char_wr.o io-acorn.o
+  L_OBJS += loaders.o io-acorn.o
   ifdef CONFIG_ARCH_A5K
     L_OBJS += floppydma.o
   endif
@@ -26,12 +26,8 @@ ifeq ($(MACHINE),ebsa110)
   L_OBJS += io-ebsa110.o
 endif
 
-ifeq ($(MACHINE),vnc)
-  L_OBJS += io-ebsa285.o
-endif
-
-ifeq ($(MACHINE),ebsa285)
-  L_OBJS += io-ebsa285.o
+ifeq ($(MACHINE),footbridge)
+  L_OBJS += io-footbridge.o
 endif
 
 include $(TOPDIR)/Rules.make
@@ -45,10 +41,4 @@ getconsdata.o: getconsdata.c
 checksum.o: constants.h
 
 %.o: %.S
-ifneq ($(CONFIG_BINUTILS_NEW),y)
-	$(CC) $(CFLAGS) -D__ASSEMBLY__ -E $< | tr ';$$' '\n#' > ..tmp.$<.s
-	$(CC) $(CFLAGS:-pipe=) -c -o $@ ..tmp.$<.s
-	$(RM) ..tmp.$<.s
-else
 	$(CC) $(CFLAGS) -D__ASSEMBLY__ -c -o $@ $<
-endif
diff --git a/arch/arm/lib/checksum.S b/arch/arm/lib/checksum.S
index bd5c78d34..daf49fc94 100644
--- a/arch/arm/lib/checksum.S
+++ b/arch/arm/lib/checksum.S
@@ -520,13 +520,13 @@ Ldst_aligned:	tst	r0, #3
 		LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc})
 		ldr	r4, [r0], #4
 		tst	r2, #2
-		beq	Lexit
+		beq	Lexit_r4
 		adcs	r3, r3, r4, lsl #16
 		strb	r4, [r1], #1
 		mov	r4, r4, lsr #8
 		strb	r4, [r1], #1
 		mov	r4, r4, lsr #8
-		b	Lexit
+		b	Lexit_r4
 
 Ltoo_small:	teq	r2, #0
 		LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc})
@@ -538,10 +538,12 @@ Ltoo_small:	teq	r2, #0
 		adds	r3, r3, ip
 		strb	ip, [r1], #1
 		strb	r8, [r1], #1
-Lexit:		tst	r2, #1
-Ltoo_small1:	ldrneb	ip, [r0], #1
-		strneb	ip, [r1], #1
-		adcnes	r3, r3, ip
+		tst	r2, #1
+Ltoo_small1:	ldrneb	r4, [r0], #1
+Lexit_r4:	tst	r2, #1
+		strneb	r4, [r1], #1
+		andne	r4, r4, #255
+		adcnes	r3, r3, r4
 		adcs	r0, r3, #0
 		LOADREGS(ea,fp,{r4 - r8, fp, sp, pc})
 
@@ -598,13 +600,13 @@ Lsrc_not_aligned:
 		adceq	r0, r3, #0
 		LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc})
 		tst	r2, #2
-		beq	Lexit
+		beq	Lexit_r4
 		adcs	r3, r3, r4, lsl #16
 		strb	r4, [r1], #1
 		mov	r4, r4, lsr #8
 		strb	r4, [r1], #1
 		mov	r4, r4, lsr #8
-		b	Lexit
+		b	Lexit_r4
 
 Lsrc2_aligned:	mov	r4, r4, lsr #16
 		adds	r3, r3, #0
@@ -650,13 +652,13 @@ Lsrc2_aligned:	mov	r4, r4, lsr #16
 		adceq	r0, r3, #0
 		LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc})
 		tst	r2, #2
-		beq	Lexit
+		beq	Lexit_r4
 		adcs	r3, r3, r4, lsl #16
 		strb	r4, [r1], #1
 		mov	r4, r4, lsr #8
 		strb	r4, [r1], #1
 		ldrb	r4, [r0], #1
-		b	Lexit
+		b	Lexit_r4
 
 Lsrc3_aligned:	mov	r4, r4, lsr #24
 		adds	r3, r3, #0
@@ -702,14 +704,14 @@ Lsrc3_aligned:	mov	r4, r4, lsr #24
 		adceq	r0, r3, #0
 		LOADREGS(eqea,fp,{r4 - r8, fp, sp, pc})
 		tst	r2, #2
-		beq	Lexit
+		beq	Lexit_r4
 		adcs	r3, r3, r4, lsl #16
 		strb	r4, [r1], #1
 		ldr	r4, [r0], #4
 		strb	r4, [r1], #1
 		adcs	r3, r3, r4, lsl #24
 		mov	r4, r4, lsr #8
-		b	Lexit
+		b	Lexit_r4
 
 ENTRY(__csum_ipv6_magic)
 		stmfd	sp!, {lr}
diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S
index 08fdccb27..778d3e574 100644
--- a/arch/arm/lib/floppydma.S
+++ b/arch/arm/lib/floppydma.S
@@ -26,32 +26,3 @@ ENTRY(floppy_fiqout_start)
 		strb	r12, [r11, #-4]
 		subs	pc, lr, #4
 SYMBOL_NAME(floppy_fiqout_end):
-
-@ Params:
-@ r0 = length
-@ r1 = address
-@ r2 = floppy port
-@ Puts these into R9_fiq, R10_fiq, R11_fiq
-ENTRY(floppy_fiqsetup)
-		mov	ip, sp
-		stmfd	sp!, {fp, ip, lr, pc}
-		sub	fp, ip, #4
-		MODE(r3,ip,I_BIT|F_BIT|DEFAULT_FIQ)	@ disable FIQs, IRQs, FIQ mode
-		mov	r0, r0
-		mov	r9, r0
-		mov	r10, r1
-		mov	r11, r2
-		RESTOREMODE(r3)				@ back to normal
-		mov	r0, r0
-		LOADREGS(ea,fp,{fp, sp, pc})
-
-ENTRY(floppy_fiqresidual)
-		mov	ip, sp
-		stmfd	sp!, {fp, ip, lr, pc}
-		sub	fp, ip, #4
-		MODE(r3,ip,I_BIT|F_BIT|DEFAULT_FIQ)	@ disable FIQs, IRQs, FIQ mode
-		mov	r0, r0
-		mov	r0, r9
-		RESTOREMODE(r3)
-		mov	r0, r0
-		LOADREGS(ea,fp,{fp, sp, pc})
diff --git a/arch/arm/lib/getconsdata.c b/arch/arm/lib/getconsdata.c
index ba145eaff..27f4ca2ef 100644
--- a/arch/arm/lib/getconsdata.c
+++ b/arch/arm/lib/getconsdata.c
@@ -67,6 +67,23 @@ unsigned long PAGE_OLD = _PAGE_OLD;
 unsigned long PAGE_CLEAN = _PAGE_CLEAN;
 #endif
 
+#ifdef PTE_TYPE_SMALL
+unsigned long HPTE_TYPE_SMALL = PTE_TYPE_SMALL;
+unsigned long HPTE_AP_READ    = PTE_AP_READ;
+unsigned long HPTE_AP_WRITE   = PTE_AP_WRITE;
+#endif
+
+#ifdef L_PTE_PRESENT
+unsigned long LPTE_PRESENT    = L_PTE_PRESENT;
+unsigned long LPTE_YOUNG      = L_PTE_YOUNG;
+unsigned long LPTE_BUFFERABLE = L_PTE_BUFFERABLE;
+unsigned long LPTE_CACHEABLE  = L_PTE_CACHEABLE;
+unsigned long LPTE_USER       = L_PTE_USER;
+unsigned long LPTE_WRITE      = L_PTE_WRITE;
+unsigned long LPTE_EXEC       = L_PTE_EXEC;
+unsigned long LPTE_DIRTY      = L_PTE_DIRTY;
+#endif
+
 unsigned long KSWI_BASE = 0x900000;
 unsigned long KSWI_SYS_BASE = 0x9f0000;
 unsigned long SYS_ERROR0 = 0x9f0000;
diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S
index 6baa4cd50..bf2dd6333 100644
--- a/arch/arm/lib/io-acorn.S
+++ b/arch/arm/lib/io-acorn.S
@@ -11,50 +11,514 @@
 		.text
 		.align
 
-#define OUT(reg)						\
-		mov	r8, reg, lsl $16			;\
-		orr	r8, r8, r8, lsr $16			;\
-		str	r8, [r3, r0, lsl $2]			;\
-		mov	r8, reg, lsr $16			;\
-		orr	r8, r8, r8, lsl $16			;\
-		str	r8, [r3, r0, lsl $2]
-
-#define IN(reg)							\
-		ldr	reg, [r0]				;\
-		and	reg, reg, ip				;\
-		ldr	lr, [r0]				;\
-		orr	reg, reg, lr, lsl $16
-
-		.equ	pcio_base_high, PCIO_BASE & 0xff000000
-		.equ	pcio_base_low,	PCIO_BASE & 0x00ff0000
-		.equ	io_base_high, IO_BASE & 0xff000000
-		.equ	io_base_low, IO_BASE & 0x00ff0000
-
-		.equ	addr_io_diff_hi, pcio_base_high - io_base_high
-		.equ	addr_io_diff_lo, pcio_base_low - io_base_low
-
-		.macro	addr	reg, off
-		tst	\off, #0x80000000
-		.if	addr_io_diff_hi
-		movne	\reg, #IO_BASE
-		moveq	\reg, #pcio_base_high
-		.if	pcio_base_low
-		addeq	\reg, \reg, #pcio_base_low
-		.endif
-		.else
-		mov	\reg, #IO_BASE
-		addeq	\reg, \reg, #addr_io_diff_lo
-		.endif
+		.equ	diff_pcio_base, PCIO_BASE - IO_BASE
+
+		.macro	outw2	rd
+		mov	r8, \rd, lsl #16
+		orr	r8, r8, r8, lsr #16
+		str	r8, [r3, r0, lsl #2]
+		mov	r8, \rd, lsr #16
+		orr	r8, r8, r8, lsl #16
+		str	r8, [r3, r0, lsl #2]
+		.endm
+
+		.macro	inw2	rd, mask, temp
+		ldr	\rd, [r0]
+		and	\rd, \rd, \mask
+		ldr	\temp, [r0]
+		orr	\rd, \rd, \temp, lsl #16
 		.endm
 
-@ Purpose: read a block of data from a hardware register to memory.
-@ Proto  : insw(int from_port, void *to, int len_in_words);
-@ Proto  : inswb(int from_port, void *to, int len_in_bytes);
-@ Notes  : increment to
+		.macro	addr	rd
+		tst	\rd, #0x80000000
+		mov	\rd, \rd, lsl #2
+		add	\rd, \rd, #IO_BASE
+		addeq	\rd, \rd, #diff_pcio_base
+		.endm
+
+.iosw_bad_align_msg:	@ panic text, reached from both insw and outsw
+		.ascii	"insw/outsw: bad buffer alignment (%p), called from %08lX\n\0"
+.iosl_warning:		@ printk text for the insl/outsl stubs
+		.ascii	"<4>insl/outsl not implemented, called from %08lX\n\0"
+		.align
+
+/*
+ * These make no sense on Acorn machines.
+ * Print a warning message.
+ */
+ENTRY(insl)
+ENTRY(outsl)
+		adr	r0, .iosl_warning
+		mov	r1, lr
+		b	SYMBOL_NAME(printk)
+
+.iosw_bad_alignment:
+		adr	r0, .iosw_bad_align_msg
+		mov	r2, lr
+		b	SYMBOL_NAME(panic)
+
+
+/* Purpose: read a block of data from a hardware register to memory.
+ * Proto  : void insw(int from_port, void *to, int len_in_words);
+ * Notes  : increment to, 'to' must be 16-bit aligned
+ */
+
+.insw_align:	tst	r1, #1
+		bne	.iosw_bad_alignment
+
+		ldr	r3, [r0]
+		strb	r3, [r1], #1
+		mov	r3, r3, lsr #8
+		strb	r3, [r1], #1
+
+		subs	r2, r2, #1
+		bne	.insw_aligned
 
 ENTRY(insw)
+		teq	r2, #0
+		RETINSTR(moveq,pc,lr)
+		addr	r0
+		tst	r1, #3
+		bne	.insw_align
+
+.insw_aligned:	mov	ip, #0xff
+		orr	ip, ip, ip, lsl #8
+		stmfd	sp!, {r4, r5, r6, lr}
+
+		subs	r2, r2, #8
+		bmi	.no_insw_8
+
+.insw_8_lp:	ldr	r3, [r0]
+		and	r3, r3, ip
+		ldr	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+
+		ldr	r4, [r0]
+		and	r4, r4, ip
+		ldr	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+
+		ldr	r5, [r0]
+		and	r5, r5, ip
+		ldr	r6, [r0]
+		orr	r5, r5, r6, lsl #16
+
+		ldr	r6, [r0]
+		and	r6, r6, ip
+		ldr	lr, [r0]
+		orr	r6, r6, lr, lsl #16
+
+		stmia	r1!, {r3 - r6}
+		subs	r2, r2, #8
+		bpl	.insw_8_lp
+		tst	r2, #7
+		LOADREGS(eqfd, sp!, {r4, r5, r6, pc})
+
+.no_insw_8:	tst	r2, #4
+		beq	.no_insw_4
+
+		ldr	r3, [r0]
+		and	r3, r3, ip
+		ldr	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+
+		ldr	r4, [r0]
+		and	r4, r4, ip
+		ldr	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+
+		stmia	r1!, {r3, r4}
+
+.no_insw_4:	tst	r2, #2
+		beq	.no_insw_2
+
+		ldr	r3, [r0]
+		and	r3, r3, ip
+		ldr	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+
+		str	r3, [r1], #4
+
+.no_insw_2:	tst	r2, #1
+		ldrne	r3, [r0]
+		strneb	r3, [r1], #1
+		movne	r3, r3, lsr #8
+		strneb	r3, [r1]
+		LOADREGS(fd, sp!, {r4, r5, r6, pc})
+
+@ Purpose: write a block of data from memory to a hardware register.
+@ Proto  : outsw(int to_reg, void *from, int len_in_words);
+@ Notes  : increments from, 'from' must be 16-bit aligned
+
+.outsw_align:	tst	r1, #1
+		bne	.iosw_bad_alignment
+
+		add	r1, r1, #2
+
+		ldr	r3, [r1, #-4]
+		mov	r3, r3, lsr #16
+		orr	r3, r3, r3, lsl #16
+		str	r3, [r0]
+		subs	r2, r2, #1
+		bne	.outsw_aligned
+
+ENTRY(outsw)
+		teq	r2, #0
+		RETINSTR(moveq,pc,lr)
+		addr	r0
+		tst	r1, #3
+		bne	.outsw_align
+
+.outsw_aligned:	stmfd	sp!, {r4, r5, r6, lr}
+
+		subs	r2, r2, #8
+		bmi	.no_outsw_8
+.outsw_8_lp:	ldmia	r1!, {r3, r4, r5, r6}
+
+		mov	ip, r3, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r3, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r5, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r5, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r6, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r6, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		subs	r2, r2, #8
+		bpl	.outsw_8_lp
+		tst	r2, #7
+		LOADREGS(eqfd, sp!, {r4, r5, r6, pc})
+
+.no_outsw_8:	tst	r2, #4
+		beq	.no_outsw_4
+
+		ldmia	r1!, {r3, r4}
+
+		mov	ip, r3, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r3, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+.no_outsw_4:	tst	r2, #2
+		beq	.no_outsw_2
+
+		ldr	r3, [r1], #4
+
+		mov	ip, r3, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r3, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+.no_outsw_2:	tst	r2, #1
+
+		ldrne	r3, [r1]
+
+		movne	ip, r3, lsl #16
+		orrne	ip, ip, ip, lsr #16
+		strne	ip, [r0]
+
+		LOADREGS(fd, sp!, {r4, r5, r6, pc})
+
+.insb_align:	rsb	ip, ip, #4
+		cmp	ip, r2
+		movgt	ip, r2
+		cmp	ip, #2
+		ldrb	r3, [r0]
+		strb	r3, [r1], #1
+		ldrgeb	r3, [r0]
+		strgeb	r3, [r1], #1
+		ldrgtb	r3, [r0]
+		strgtb	r3, [r1], #1
+		subs	r2, r2, ip
+		bne	.insb_aligned
+
+ENTRY(insb)
+		teq	r2, #0
+		moveq	pc, lr
+		addr	r0
+		ands	ip, r1, #3
+		bne	.insb_align
+
+.insb_aligned:	stmfd	sp!, {r4 - r6, lr}
+
+		subs	r2, r2, #16
+		bmi	.insb_no_16
+
+.insb_16_lp:	ldrb	r3, [r0]
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #8
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #24
+		ldrb	r4, [r0]
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #8
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #24
+		ldrb	r5, [r0]
+		ldrb	r6, [r0]
+		orr	r5, r5, r6, lsl #8
+		ldrb	r6, [r0]
+		orr	r5, r5, r6, lsl #16
+		ldrb	r6, [r0]
+		orr	r5, r5, r6, lsl #24
+		ldrb	r6, [r0]
+		ldrb	ip, [r0]
+		orr	r6, r6, ip, lsl #8
+		ldrb	ip, [r0]
+		orr	r6, r6, ip, lsl #16
+		ldrb	ip, [r0]
+		orr	r6, r6, ip, lsl #24
+		stmia	r1!, {r3 - r6}
+		subs	r2, r2, #16
+		bpl	.insb_16_lp
+
+		tst	r2, #15
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+
+.insb_no_16:	tst	r2, #8
+		beq	.insb_no_8
+
+		ldrb	r3, [r0]
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #8
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #24
+		ldrb	r4, [r0]
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #8
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #24
+		stmia	r1!, {r3, r4}
+
+.insb_no_8:	tst	r2, #4			@ is a 4-byte chunk left to read?
+		beq	.insb_no_4		@ bit clear: skip it (same sense as the #8 test above)
+
+		ldrb	r3, [r0]		@ gather four port bytes into r3, first byte lowest
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #8
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #24
+		str	r3, [r1], #4		@ store the word and advance the buffer pointer
+
+.insb_no_4:	ands	r2, r2, #3
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+		cmp	r2, #2
+		ldrb	r3, [r0]
+		strb	r3, [r1], #1
+		ldrgeb	r3, [r0]
+		strgeb	r3, [r1], #1
+		ldrgtb	r3, [r0]
+		strgtb	r3, [r1]
+		LOADREGS(fd, sp!, {r4 - r6, pc})
+
+
+
+.outsb_align:	rsb	ip, ip, #4		@ ip = bytes needed to word-align the buffer (1..3)
+		cmp	ip, r2
+		movgt	ip, r2			@ clamp to the bytes actually left to send
+		cmp	ip, #2			@ flags select how many of the stores below run
+		ldrb	r3, [r1], #1		@ 1st byte: always
+		strb	r3, [r0]
+		ldrgeb	r3, [r1], #1		@ 2nd byte: when ip >= 2
+		strgeb	r3, [r0]
+		ldrgtb	r3, [r1], #1		@ 3rd byte: when ip > 2
+		strgtb	r3, [r0]
+		subs	r2, r2, ip		@ account for the bytes just written
+		bne	.outsb_aligned		@ more to send; else fall into outsb, which returns on r2 == 0
+
+ENTRY(outsb)
+		teq	r2, #0
+		moveq	pc, lr
+		addr	r0
+		ands	ip, r1, #3
+		bne	.outsb_align
+
+.outsb_aligned:	stmfd	sp!, {r4 - r6, lr}
+
+		subs	r2, r2, #16
+		bmi	.outsb_no_16
+
+.outsb_16_lp:	ldmia	r1!, {r3 - r6}
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+
+		strb	r5, [r0]
+		mov	r5, r5, lsr #8
+		strb	r5, [r0]
+		mov	r5, r5, lsr #8
+		strb	r5, [r0]
+		mov	r5, r5, lsr #8
+		strb	r5, [r0]
+
+		strb	r6, [r0]
+		mov	r6, r6, lsr #8
+		strb	r6, [r0]
+		mov	r6, r6, lsr #8
+		strb	r6, [r0]
+		mov	r6, r6, lsr #8
+		strb	r6, [r0]
+		subs	r2, r2, #16
+		bpl	.outsb_16_lp
+
+		tst	r2, #15
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+
+.outsb_no_16:	tst	r2, #8			@ is an 8-byte chunk left to write?
+		beq	.outsb_no_8
+
+		ldmia	r1!, {r3, r4}		@ fetch two words; writeback advances r1 past them
+		strb	r3, [r0]		@ send r3 one byte at a time, lowest byte first
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+
+		strb	r4, [r0]		@ then r4 the same way
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+
+.outsb_no_8:	tst	r2, #4			@ is a 4-byte chunk left to write?
+		beq	.outsb_no_4		@ bit clear: skip it (same sense as the #8 test above)
+
+		ldr	r3, [r1], #4		@ fetch one word and advance the buffer pointer
+		strb	r3, [r0]		@ send it one byte at a time, lowest byte first
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+
+.outsb_no_4:	ands	r2, r2, #3
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+		cmp	r2, #2
+		ldrb	r3, [r1], #1
+		strb	r3, [r0]
+		ldrgeb	r3, [r1], #1
+		strgeb	r3, [r0]
+		ldrgtb	r3, [r1]
+		strgtb	r3, [r0]
+		LOADREGS(fd, sp!, {r4 - r6, pc})
+
+
+
+
+@ Purpose: write a memc register
+@ Proto  : void memc_write(int register, int value);
+@ Returns: nothing
+
+#if defined(CONFIG_CPU_26)
+ENTRY(memc_write)
+		cmp	r0, #7
+		RETINSTR(movgt,pc,lr)
+		mov	r0, r0, lsl #17
+		mov	r1, r1, lsl #15
+		mov	r1, r1, lsr #17
+		orr	r0, r0, r1, lsl #2
+		add	r0, r0, #0x03600000
+		strb	r0, [r0]
+		RETINSTR(mov,pc,lr)
+#define CPSR2SPSR(rt)
+#else
+#define CPSR2SPSR(rt) \
+		mrs	rt, cpsr; \
+		msr	spsr, rt
+#endif
+
+@ Purpose: call an expansion card loader to read bytes.
+@ Proto  : char ecard_loader_read(int offset, char *card_base, char *loader);
+@ Returns: byte read
+
+ENTRY(ecard_loader_read)
+		stmfd	sp!, {r4 - r12, lr}
+		mov	r11, r1
+		mov	r1, r0
+		CPSR2SPSR(r0)
+		mov	lr, pc
+		mov	pc, r2
+		LOADREGS(fd, sp!, {r4 - r12, pc})
+
+@ Purpose: call an expansion card loader to reset the card
+@ Proto  : void ecard_loader_reset(int card_base, char *loader);
+@ Returns: nothing
+
+ENTRY(ecard_loader_reset)
+		stmfd	sp!, {r4 - r12, lr}
+		mov	r11, r0
+		CPSR2SPSR(r0)
+		mov	lr, pc
+		add	pc, r1, #8
+		LOADREGS(fd, sp!, {r4 - r12, pc})
+
+
+#if 0
 		mov	r2, r2, lsl#1
-ENTRY(inswb)
 		mov	ip, sp
 		stmfd	sp!, {r4 - r10, fp, ip, lr, pc}
 		sub	fp, ip, #4
@@ -122,14 +586,9 @@ Linsw_notaligned:
 		bgt	Linsw_notaligned
 		LOADREGS(ea, fp, {r4 - r10, fp, sp, pc})
 
-@ Purpose: write a block of data from memory to a hardware register.
-@ Proto  : outsw(int to_reg, void *from, int len_in_words);
-@ Proto  : outswb(int to_reg, void *from, int len_in_bytes);
-@ Notes  : increments from
 
 ENTRY(outsw)
-		mov	r2, r2, LSL#1
-ENTRY(outswb)
+		mov	r2, r2, lsl#1
 		mov	ip, sp
 		stmfd	sp!, {r4 - r8, fp, ip, lr, pc}
 		sub	fp, ip, #4
@@ -166,56 +625,5 @@ ENTRY(outswb)
 		bgt	3b
 		LOADREGS(ea, fp, {r4 - r8, fp, sp, pc})
 
-/*
- * These make no sense on Acorn machines atm.
- */
-ENTRY(insl)
-ENTRY(outsl)
-		RETINSTR(mov,pc,lr)
-
-@ Purpose: write a memc register
-@ Proto  : void memc_write(int register, int value);
-@ Returns: nothing
-
-#if defined(CONFIG_CPU_26)
-ENTRY(memc_write)
-		cmp	r0, #7
-		RETINSTR(movgt,pc,lr)
-		mov	r0, r0, lsl #17
-		mov	r1, r1, lsl #15
-		mov	r1, r1, lsr #17
-		orr	r0, r0, r1, lsl #2
-		add	r0, r0, #0x03600000
-		strb	r0, [r0]
-		RETINSTR(mov,pc,lr)
-#define CPSR2SPSR(rt)
-#else
-#define CPSR2SPSR(rt) \
-		mrs	rt, cpsr; \
-		msr	spsr, rt
 #endif
 
-@ Purpose: call an expansion card loader to read bytes.
-@ Proto  : char read_loader(int offset, char *card_base, char *loader);
-@ Returns: byte read
-
-ENTRY(ecard_loader_read)
-		stmfd	sp!, {r4 - r12, lr}
-		mov	r11, r1
-		mov	r1, r0
-		CPSR2SPSR(r0)
-		mov	lr, pc
-		mov	pc, r2
-		LOADREGS(fd, sp!, {r4 - r12, pc})
-
-@ Purpose: call an expansion card loader to reset the card
-@ Proto  : void read_loader(int card_base, char *loader);
-@ Returns: byte read
-
-ENTRY(ecard_loader_reset)
-		stmfd	sp!, {r4 - r12, lr}
-		mov	r11, r0
-		CPSR2SPSR(r0)
-		mov	lr, pc
-		add	pc, r1, #8
-		LOADREGS(fd, sp!, {r4 - r12, pc})
diff --git a/arch/arm/lib/io-ebsa110.S b/arch/arm/lib/io-ebsa110.S
index e0b8229a4..b29276ff7 100644
--- a/arch/arm/lib/io-ebsa110.S
+++ b/arch/arm/lib/io-ebsa110.S
@@ -22,6 +22,22 @@
 		ldr	lr, [r0]			;\
 		orr	reg, reg, lr, lsl $16
 
+/*
+ * These make no sense on these machines.
+ * Print a warning message.
+ */
+ENTRY(insl)
+ENTRY(outsl)
+ENTRY(insb)
+ENTRY(outsb)
+		adr	r0, io_long_warning	@ r0 = printk format string
+		mov	r1, lr			@ r1 = return address of our caller, printed by %08lX
+		b	SYMBOL_NAME(printk)	@ branch without link: lr is untouched, so printk returns to that caller
+
+io_long_warning:
+		.ascii	"<4>ins?/outs? not implemented on this architecture, called from %08lX\n\0"
+		.align
+
 @ Purpose: read a block of data from a hardware register to memory.
 @ Proto  : insw(int from_port, void *to, int len_in_words);
 @ Proto  : inswb(int from_port, void *to, int len_in_bytes);
diff --git a/arch/arm/lib/io-ebsa285.S b/arch/arm/lib/io-footbridge.S
index a86983d43..0734c6042 100644
--- a/arch/arm/lib/io-ebsa285.S
+++ b/arch/arm/lib/io-footbridge.S
@@ -1,8 +1,16 @@
 #include <linux/linkage.h>
+#include <asm/hardware.h>
+
+		.equ	pcio_high, PCIO_BASE & 0xff000000
+		.equ	pcio_low,  PCIO_BASE & 0x00ffffff
+
+		.macro	ioaddr, rd,rn
+		add	\rd, \rn, #pcio_high
+		add	\rd, \rd, #pcio_low
+		.endm
 
 ENTRY(insl)
-		add	r0, r0, #0xff000000
-		add	r0, r0, #0x00e00000
+		ioaddr	r0, r0
 		ands	ip, r1, #3
 		bne	2f
 
@@ -14,49 +22,48 @@ ENTRY(insl)
 
 2:		cmp	ip, #2
 		ldr	ip, [r0]
-		blt	3f
-		bgt	4f
+		blt	4f
+		bgt	6f
 
 		strh	ip, [r1], #2
 		mov	ip, ip, lsr #16
-1:		subs	r2, r2, #1
+3:		subs	r2, r2, #1
 		ldrne	r3, [r0]
 		orrne	ip, ip, r3, lsl #16
 		strne	ip, [r1], #4
 		movne	ip, r3, lsr #16
-		bne	1b
+		bne	3b
 		strh	ip, [r1], #2
 		mov	pc, lr
 
-3:		strb	ip, [r1], #1
+4:		strb	ip, [r1], #1
 		mov	ip, ip, lsr #8
 		strh	ip, [r1], #2
 		mov	ip, ip, lsr #16
-1:		subs	r2, r2, #1
+5:		subs	r2, r2, #1
 		ldrne	r3, [r0]
 		orrne	ip, ip, r3, lsl #8
 		strne	ip, [r1], #4
 		movne	ip, r3, lsr #24
-		bne	1b
+		bne	5b
 		strb	ip, [r1], #1
 		mov	pc, lr
 
-4:		strb	ip, [r1], #1
+6:		strb	ip, [r1], #1
 		mov	ip, ip, lsr #8
-1:		subs	r2, r2, #1
+7:		subs	r2, r2, #1
 		ldrne	r3, [r0]
 		orrne	ip, ip, r3, lsl #24
 		strne	ip, [r1], #4
 		movne	ip, r3, lsr #8
-		bne	1b
+		bne	7b
 		strb	ip, [r1], #1
 		mov	ip, ip, lsr #8
 		strh	ip, [r1], #2
 		mov	pc, lr
 
 ENTRY(outsl)
-		add	r0, r0, #0xff000000
-		add	r0, r0, #0x00e00000
+		ioaddr	r0, r0
 		ands	ip, r1, #3
 		bne	2f
 
@@ -70,31 +77,31 @@ ENTRY(outsl)
 		cmp	ip, #2
 		ldr	ip, [r1], #4
 		mov	ip, ip, lsr #16
-		blt	3f
-		bgt	4f
+		blt	4f
+		bgt	5f
 
-1:		ldr	r3, [r1], #4
+3:		ldr	r3, [r1], #4
 		orr	ip, ip, r3, lsl #16
 		str	ip, [r0]
 		mov	ip, r3, lsr #16
 		subs	r2, r2, #1
-		bne	1b
+		bne	3b
 		mov	pc, lr
 
-3:		ldr	r3, [r1], #4
+4:		ldr	r3, [r1], #4
 		orr	ip, ip, r3, lsl #8
 		str	ip, [r0]
 		mov	ip, r3, lsr #24
 		subs	r2, r2, #1
-		bne	3b
+		bne	4b
 		mov	pc, lr
 
-4:		ldr	r3, [r1], #4
+5:		ldr	r3, [r1], #4
 		orr	ip, ip, r3, lsl #24
 		str	ip, [r0]
 		mov	ip, r3, lsr #8
 		subs	r2, r2, #1
-		bne	4b
+		bne	5b
 		mov	pc, lr
 
 		/* Nobody could say these are optimal, but not to worry. */
@@ -102,8 +109,7 @@ ENTRY(outsl)
 ENTRY(outswb)
 		mov	r2, r2, lsr #1
 ENTRY(outsw)
-		add	r0, r0, #0xff000000
-		add	r0, r0, #0x00e00000
+		ioaddr	r0, r0
 1:		subs	r2, r2, #1
 		ldrgeh	r3, [r1], #2
 		strgeh	r3, [r0]
@@ -114,8 +120,7 @@ ENTRY(inswb)
 		mov	r2, r2, lsr #1
 ENTRY(insw)
 		stmfd	sp!, {r4, r5, lr}
-		add	r0, r0, #0xff000000
-		add	r0, r0, #0x00e00000
+		ioaddr	r0, r0
 						@ + 8 + 9 +10 +11 +12 +13 +14 +15 +16 +17
 		subs	ip, r2, #8
 		blo	too_little
@@ -176,8 +181,7 @@ too_little:	subs	r2, r2, #1
 
 
 ENTRY(insb)
-		add	r0, r0, #0xff000000
-		add	r0, r0, #0x00e00000
+		ioaddr	r0, r0
 1:		teq	r2, #0
 		ldrneb	r3, [r0]
 		strneb	r3, [r1], #1
@@ -187,8 +191,7 @@ ENTRY(insb)
 
 
 ENTRY(outsb)
-		add	r0, r0, #0xff000000
-		add	r0, r0, #0x00e00000
+		ioaddr	r0, r0
 1:		teq	r2, #0
 		ldrneb	r3, [r1], #1
 		strneb	r3, [r0]
diff --git a/arch/arm/lib/io.c b/arch/arm/lib/io.c
index d01877934..c94a2ba07 100644
--- a/arch/arm/lib/io.c
+++ b/arch/arm/lib/io.c
@@ -18,7 +18,7 @@ void _memcpy_fromio(void * to, unsigned long from, unsigned long count)
  * Copy data from "real" memory space to IO memory space.
  * This needs to be optimized.
  */
-void _memcpy_toio(unsigned long to, void * from, unsigned long count)
+void _memcpy_toio(unsigned long to, const void * from, unsigned long count)
 {
 	while (count) {
 		count--;
diff --git a/arch/arm/lib/semaphore.S b/arch/arm/lib/semaphore.S
new file mode 100644
index 000000000..778fafc1c
--- /dev/null
+++ b/arch/arm/lib/semaphore.S
@@ -0,0 +1,34 @@
+/*
+ *  linux/arch/arm/lib/semaphore.S
+ *
+ *  Idea from i386 code, Copyright Linus Torvalds.
+ *  Converted for ARM by Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * The semaphore operations have a special calling sequence
+ * that allows us to keep the disruption of the main code
+ * path to a minimum.  These routines save and restore the
+ * registers that will be touched by __down etc.
+ */
+ENTRY(__down_failed)				@ saves the caller registers around a call to __down
+	stmfd	sp!, {r0 - r3, ip, lr}		@ push r0-r3, ip and the return address
+	bl	SYMBOL_NAME(__down)
+	LOADREGS(fd, sp!, {r0 - r3, ip, pc})	@ pop them again; presumably the saved lr lands in pc and returns
+
+ENTRY(__down_interruptible_failed)		@ saves the caller registers around __down_interruptible
+	stmfd	sp!, {r1 - r3, ip, lr}		@ r0 is not saved: presumably it carries the result back - TODO confirm
+	bl	SYMBOL_NAME(__down_interruptible)
+	LOADREGS(fd, sp!, {r1 - r3, ip, pc})	@ pop r1-r3 and ip; presumably the saved lr lands in pc and returns
+
+ENTRY(__down_trylock_failed)			@ saves the caller registers around __down_trylock
+	stmfd	sp!, {r1 - r3, ip, lr}		@ r0 is not saved: presumably it carries the result back - TODO confirm
+	bl	SYMBOL_NAME(__down_trylock)
+	LOADREGS(fd, sp!, {r1 - r3, ip, pc})	@ pop r1-r3 and ip; presumably the saved lr lands in pc and returns
+
+ENTRY(__up_wakeup)				@ saves the caller registers around a call to __up
+	stmfd	sp!, {r0 - r3, ip, lr}		@ push r0-r3, ip and the return address
+	bl	SYMBOL_NAME(__up)
+	LOADREGS(fd, sp!, {r0 - r3, ip, pc})	@ pop them again; presumably the saved lr lands in pc and returns
diff --git a/arch/arm/mm/fault-common.c b/arch/arm/mm/fault-common.c
index 810dea699..1251525da 100644
--- a/arch/arm/mm/fault-common.c
+++ b/arch/arm/mm/fault-common.c
@@ -26,25 +26,14 @@ void __bad_pmd_kernel(pmd_t *pmd)
 	set_pmd(pmd, mk_kernel_pmd(BAD_PAGETABLE));
 }
 
-static void
-kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs,
-		  struct task_struct *tsk, struct mm_struct *mm)
+/*
+ * This is useful to dump out the page tables associated with
+ * 'addr' in mm 'mm'.
+ */
+void show_pte(struct mm_struct *mm, unsigned long addr)
 {
-	char *reason;
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
-	 */
 	pgd_t *pgd;
 
-	if (addr < PAGE_SIZE)
-		reason = "NULL pointer dereference";
-	else
-		reason = "paging request";
-
-	printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n",
-		reason, addr);
-	printk(KERN_ALERT "memmap = %08lX, pgd = %p\n", tsk->tss.memmap, mm->pgd);
 	pgd = pgd_offset(mm, addr);
 	printk(KERN_ALERT "*pgd = %08lx", pgd_val(*pgd));
 
@@ -77,6 +66,27 @@ kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs,
 	} while(0);
 
 	printk("\n");
+}
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+static void
+kernel_page_fault(unsigned long addr, int mode, struct pt_regs *regs,
+		  struct task_struct *tsk, struct mm_struct *mm)
+{
+	char *reason;
+
+	if (addr < PAGE_SIZE)
+		reason = "NULL pointer dereference";
+	else
+		reason = "paging request";
+
+	printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n",
+		reason, addr);
+	printk(KERN_ALERT "memmap = %08lX, pgd = %p\n", tsk->tss.memmap, mm->pgd);
+	show_pte(mm, addr);
 	die("Oops", regs, mode);
 
 	do_exit(SIGKILL);
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 70d7c77b9..48e34214e 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -115,19 +115,19 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
 {
 	void * addr;
 	struct vm_struct * area;
-	unsigned long offset;
+	unsigned long offset, last_addr;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;	/* inclusive address of the last byte */
+	if (!size || last_addr < phys_addr)
+		return NULL;
 
 	/*
 	 * Mappings have to be page-aligned
 	 */
 	offset = phys_addr & ~PAGE_MASK;
-	size = PAGE_ALIGN(size + offset);
-
-	/*
-	 * Don't allow mappings that wrap..
-	 */
-	if (!size || size > phys_addr + size)
-		return NULL;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr + 1) - phys_addr;	/* +1: round up the exclusive end, else a page-aligned last_addr loses its page */
 
 	/*
 	 * Ok, go for it..
diff --git a/arch/arm/mm/proc-arm2,3.S b/arch/arm/mm/proc-arm2,3.S
index 263d79708..7e4871fe2 100644
--- a/arch/arm/mm/proc-arm2,3.S
+++ b/arch/arm/mm/proc-arm2,3.S
@@ -202,15 +202,11 @@ _arm2_3_check_bugs:
 LC0:		.word	SYMBOL_NAME(page_nr)
 /*
  * Function: arm2_switch_to (struct task_struct *prev, struct task_struct *next)
- *
  * Params  : prev	Old task structure
  *	   : next	New task structure for process to run
- *
  * Returns : prev
- *
  * Purpose : Perform a task switch, saving the old processes state, and restoring
  *	     the new.
- *
  * Notes   : We don't fiddle with the FP registers here - we postpone this until
  *	     the new task actually uses FP.  This way, we don't swap FP for tasks
  *	     that do not require it.
@@ -316,15 +312,11 @@ _arm2_proc_init:
 _arm2_proc_fin:	movs	pc, lr
 /*
  * Function: arm3_switch_to (struct task_struct *prev, struct task_struct *next)
- *
  * Params  : prev	Old task structure
  *	   : next	New task structure for process to run
- *
  * Returns : prev
- *
  * Purpose : Perform a task switch, saving the old processes state, and restoring
  *	     the new.
- *
  * Notes   : We don't fiddle with the FP registers here - we postpone this until
  *	     the new task actually uses FP.  This way, we don't swap FP for tasks
  *	     that do not require it.
diff --git a/arch/arm/mm/proc-arm6,7.S b/arch/arm/mm/proc-arm6,7.S
index b817ae2b4..d1f31e35d 100644
--- a/arch/arm/mm/proc-arm6,7.S
+++ b/arch/arm/mm/proc-arm6,7.S
@@ -74,14 +74,14 @@ _arm6_7_switch_to:
 		str	sp, [r0, #TSS_SAVE]		@ Save sp_SVC
 		ldr	sp, [r1, #TSS_SAVE]		@ Get saved sp_SVC
 		ldr	r2, [r1, #TSK_ADDR_LIMIT]
+		ldr	r3, [r1, #TSS_MEMMAP]		@ Page table pointer
 		teq	r2, #0
 		moveq	r2, #DOM_KERNELDOMAIN
 		movne	r2, #DOM_USERDOMAIN
 		mcr	p15, 0, r2, c3, c0		@ Set domain reg
-		ldr	r2, [r1, #TSS_MEMMAP]		@ Page table pointer
 		mov	r1, #0
 		mcr	p15, 0, r1, c7, c0, 0		@ flush cache
-		mcr	p15, 0, r2, c2, c0, 0		@ update page table ptr
+		mcr	p15, 0, r3, c2, c0, 0		@ update page table ptr
 		mcr	p15, 0, r1, c5, c0, 0		@ flush TLBs
 		ldmfd	sp!, {ip}
 		msr	spsr, ip			@ Save tasks CPSR into SPSR for this return
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index ff55c8ffa..be9fad45e 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -29,11 +29,11 @@ _sa110_flush_cache_all:					@ preserves r0
 		mov	r2, #1
 _sa110_flush_cache_all_r2:
 		ldr	r3, =Lclean_switch
+		ldr	ip, =FLUSH_BASE
 		ldr	r1, [r3]
 		ands	r1, r1, #1
 		eor	r1, r1, #1
 		str	r1, [r3]
-		ldr	ip, =FLUSH_BASE
 		addne	ip, ip, #32768
 		add	r1, ip, #16384			@ only necessary for 16k
 1:		ldr	r3, [ip], #32
@@ -226,12 +226,12 @@ _sa110_switch_to:
 		ldr	r2, [r0, #TSS_MEMMAP]		@ Get old page tables
 		str	sp, [r0, #TSS_SAVE]		@ Save sp_SVC
 		ldr	sp, [r1, #TSS_SAVE]		@ Get saved sp_SVC
-		ldr	r4, [r1, #TSK_ADDR_LIMIT]
-		teq	r4, #0
-		moveq	r4, #DOM_KERNELDOMAIN
-		movne	r4, #DOM_USERDOMAIN
-		mcr	p15, 0, r4, c3, c0		@ Set segment
+		ldr	r5, [r1, #TSK_ADDR_LIMIT]
 		ldr	r4, [r1, #TSS_MEMMAP]		@ Page table pointer
+		teq	r5, #0
+		moveq	r5, #DOM_KERNELDOMAIN
+		movne	r5, #DOM_USERDOMAIN
+		mcr	p15, 0, r5, c3, c0		@ Set segment
 /*
  * Flushing the cache is nightmarishly slow, so we take any excuse
  * to get out of it.  If the old page table is the same as the new,
@@ -288,7 +288,8 @@ _sa110_data_abort:
  */
 		.align	5
 _sa110_set_pmd:	str	r1, [r0]
-		mcr	p15, 0, r0, c7, c10, 1		@ clean D entry	 (drain is done by TLB fns)
+		mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+		mcr	p15, 0, r0, c7, c10, 4		@ drain WB (TLB bypasses WB)
 		mov	pc, lr
 
 /*
@@ -318,6 +319,7 @@ _sa110_set_pte:	str	r1, [r0], #-1024		@ linux version
 		str	r2, [r0]			@ hardware version
 		mov	r0, r0
 		mcr	p15, 0, r0, c7, c10, 1		@ clean D entry	 (drain is done by TLB fns)
+		mcr	p15, 0, r0, c7, c10, 4		@ drain WB (TLB bypasses WB)
 		mov	pc, lr
 
 /*
diff --git a/arch/arm/nwfpe/ARM-gcc.h b/arch/arm/nwfpe/ARM-gcc.h
new file mode 100644
index 000000000..d726aa452
--- /dev/null
+++ b/arch/arm/nwfpe/ARM-gcc.h
@@ -0,0 +1,128 @@
+
+/*
+-------------------------------------------------------------------------------
+One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
+-------------------------------------------------------------------------------
+*/
+#define LITTLEENDIAN
+
+/*
+-------------------------------------------------------------------------------
+The macro `BITS64' can be defined to indicate that 64-bit integer types are
+supported by the compiler.
+-------------------------------------------------------------------------------
+*/
+#define BITS64
+
+/*
+-------------------------------------------------------------------------------
+Each of the following `typedef's defines the most convenient type that holds
+integers of at least as many bits as specified.  For example, `uint8' should
+be the most convenient type that can hold unsigned integers of as many as
+8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+to the same as `int'.
+-------------------------------------------------------------------------------
+*/
+typedef char flag;
+typedef unsigned char uint8;
+typedef signed char int8;
+typedef int uint16;
+typedef int int16;
+typedef unsigned int uint32;
+typedef signed int int32;
+#ifdef BITS64
+typedef unsigned long long int bits64;
+typedef signed long long int sbits64;
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Each of the following `typedef's defines a type that holds integers
+of _exactly_ the number of bits specified.  For instance, for most
+implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+`unsigned short int' and `signed short int' (or `short int'), respectively.
+-------------------------------------------------------------------------------
+*/
+typedef unsigned char bits8;
+typedef signed char sbits8;
+typedef unsigned short int bits16;
+typedef signed short int sbits16;
+typedef unsigned int bits32;
+typedef signed int sbits32;
+#ifdef BITS64
+typedef unsigned long long int uint64;
+typedef signed long long int int64;
+#endif
+
+#ifdef BITS64
+/*
+-------------------------------------------------------------------------------
+The `LIT64' macro takes as its argument a textual integer literal and if
+necessary ``marks'' the literal as having a 64-bit integer type.  For
+example, the Gnu C Compiler (`gcc') requires that 64-bit literals be
+appended with the letters `LL' standing for `long long', which is `gcc's
+name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+defined as the identity macro:  `#define LIT64( a ) a'.
+-------------------------------------------------------------------------------
+*/
+#define LIT64( a ) a##LL
+#endif
+
+/*
+-------------------------------------------------------------------------------
+The macro `INLINE' can be used before functions that should be inlined.  If
+a compiler does not support explicit inlining, this macro should be defined
+to be `static'.
+-------------------------------------------------------------------------------
+*/
+#define INLINE extern __inline__
+
+
+/* For use as a GCC soft-float library we need some special function names. */
+
+#ifdef __LIBFLOAT__
+
+/* Some 32-bit ops can be mapped straight across by just changing the name. */
+#define float32_add			__addsf3
+#define float32_sub			__subsf3
+#define float32_mul			__mulsf3
+#define float32_div			__divsf3
+#define int32_to_float32		__floatsisf
+#define float32_to_int32_round_to_zero	__fixsfsi
+#define float32_to_uint32_round_to_zero	__fixunssfsi
+
+/* These ones go through the glue code.  To avoid namespace pollution
+   we rename the internal functions too.  */
+#define float32_eq			___float32_eq
+#define float32_le			___float32_le
+#define float32_lt			___float32_lt
+
+/* All the 64-bit ops have to go through the glue, so we pull the same
+   trick.  */
+#define float64_add			___float64_add
+#define float64_sub			___float64_sub
+#define float64_mul			___float64_mul
+#define float64_div			___float64_div
+#define int32_to_float64		___int32_to_float64
+#define float64_to_int32_round_to_zero	___float64_to_int32_round_to_zero
+#define float64_to_uint32_round_to_zero	___float64_to_uint32_round_to_zero
+#define float64_to_float32		___float64_to_float32
+#define float32_to_float64		___float32_to_float64
+#define float64_eq			___float64_eq
+#define float64_le			___float64_le
+#define float64_lt			___float64_lt
+
+#if 0
+#define float64_add			__adddf3
+#define float64_sub			__subdf3
+#define float64_mul			__muldf3
+#define float64_div			__divdf3
+#define int32_to_float64		__floatsidf
+#define float64_to_int32_round_to_zero	__fixdfsi
+#define float64_to_uint32_round_to_zero	__fixunsdfsi
+#define float64_to_float32		__truncdfsf2
+#define float32_to_float64		__extendsfdf2
+#endif
+
+#endif
diff --git a/arch/arm/nwfpe/ChangeLog b/arch/arm/nwfpe/ChangeLog
new file mode 100644
index 000000000..e160d36c3
--- /dev/null
+++ b/arch/arm/nwfpe/ChangeLog
@@ -0,0 +1,20 @@
+1998-11-23  Scott Bambrough  <scottb@corelcomputer.com>
+
+	* README.FPE - fix typo in description of lfm/sfm instructions
+	* NOTES - Added file to describe known bugs/problems 
+	* fpmodule.c - Changed version number to 0.94
+
+1998-11-20  Scott Bambrough  <scottb@corelcomputer.com>
+
+	* README.FPE - fix description of URD, NRM instructions
+	* TODO - remove URD, NRM instructions from TODO list
+	* single_cpdo.c - implement URD, NRM
+	* double_cpdo.c - implement URD, NRM
+	* extended_cpdo.c - implement URD, NRM
+
+1998-11-19  Scott Bambrough  <scottb@corelcomputer.com>
+
+	* ChangeLog - Added this file to track changes made.
+	* fpa11.c - added code to initialize register types to typeNone
+	* fpa11_cpdt.c - fixed bug in storeExtended (typeExtended changed to
+	  typeDouble in switch statement)
diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile
new file mode 100644
index 000000000..5db79c6d4
--- /dev/null
+++ b/arch/arm/nwfpe/Makefile
@@ -0,0 +1,31 @@
+#
+# linux/arch/arm/nwfpe/Makefile
+#
+# Copyright (C) 1998, 1999 Philip Blundell
+#
+
+NWFPE_OBJS := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \
+	      fpmodule.o fpopcode.o softfloat.o \
+	      single_cpdo.o double_cpdo.o extended_cpdo.o
+# Add the entry stub: entry26.o when CONFIG_CPU_26 is y, entry.o otherwise.
+ifeq ($(CONFIG_CPU_26),y)
+NWFPE_OBJS += entry26.o
+else
+NWFPE_OBJS += entry.o
+endif
+# L_TARGET names the archive; presumably Rules.make builds it from L_OBJS - TODO confirm.
+L_TARGET := math-emu.a
+# CONFIG_NWFPE=y lists the objects in L_OBJS; =m routes them into nwfpe.o via MI_OBJS.
+ifeq ($(CONFIG_NWFPE),y)
+L_OBJS = $(NWFPE_OBJS)
+else
+  ifeq ($(CONFIG_NWFPE),m)
+    M_OBJS = nwfpe.o
+    MI_OBJS = $(NWFPE_OBJS)
+  endif
+endif    
+
+include $(TOPDIR)/Rules.make
+# Relocatable link (-r) of the component objects into the single object nwfpe.o.
+nwfpe.o: $(MI_OBJS) $(MIX_OBJS)
+	 $(LD) $(LD_RFLAG) -r -o $@ $(MI_OBJS) $(MIX_OBJS)
diff --git a/arch/arm/nwfpe/config.h b/arch/arm/nwfpe/config.h
new file mode 100644
index 000000000..35f9d6336
--- /dev/null
+++ b/arch/arm/nwfpe/config.h
@@ -0,0 +1,31 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+#if 1
+#define C_SYMBOL_NAME(foo)	foo
+#else
+#define C_SYMBOL_NAME(foo)	_##foo
+#endif
+
+#endif
diff --git a/arch/arm/nwfpe/double_cpdo.c b/arch/arm/nwfpe/double_cpdo.c
new file mode 100644
index 000000000..e746c7a29
--- /dev/null
+++ b/arch/arm/nwfpe/double_cpdo.c
@@ -0,0 +1,293 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+
+extern FPA11 *fpa11;
+
+float64 getDoubleConstant(unsigned int);
+
+float64 float64_exp(float64 Fm);
+float64 float64_ln(float64 Fm);
+float64 float64_sin(float64 rFm);
+float64 float64_cos(float64 rFm);
+float64 float64_arcsin(float64 rFm);
+float64 float64_arctan(float64 rFm);
+float64 float64_log(float64 rFm);
+float64 float64_tan(float64 rFm);
+float64 float64_arccos(float64 rFm);
+float64 float64_pow(float64 rFn,float64 rFm);
+float64 float64_pol(float64 rFn,float64 rFm);
+
+unsigned int DoubleCPDO(const unsigned int opcode)
+{
+   float64 rFm, rFn;
+   unsigned int Fd, Fm, Fn, nRc = 1;
+
+   /* Emulate one arithmetic CPDO opcode in double precision; single operands
+      are widened, Fd gets the result.  Returns 1 if handled, 0 if not. */
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getDoubleConstant(Fm);
+   }
+   else
+   {
+     switch (fpa11->fpreg[Fm].fType)
+     {
+        case typeSingle:
+          rFm = float32_to_float64(fpa11->fpreg[Fm].fValue.fSingle);
+        break;
+
+        case typeDouble:
+          rFm = fpa11->fpreg[Fm].fValue.fDouble;
+        break;
+
+        case typeExtended:
+            /*
+             * Should never get here: if an extended operand is
+             * involved the other operand should have been promoted
+             * and ExtendedCPDO called instead.  Reject the opcode
+             * rather than carry on with rFm uninitialised.
+             */
+            return 0;
+
+        default: return 0;
+     }
+   }
+
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fpreg[Fn].fType)
+      {
+        case typeSingle:
+          rFn = float32_to_float64(fpa11->fpreg[Fn].fValue.fSingle);
+        break;
+
+        case typeDouble:
+          rFn = fpa11->fpreg[Fn].fValue.fDouble;
+        break;
+
+        default: return 0;   /* any other type (including extended) is rejected */
+      }
+   }
+
+   Fd = getFd(opcode);
+   /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_add(rFn,rFm);
+      break;
+
+      case MUF_CODE:
+      case FML_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_mul(rFn,rFm);
+      break;
+
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_sub(rFn,rFm);
+      break;
+
+      case RSF_CODE:   /* reverse subtract: Fm - Fn */
+         fpa11->fpreg[Fd].fValue.fDouble = float64_sub(rFm,rFn);
+      break;
+
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_div(rFn,rFm);
+      break;
+
+      case RDF_CODE:   /* reverse divide: Fm / Fn */
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_div(rFm,rFn);
+      break;
+
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_pow(rFn,rFm);
+      break;
+
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_pow(rFm,rFn);
+      break;
+#endif
+
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_rem(rFn,rFm);
+      break;
+
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_pol(rFn,rFm);
+      break;
+#endif
+
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = rFm;
+      break;
+
+      case MNF_CODE:
+      {
+         unsigned int *p = (unsigned int*)&rFm;
+         p[1] ^= 0x80000000;   /* toggle the top bit of word 1; presumably the sign bit - TODO confirm word order */
+         fpa11->fpreg[Fd].fValue.fDouble = rFm;
+      }
+      break;
+
+      case ABS_CODE:
+      {
+         unsigned int *p = (unsigned int*)&rFm;
+         p[1] &= 0x7fffffff;   /* clear the top bit of word 1; presumably the sign bit - TODO confirm word order */
+         fpa11->fpreg[Fd].fValue.fDouble = rFm;
+      }
+      break;
+
+      case RND_CODE:
+      case URD_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble =   /* NOTE(review): rounds via int32, so the result is limited to int32 range */
+             int32_to_float64(float64_to_int32(rFm));
+      break;
+
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_sqrt(rFm);
+      break;
+
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_log(rFm);
+      break;
+
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_ln(rFm);
+      break;
+
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_exp(rFm);
+      break;
+
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_sin(rFm);
+      break;
+
+      case COS_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_cos(rFm);
+      break;
+
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_tan(rFm);
+      break;
+
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_arcsin(rFm);
+      break;
+
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_arccos(rFm);
+      break;
+
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fValue.fDouble = float64_arctan(rFm);
+      break;
+#endif
+
+      case NRM_CODE:   /* leaves the stored value untouched; only the type is set below */
+      break;
+
+      default:
+      {
+        nRc = 0;
+      }
+   }
+
+   if (0 != nRc) fpa11->fpreg[Fd].fType = typeDouble;   /* tag Fd as double only when the opcode was handled */
+   return nRc;
+}
+
+#if 0
+float64 float64_exp(float64 rFm)
+{
+  return rFm;
+//series
+}
+
+float64 float64_ln(float64 rFm)
+{
+  return rFm;
+//series
+}
+
+float64 float64_sin(float64 rFm)
+{
+  return rFm;
+//series
+}
+
+float64 float64_cos(float64 rFm)
+{
+   return rFm;
+   //series
+}
+
+#if 0
+float64 float64_arcsin(float64 rFm)
+{
+//series
+}
+
+float64 float64_arctan(float64 rFm)
+{
+  //series
+}
+#endif
+
+float64 float64_log(float64 rFm)
+{
+  return float64_div(float64_ln(rFm),getDoubleConstant(7));
+}
+
+float64 float64_tan(float64 rFm)
+{
+  return float64_div(float64_sin(rFm),float64_cos(rFm));
+}
+
+float64 float64_arccos(float64 rFm)
+{
+return rFm;
+   //return float64_sub(halfPi,float64_arcsin(rFm));
+}
+
+float64 float64_pow(float64 rFn,float64 rFm)
+{
+  return float64_exp(float64_mul(rFm,float64_ln(rFn))); 
+}
+
+float64 float64_pol(float64 rFn,float64 rFm)
+{
+  return float64_arctan(float64_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
new file mode 100644
index 000000000..6f0077fbe
--- /dev/null
+++ b/arch/arm/nwfpe/entry.S
@@ -0,0 +1,126 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/* This is the kernel's entry point into the floating point emulator.
+It is called from the kernel with code similar to this:
+
+	adrsvc	al, r9, ret_from_exception	@ r9  = normal FP return
+	adrsvc	al, lr, fpundefinstr		@ lr  = undefined instr return
+
+	get_current_task r10
+	mov	r8, #1
+	strb	r8, [r10, #TSK_USED_MATH]	@ set current->used_math
+	add	r10, r10, #TSS_FPESAVE		@ r10 = workspace
+	ldr	r4, .LC2
+	ldr	pc, [r4]			@ Call FP emulator entry point
+
+The kernel expects the emulator to return via one of two possible
+points of return it passes to the emulator.  The emulator, if
+successful in its emulation, jumps to ret_from_exception (passed in
+r9) and the kernel takes care of returning control from the trap to
+the user code.  If the emulator is unable to emulate the instruction,
+it returns via fpundefinstr (passed via lr) and the kernel halts the
+user program with a core dump.
+
+On entry to the emulator r10 points to an area of private FP workspace
+reserved in the thread structure for this process.  This is where the
+emulator saves its registers across calls.  The first word of this area
+is used as a flag to detect the first time a process uses floating point,
+so that the emulator startup cost can be avoided for tasks that don't
+want it.
+
+This routine does three things:
+
+1) It saves SP into a variable called userRegisters.  The kernel has
+created a struct pt_regs on the stack and saved the user registers
+into it.  See /usr/include/asm/proc/ptrace.h for details.  The
+emulator code uses userRegisters as the base of an array of words from
+which the contents of the registers can be extracted.
+
+2) It calls EmulateAll to emulate a floating point instruction.
+EmulateAll returns 1 if the emulation was successful, or 0 if not.
+
+3) If an instruction has been emulated successfully, it looks ahead at
+the next instruction.  If it is a floating point instruction, it
+executes the instruction, without returning to user space.  In this
+way it repeatedly looks ahead and executes floating point instructions
+until it encounters a non floating point instruction, at which time it
+returns via the normal return address passed in r9.
+
+This is done to reduce the effect of the trap overhead on each
+floating point instruction.  GCC attempts to group floating point
+instructions to allow the emulator to spread the cost of the trap over
+several floating point instructions.  */
+
+	.globl	nwfpe_enter
+nwfpe_enter:
+	/* ?? Could put userRegisters and fpa11 into fixed regs during
+	   emulation.  This would reduce load/store overhead at the expense
+	   of stealing two regs from the register allocator.  Not sure if
+	   it's worth it.  */
+	ldr r4, =userRegisters
+        str sp, [r4]			@ save pointer to user regs
+	ldr r4, =fpa11
+	str r10, [r4]			@ store pointer to our state
+        mov r4, sp			@ use r4 for local pointer
+        mov r10, lr			@ r10 = failure-return address (was lr)
+
+        ldr r5, [r4, #60]	 	@ r5 = saved PC (word 15 of the frame)
+	ldr r0, [r5, #-4]		@ r0 = the instruction just before that PC
+emulate:
+	bl EmulateAll			@ emulate the instruction in r0
+   	cmp r0, #0			@ was emulation successful
+        moveq pc, r10			@ no, return failure
+
+next:
+__x1:	ldrt	r6, [r5], #4		@ get the next instruction and
+					@ increment PC
+
+	and   r2, r6, #0x0F000000	@ test for FP insns: keep bits 27-24
+        teq   r2, #0x0C000000		@ 0xC, 0xD or 0xE in that field is
+        teqne r2, #0x0D000000		@ treated as an FP instruction;
+        teqne r2, #0x0E000000		@ anything else ends the look-ahead
+        movne pc, r9			@ return ok if not a fp insn
+
+        str r5, [r4, #60]		@ update PC copy in regs
+
+        mov r0, r6			@ r0 = opcode for checkCondition
+        ldr r1, [r4, #64]		@ r1 = condition codes (frame word 16)
+   	bl  checkCondition		@ check the condition
+   	cmp r0, #0			@ r0 = 0 ==> condition failed
+
+        @ if condition code failed to match, next insn
+   	beq next			@ skip it and fetch the one after
+   	    
+        mov r0, r6			@ prepare for EmulateAll()
+   	b emulate			@ condition passed: emulate it
+
+	@ The user-space load at __x1 may fault.  The __ex_table entry
+	@ below pairs __x1 with __f1, which simply returns through r9.
+	.section .fixup,"ax"
+	.align
+__f1:	mov	pc, r9
+	.previous
+	.section __ex_table,"a"
+	.align 3
+	.long	__x1, __f1
+	.previous
diff --git a/arch/arm/nwfpe/entry26.S b/arch/arm/nwfpe/entry26.S
new file mode 100644
index 000000000..6b1ec3354
--- /dev/null
+++ b/arch/arm/nwfpe/entry26.S
@@ -0,0 +1,112 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "../lib/constants.h"
+
+/* This is the kernel's entry point into the floating point emulator.
+It is called from the kernel with code similar to this:
+
+	mov	fp, #0
+	teqp	pc, #I_BIT | MODE_SVC
+	ldr	r4, .LC2
+	ldr	pc, [r4]		@ Call FP module USR entry point
+
+The kernel expects the emulator to return via one of two possible
+points of return it passes to the emulator.  The emulator, if
+successful in its emulation, jumps to ret_from_exception and the
+kernel takes care of returning control from the trap to the user code.
+If the emulator is unable to emulate the instruction, it returns to
+fpundefinstr and the kernel halts the user program with a core dump.
+
+This routine does four things:
+
+1) It saves SP into a variable called userRegisters.  The kernel has
+created a struct pt_regs on the stack and saved the user registers
+into it.  See /usr/include/asm/proc/ptrace.h for details.  The
+emulator code uses userRegisters as the base of an array of words from
+which the contents of the registers can be extracted.
+
+2) It locates the FP emulator work area within the TSS structure and
+points `fpa11' to it.
+
+3) It calls EmulateAll to emulate a floating point instruction.
+EmulateAll returns 1 if the emulation was successful, or 0 if not.
+
+4) If an instruction has been emulated successfully, it looks ahead at
+the next instruction.  If it is a floating point instruction, it
+executes the instruction, without returning to user space.  In this
+way it repeatedly looks ahead and executes floating point instructions
+until it encounters a non floating point instruction, at which time it
+returns via ret_from_exception.
+
+This is done to reduce the effect of the trap overhead on each
+floating point instruction.  GCC attempts to group floating point
+instructions to allow the emulator to spread the cost of the trap over
+several floating point instructions.  */
+
+	.globl	nwfpe_enter
+nwfpe_enter:
+	ldr	r4, =userRegisters
+	str	sp, [r4]		@ save pointer to user regs
+
+	mov	r10, sp, lsr #13	@ find workspace: round sp down to an
+	mov	r10, r10, lsl #13	@ 8K boundary, then add the offset
+	add	r10, r10, #TSS_FPESAVE	@ TSS_FPESAVE to that base
+
+	ldr	r4, =fpa11
+	str	r10, [r4]		@ store pointer to our state
+	mov	r4, sp			@ use r4 for local pointer
+
+	ldr	r5, [r4, #60]		@ get contents of PC
+	bic	r5, r5, #0xfc000003	@ keep only bits 25-2 (the address)
+	ldr	r0, [r5, #-4]		@ get actual instruction into r0
+	bl	EmulateAll		@ emulate the instruction
+1:	cmp	r0, #0			@ was emulation successful
+	beq	fpundefinstr		@ no, return failure
+
+next:
+	ldrt	r6, [r5], #4		@ get the next instruction and
+					@ increment PC
+					@ NOTE(review): no fixup entry covers a fault here - confirm
+	and	r2, r6, #0x0F000000	@ test for FP insns
+	teq	r2, #0x0C000000
+	teqne	r2, #0x0D000000
+	teqne	r2, #0x0E000000
+	bne	ret_from_exception	@ return ok if not a fp insn
+
+	ldr	r9, [r4, #60]		@ r9 = bits 31-26 and 1-0 of the saved
+	and	r9, r9, #0xfc000003	@ PC word (used as condition codes)
+	orr	r7, r5, r9		@ merge them with the advanced address
+	str	r7, [r4, #60]		@ update PC copy in regs
+
+	mov	r0, r6			@ r0 = opcode for checkCondition
+	mov	r1, r9			@ r1 = the condition codes
+	bl	checkCondition		@ check the condition
+	cmp	r0, #0			@ r0 = 0 ==> condition failed
+
+	@ if condition code failed to match, next insn
+	beq	next			@ skip it and fetch the one after
+	    
+	mov	r0, r6			@ prepare for EmulateAll()
+	adr	lr, 1b			@ EmulateAll returns to the check at 1:
+	orr	lr, lr, #3		@ presumably the SVC mode bits - confirm
+	b	EmulateAll		@ condition passed: emulate it
diff --git a/arch/arm/nwfpe/extended_cpdo.c b/arch/arm/nwfpe/extended_cpdo.c
new file mode 100644
index 000000000..1c5c66180
--- /dev/null
+++ b/arch/arm/nwfpe/extended_cpdo.c
@@ -0,0 +1,276 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+
+floatx80 getExtendedConstant(unsigned int);
+
+floatx80 floatx80_exp(floatx80 Fm);
+floatx80 floatx80_ln(floatx80 Fm);
+floatx80 floatx80_sin(floatx80 rFm);
+floatx80 floatx80_cos(floatx80 rFm);
+floatx80 floatx80_arcsin(floatx80 rFm);
+floatx80 floatx80_arctan(floatx80 rFm);
+floatx80 floatx80_log(floatx80 rFm);
+floatx80 floatx80_tan(floatx80 rFm);
+floatx80 floatx80_arccos(floatx80 rFm);
+floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm);
+floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm);
+
+/* Extended-precision worker for CPDO opcodes: widens Fm (and Fn for dyadic
+   forms) to floatx80, performs the operation, stores the result in Fd and
+   tags Fd typeExtended.  Returns 1 on success, 0 when an operand register
+   has no recognised type or the operation is not handled here. */
+unsigned int ExtendedCPDO(const unsigned int opcode)
+{
+   floatx80 rFm, rFn;
+   unsigned int Fd, Fm, Fn, nRc = 1;
+
+   //fp_printk("ExtendedCPDO(0x%08x)\n",opcode);
+   Fm = getFm(opcode);		/* register number, or constant index */
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getExtendedConstant(Fm);
+   }
+   else
+   {  
+     switch (fpa11->fpreg[Fm].fType)
+     {
+        case typeSingle:
+          rFm = float32_to_floatx80(fpa11->fpreg[Fm].fValue.fSingle);
+        break;
+
+        case typeDouble:
+          rFm = float64_to_floatx80(fpa11->fpreg[Fm].fValue.fDouble);
+        break;
+        
+        case typeExtended:
+          rFm = fpa11->fpreg[Fm].fValue.fExtended;
+        break;
+        
+        default: return 0;
+     }
+   }
+   
+   if (!MONADIC_INSTRUCTION(opcode))	/* rFn is only set for dyadic forms */
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fpreg[Fn].fType)
+      {
+        case typeSingle:
+          rFn = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle);
+        break;
+
+        case typeDouble:
+          rFn = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble);
+        break;
+        
+        case typeExtended:
+          rFn = fpa11->fpreg[Fn].fValue.fExtended;
+        break;
+        
+        default: return 0;
+      }
+   }
+
+   Fd = getFd(opcode);
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_add(rFn,rFm);
+      break;
+
+      case MUF_CODE:
+      case FML_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_mul(rFn,rFm);
+      break;
+
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_sub(rFn,rFm);
+      break;
+
+      case RSF_CODE:		/* reversed operands: Fm - Fn */
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_sub(rFm,rFn);
+      break;
+
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_div(rFn,rFm);
+      break;
+
+      case RDF_CODE:		/* reversed operands: Fm / Fn */
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_div(rFm,rFn);
+      break;
+
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_pow(rFn,rFm);
+      break;
+
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_pow(rFm,rFn);
+      break;
+#endif
+
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_rem(rFn,rFm);
+      break;
+
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_pol(rFn,rFm);
+      break;
+#endif
+
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = rFm;
+      break;
+
+      case MNF_CODE:
+         rFm.high ^= 0x8000;	/* toggle the top bit of the high half */
+         fpa11->fpreg[Fd].fValue.fExtended = rFm;
+      break;
+
+      case ABS_CODE:
+         rFm.high &= 0x7fff;	/* clear the top bit of the high half */
+         fpa11->fpreg[Fd].fValue.fExtended = rFm;
+      break;
+
+      case RND_CODE:
+      case URD_CODE:
+         /* round within floatx80; a detour through int32 loses big values */
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_round_to_int(rFm);
+      break;
+
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_sqrt(rFm);
+      break;
+
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_log(rFm);
+      break;
+
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_ln(rFm);
+      break;
+
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_exp(rFm);
+      break;
+
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_sin(rFm);
+      break;
+
+      case COS_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_cos(rFm);
+      break;
+
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_tan(rFm);
+      break;
+
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_arcsin(rFm);
+      break;
+
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_arccos(rFm);
+      break;
+
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fValue.fExtended = floatx80_arctan(rFm);
+      break;
+#endif
+
+      case NRM_CODE: /* NOTE(review): stores nothing, yet Fd is retagged below */
+      break;
+      
+      default: nRc = 0;		/* not an operation handled here */
+   }
+   
+   if (0 != nRc) fpa11->fpreg[Fd].fType = typeExtended;
+   return nRc;
+}
+
+#if 0 /* unfinished transcendental helpers; nothing in here is compiled */
+floatx80 floatx80_exp(floatx80 Fm)
+{
+// series still to be written; nothing is returned
+}
+
+floatx80 floatx80_ln(floatx80 Fm)
+{
+// series still to be written; nothing is returned
+}
+
+floatx80 floatx80_sin(floatx80 rFm)
+{
+// series still to be written; nothing is returned
+}
+
+floatx80 floatx80_cos(floatx80 rFm)
+{
+// series still to be written; nothing is returned
+}
+
+floatx80 floatx80_arcsin(floatx80 rFm)
+{
+// series still to be written; nothing is returned
+}
+
+floatx80 floatx80_arctan(floatx80 rFm)
+{
+  // series still to be written; nothing is returned
+}
+
+floatx80 floatx80_log(floatx80 rFm)
+{ /* NOTE(review): log10 needs ln(x)/ln(10); confirm constant 7 is ln(10) */
+  return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7));
+}
+
+floatx80 floatx80_tan(floatx80 rFm)
+{ /* tan(x) formed as sin(x) / cos(x) */
+  return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm));
+}
+
+floatx80 floatx80_arccos(floatx80 rFm)
+{ /* intended formula is commented out below; nothing is returned yet */
+   //return floatx80_sub(halfPi,floatx80_arcsin(rFm));
+}
+
+floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm)
+{ /* rFn raised to rFm, formed as exp(rFm * ln(rFn)) */
+  return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn))); 
+}
+
+floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm)
+{ /* arctan(rFn / rFm) */
+  return floatx80_arctan(floatx80_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c
new file mode 100644
index 000000000..506821ca8
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11.c
@@ -0,0 +1,206 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "fpa11.h"
+#include "milieu.h"
+#include "fpopcode.h"
+
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+/* forward declarations */
+unsigned int EmulateCPDO(const unsigned int);
+unsigned int EmulateCPDT(const unsigned int);
+unsigned int EmulateCPRT(const unsigned int);
+
+/* Emulator registers */
+FPA11 *fpa11;
+
+/* Put the emulated FPA11 into its reset state.  Serves both as first-time
+   initialisation and as a later reset of the emulator. */
+void resetFPA11(void)
+{
+  int nReg = 8;
+
+  /* mark each of the eight registers as holding no value */
+  while (nReg-- > 0)
+    fpa11->fpreg[nReg].fType = typeNone;
+
+  /* FPSR: nothing set apart from the FP_EMULATOR system id */
+  fpa11->fpsr = FP_EMULATOR;
+
+#if MAINTAIN_FPCR
+  /* FPCR: load the reset pattern */
+  fpa11->fpcr = MASK_RESET;
+#endif
+}
+
+/* Make softfloat round the way the rounding-mode field of opcode asks.
+   A field value that matches none of the known modes is treated as
+   round-to-nearest. */
+void SetRoundingMode(const unsigned int opcode)
+{
+   const unsigned int nField = opcode & MASK_ROUNDING_MODE;
+   unsigned int nBits;		/* what gets recorded in the FPCR */
+
+   if (ROUND_TO_PLUS_INFINITY == nField)
+   {
+      float_rounding_mode = float_round_up;
+      nBits = ROUND_TO_PLUS_INFINITY;
+   }
+   else if (ROUND_TO_MINUS_INFINITY == nField)
+   {
+      float_rounding_mode = float_round_down;
+      nBits = ROUND_TO_MINUS_INFINITY;
+   }
+   else if (ROUND_TO_ZERO == nField)
+   {
+      float_rounding_mode = float_round_to_zero;
+      nBits = ROUND_TO_ZERO;
+   }
+   else
+   {
+      /* ROUND_TO_NEAREST, and the fallback for anything else */
+      float_rounding_mode = float_round_nearest_even;
+      nBits = ROUND_TO_NEAREST;
+   }
+
+#if MAINTAIN_FPCR
+   /* swap the mode bits in the control register for the new ones */
+   fpa11->fpcr = (fpa11->fpcr & ~MASK_ROUNDING_MODE) | nBits;
+#else
+   (void) nBits;
+#endif
+}
+
+/* Set the precision that floatx80 results are rounded to from the
+   precision field of opcode.  A field value that matches none of the
+   known precisions selects the full 80 bits. */
+void SetRoundingPrecision(const unsigned int opcode)
+{
+   const unsigned int nField = opcode & MASK_ROUNDING_PRECISION;
+   unsigned int nBits = 0;	/* FPCR bits; none for an unknown field */
+
+   if (ROUND_SINGLE == nField)
+   {
+      floatx80_rounding_precision = 32;
+      nBits = ROUND_SINGLE;
+   }
+   else if (ROUND_DOUBLE == nField)
+   {
+      floatx80_rounding_precision = 64;
+      nBits = ROUND_DOUBLE;
+   }
+   else
+   {
+      floatx80_rounding_precision = 80;
+      if (ROUND_EXTENDED == nField) nBits = ROUND_EXTENDED;
+   }
+
+#if MAINTAIN_FPCR
+   /* the precision bits are cleared, then the recognised ones set */
+   fpa11->fpcr = (fpa11->fpcr & ~MASK_ROUNDING_PRECISION) | nBits;
+#else
+   (void) nBits;
+#endif
+}
+
+/* Emulate the instruction in the opcode.
+
+   The opcode is offered to the register-transfer, data-operation and
+   data-transfer emulators in that order.  The result is whatever the
+   chosen emulator returns, or 0 when the opcode belongs to none of the
+   three classes. */
+unsigned int EmulateAll(unsigned int opcode)
+{
+  /* initflag still being 0 means the emulator has not been set up for
+     this thread yet: reset it and select the default rounding. */
+  if (0 == fpa11->initflag)		/* good place for __builtin_expect */
+  {
+    resetFPA11();
+    SetRoundingMode(ROUND_TO_NEAREST);
+    SetRoundingPrecision(ROUND_EXTENDED);
+    fpa11->initflag = 1;
+  }
+
+  /* CPRT: conversion, register transfer and comparison opcodes. */
+  if (TEST_OPCODE(opcode,MASK_CPRT))
+  {
+    return EmulateCPRT(opcode);
+  }
+
+  /* CPDO: monadic and dyadic arithmetic opcodes. */
+  if (TEST_OPCODE(opcode,MASK_CPDO))
+  {
+    return EmulateCPDO(opcode);
+  }
+
+  /* CPDT: load/store and load/store multiple opcodes. */
+  if (TEST_OPCODE(opcode,MASK_CPDT))
+  {
+    return EmulateCPDT(opcode);
+  }
+
+  /* Invalid instruction detected.  Return FALSE. */
+
+  return 0;
+}
+
+#if 0 /* alternative decoder working on raw opcode bit fields; not compiled */
+unsigned int EmulateAll1(unsigned int opcode)
+{
+  switch ((opcode >> 24) & 0xf)	/* dispatch on bits 27-24 */
+  {
+     case 0xc:
+     case 0xd:
+       if ((opcode >> 20) & 0x1)	/* bit 20 set: LDF/LFM handlers */
+       {
+          switch ((opcode >> 8) & 0xf)	/* bits 11-8 pick the handler */
+          {
+             case 0x1: return PerformLDF(opcode); break;
+             case 0x2: return PerformLFM(opcode); break;
+             default: return 0;
+          }
+       }
+       else				/* bit 20 clear: STF/SFM handlers */
+       {
+          switch ((opcode >> 8) & 0xf)	/* bits 11-8 pick the handler */
+          {
+             case 0x1: return PerformSTF(opcode); break;
+             case 0x2: return PerformSFM(opcode); break;
+             default: return 0;
+          }
+      }
+     break;
+     
+     case 0xe: 
+       if (opcode & 0x10)	/* NOTE(review): bit 4 set usually means CPRT - calls look swapped */
+         return EmulateCPDO(opcode);
+       else
+         return EmulateCPRT(opcode);
+     break;
+  
+     default: return 0;		/* not one of the classes handled above */
+  }
+}
+#endif
+
diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h
new file mode 100644
index 000000000..4a47a29f4
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11.h
@@ -0,0 +1,61 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPA11_H__
+#define __FPA11_H__
+
+/* includes */
+#include "fpsr.h"		/* FP control and status register definitions */
+#include "softfloat.h"
+
+#define		typeNone		0x00	/* register holds no value */
+#define		typeSingle		0x01	/* fValue.fSingle is in use */
+#define		typeDouble		0x02	/* fValue.fDouble is in use */
+#define		typeExtended		0x03	/* fValue.fExtended is in use */
+/* One emulated FP register: a value plus a tag naming its current format. */
+typedef struct tagFPREG {
+   unsigned int fType;		/* one of the type* codes above */
+   union {
+      float32  fSingle;
+      float64  fDouble;
+      floatx80 fExtended;
+   } fValue;			/* only the member named by fType is meaningful */
+} FPREG;
+
+/* FPA11 device model: the emulator state reached through the fpa11 pointer */
+typedef struct tagFPA11 {
+  int initflag;			/* this is special.  The kernel guarantees
+				   to set it to 0 when a thread is launched,
+				   so we can use it to detect whether this
+				   instance of the emulator needs to be
+				   initialised. */
+  FPREG fpreg[8];		/* 8 floating point registers, each tagged with its format */
+  FPSR fpsr;			/* floating point status register */
+  FPCR fpcr;			/* floating point control register */
+} FPA11;
+
+extern void resetFPA11(void);
+extern void SetRoundingMode(const unsigned int);
+extern void SetRoundingPrecision(const unsigned int);
+
+extern FPA11 *fpa11;
+
+#endif
diff --git a/arch/arm/nwfpe/fpa11.inl b/arch/arm/nwfpe/fpa11.inl
new file mode 100644
index 000000000..321ab7c1c
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11.inl
@@ -0,0 +1,47 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+
+/* Accessors for the floating point status register */
+extern __inline__ unsigned int readFPSR(void)
+{
+  return fpa11->fpsr;
+}
+
+extern __inline__ void writeFPSR(FPSR reg)
+{
+  /* the sysid field is read-only: keep ours, take every other bit from reg */
+  fpa11->fpsr = (reg & ~MASK_SYSID) | (fpa11->fpsr & MASK_SYSID);
+}
+
+/* Accessors for the floating point control register */
+extern __inline__ FPCR readFPCR(void)
+{
+  /* the bits selected by MASK_RFC always read back as zero */
+  return fpa11->fpcr & ~MASK_RFC;
+}
+
+extern __inline__ void writeFPCR(FPCR reg)
+{
+  /* only the bits selected by MASK_WFC are taken from reg; the rest stay */
+  fpa11->fpcr = (fpa11->fpcr & ~MASK_WFC) | (reg & MASK_WFC);
+}
diff --git a/arch/arm/nwfpe/fpa11_cpdo.c b/arch/arm/nwfpe/fpa11_cpdo.c
new file mode 100644
index 000000000..c337c553a
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11_cpdo.c
@@ -0,0 +1,117 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "fpa11.h"
+#include "fpopcode.h"
+
+unsigned int SingleCPDO(const unsigned int opcode);
+unsigned int DoubleCPDO(const unsigned int opcode);
+unsigned int ExtendedCPDO(const unsigned int opcode);
+
+/* Run an arithmetic (CPDO) opcode: choose the working precision, hand the
+   opcode to the matching worker, then bring Fd to the destination size
+   the opcode asks for.  Returns 0 when the opcode is to be left to the
+   invalid-instruction path, otherwise the worker's return value. */
+unsigned int EmulateCPDO(const unsigned int opcode)
+{
+   unsigned int Fd, nWork, nHave, nDest, nRc;
+
+   //fp_printk("EmulateCPDO(0x%08x)\n",opcode);
+
+   /* An opcode without a valid destination size is not ours to emulate. */
+   nDest = getDestinationSize(opcode);
+   if (typeNone == nDest)
+   {
+      return 0;
+   }
+
+   SetRoundingMode(opcode);
+
+   /* Work in the widest format among the operands so that none of their
+      precision is lost.  A monadic opcode starts from the destination
+      size, a dyadic one from the format held in Fn; a register Fm can
+      only widen that choice and a constant Fm leaves it alone. */
+   if (MONADIC_INSTRUCTION(opcode))
+   {
+      nWork = nDest;
+   }
+   else
+   {
+      nWork = fpa11->fpreg[getFn(opcode)].fType;
+   }
+
+   if (!CONSTANT_FM(opcode))
+   {
+      const unsigned int nFmType = fpa11->fpreg[getFm(opcode)].fType;
+      if (nFmType > nWork) nWork = nFmType;
+   }
+
+   if (typeSingle == nWork)
+   {
+      nRc = SingleCPDO(opcode);
+   }
+   else if (typeDouble == nWork)
+   {
+      nRc = DoubleCPDO(opcode);
+   }
+   else if (typeExtended == nWork)
+   {
+      nRc = ExtendedCPDO(opcode);
+   }
+   else
+   {
+      nRc = 0;
+   }
+
+   /* Done if the worker failed or already left Fd in the wanted format. */
+   Fd = getFd(opcode);
+   nHave = fpa11->fpreg[Fd].fType;
+   if ((0 == nRc) || (nDest == nHave))
+   {
+      return nRc;
+   }
+
+   /* Convert Fd in place from the format it holds to the one wanted. */
+   switch (nDest)
+   {
+     case typeSingle:
+       fpa11->fpreg[Fd].fValue.fSingle = (typeDouble == nHave)
+          ? float64_to_float32(fpa11->fpreg[Fd].fValue.fDouble)
+          : floatx80_to_float32(fpa11->fpreg[Fd].fValue.fExtended);
+       break;
+
+     case typeDouble:
+       fpa11->fpreg[Fd].fValue.fDouble = (typeSingle == nHave)
+          ? float32_to_float64(fpa11->fpreg[Fd].fValue.fSingle)
+          : floatx80_to_float64(fpa11->fpreg[Fd].fValue.fExtended);
+       break;
+
+     case typeExtended:
+       fpa11->fpreg[Fd].fValue.fExtended = (typeSingle == nHave)
+          ? float32_to_floatx80(fpa11->fpreg[Fd].fValue.fSingle)
+          : float64_to_floatx80(fpa11->fpreg[Fd].fValue.fDouble);
+       break;
+   }
+
+   fpa11->fpreg[Fd].fType = nDest;
+   return nRc;
+}
diff --git a/arch/arm/nwfpe/fpa11_cpdt.c b/arch/arm/nwfpe/fpa11_cpdt.c
new file mode 100644
index 000000000..9617a79a3
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11_cpdt.c
@@ -0,0 +1,330 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+#include <asm/uaccess.h>
+
+/* Load one word from user address pMem into FP register Fn as a
+   single-precision value and tag the register as typeSingle.
+   The get_user() result is ignored, so a faulting access is not
+   reported to the caller. */
+extern __inline__
+void loadSingle(const unsigned int Fn,const unsigned int *pMem)
+{
+   fpa11->fpreg[Fn].fType = typeSingle;
+   get_user(fpa11->fpreg[Fn].fValue.fSingle, pMem);
+}
+
+/* Load two words from user address pMem into FP register Fn as a
+   double-precision value and tag the register as typeDouble.
+   The words are swapped on the way in: pMem[0] (sign & exponent) is
+   stored as the second word of the value and pMem[1] as the first.
+   get_user() results are ignored. */
+extern __inline__
+void loadDouble(const unsigned int Fn,const unsigned int *pMem)
+{
+   unsigned int *p;
+   p = (unsigned int*)&fpa11->fpreg[Fn].fValue.fDouble;
+   fpa11->fpreg[Fn].fType = typeDouble;
+   get_user(p[0], &pMem[1]);
+   get_user(p[1], &pMem[0]); /* sign & exponent */
+}   
+
+/* Load three words from user address pMem into FP register Fn as an
+   extended-precision value and tag the register as typeExtended.
+   pMem[0] is kept as the first word; the remaining two words are
+   swapped (pMem[2] becomes word 1, pMem[1] becomes word 2).
+   get_user() results are ignored. */
+extern __inline__
+void loadExtended(const unsigned int Fn,const unsigned int *pMem)
+{
+   unsigned int *p;
+   p = (unsigned int*)&fpa11->fpreg[Fn].fValue.fExtended;
+   fpa11->fpreg[Fn].fType = typeExtended;
+   get_user(p[0], &pMem[0]);  /* sign & exponent */
+   get_user(p[1], &pMem[2]);  /* ls bits */
+   get_user(p[2], &pMem[1]);  /* ms bits */
+}   
+
+/* Restore FP register Fn from a three word image at user address pMem
+   (the layout written by storeMultiple).  Bits 14-15 of the first word
+   select the register type; the other two words carry the value.
+   A type that is neither single, double nor extended only updates the
+   type tag.  get_user() results are ignored. */
+extern __inline__
+void loadMultiple(const unsigned int Fn,const unsigned int *pMem)
+{
+   register unsigned int *pWords = (unsigned int*)&(fpa11->fpreg[Fn].fValue);
+   unsigned long header;
+
+   get_user(header, &pMem[0]);
+   fpa11->fpreg[Fn].fType = (header >> 14) & 0x00000003;
+
+   if (typeSingle == fpa11->fpreg[Fn].fType
+       || typeDouble == fpa11->fpreg[Fn].fType)
+   {
+      get_user(pWords[0], &pMem[2]);  /* single, or low word of a double */
+      get_user(pWords[1], &pMem[1]);  /* high word of a double */
+      pWords[2] = 0;                  /* unused */
+   }
+   else if (typeExtended == fpa11->fpreg[Fn].fType)
+   {
+      get_user(pWords[1], &pMem[2]);
+      get_user(pWords[2], &pMem[1]);  /* most significant word */
+      /* drop the type bits, keep the remaining header bits */
+      pWords[0] = (header & 0x80003fff);
+   }
+}
+
+/* Store FP register Fn to user address pMem as one single-precision
+   word, converting from double or extended first when the register
+   currently holds one of those.  The put_user() result is ignored. */
+extern __inline__
+void storeSingle(const unsigned int Fn,unsigned int *pMem)
+{
+   float32 result;
+   register unsigned int *pBits = (unsigned int*)&result;
+
+   if (typeDouble == fpa11->fpreg[Fn].fType)
+      result = float64_to_float32(fpa11->fpreg[Fn].fValue.fDouble);
+   else if (typeExtended == fpa11->fpreg[Fn].fType)
+      result = floatx80_to_float32(fpa11->fpreg[Fn].fValue.fExtended);
+   else
+      result = fpa11->fpreg[Fn].fValue.fSingle;
+
+   put_user(pBits[0], pMem);
+}
+
+/* Store FP register Fn to user address pMem as a two word
+   double-precision value, converting from single or extended first
+   when needed.  The words are written swapped: the second word of the
+   value goes to pMem[0] and the first to pMem[1].  put_user() results
+   are ignored. */
+extern __inline__
+void storeDouble(const unsigned int Fn,unsigned int *pMem)
+{
+   float64 result;
+   register unsigned int *pBits = (unsigned int*)&result;
+
+   if (typeSingle == fpa11->fpreg[Fn].fType)
+      result = float32_to_float64(fpa11->fpreg[Fn].fValue.fSingle);
+   else if (typeExtended == fpa11->fpreg[Fn].fType)
+      result = floatx80_to_float64(fpa11->fpreg[Fn].fValue.fExtended);
+   else
+      result = fpa11->fpreg[Fn].fValue.fDouble;
+
+   put_user(pBits[1], &pMem[0]);	/* most significant word */
+   put_user(pBits[0], &pMem[1]);	/* least significant word */
+}
+
+/* Store FP register Fn to user address pMem as a three word
+   extended-precision value, converting from single or double first
+   when needed.  Word 0 is written in place; words 1 and 2 are written
+   swapped.  put_user() results are ignored. */
+extern __inline__
+void storeExtended(const unsigned int Fn,unsigned int *pMem)
+{
+   floatx80 result;
+   register unsigned int *pBits = (unsigned int*)&result;
+
+   if (typeSingle == fpa11->fpreg[Fn].fType)
+      result = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle);
+   else if (typeDouble == fpa11->fpreg[Fn].fType)
+      result = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble);
+   else
+      result = fpa11->fpreg[Fn].fValue.fExtended;
+
+   put_user(pBits[0], &pMem[0]);	/* sign & exponent */
+   put_user(pBits[1], &pMem[2]);
+   put_user(pBits[2], &pMem[1]);	/* most significant word */
+}
+
+/* Save FP register Fn as a three word image at user address pMem
+   without converting it.  The register type is recorded in bits 14-15
+   of the first word so that loadMultiple can restore it.  Nothing is
+   written for a register whose type is not single, double or extended.
+   put_user() results are ignored. */
+extern __inline__
+void storeMultiple(const unsigned int Fn,unsigned int *pMem)
+{
+   register unsigned int *pWords = (unsigned int*)&(fpa11->fpreg[Fn].fValue);
+   register unsigned int nTag = fpa11->fpreg[Fn].fType;
+
+   if (typeSingle == nTag || typeDouble == nTag)
+   {
+      put_user(pWords[0], &pMem[2]);	/* single, or low word of a double */
+      put_user(pWords[1], &pMem[1]);	/* high word of a double */
+      put_user(nTag << 14, &pMem[0]);
+   }
+   else if (typeExtended == nTag)
+   {
+      put_user(pWords[2], &pMem[1]);	/* most significant word */
+      put_user(pWords[1], &pMem[2]);
+      put_user((pWords[0] & 0x80003fff) | (nTag << 14), &pMem[0]);
+   }
+}
+
+/* Emulate LDF: load one floating point value from memory into Fd.
+   The transfer address is the base register Rn (adjusted when Rn is
+   the PC), optionally pre-indexed by the instruction offset; the
+   offset is scaled by unsigned int pointer arithmetic.  Returns 1 on
+   success, 0 for an unsupported transfer length.  Write-back of the
+   final address happens in either case when the instruction asks
+   for it. */
+unsigned int PerformLDF(const unsigned int opcode)
+{
+   const unsigned int nRn = getRn(opcode);
+   unsigned int *pStart, *pEnd, *pTransfer;
+   unsigned int nLength, nResult = 1;
+
+   //fp_printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+
+   pStart = (unsigned int*)readRegister(nRn);
+   if (REG_PC == nRn)
+      pStart += 2;
+
+   pEnd = pStart;
+   if (BIT_UP_SET(opcode))
+      pEnd += getOffset(opcode);
+   else
+      pEnd -= getOffset(opcode);
+
+   pTransfer = PREINDEXED(opcode) ? pEnd : pStart;
+
+   nLength = opcode & MASK_TRANSFER_LENGTH;
+   if (TRANSFER_SINGLE == nLength)
+      loadSingle(getFd(opcode), pTransfer);
+   else if (TRANSFER_DOUBLE == nLength)
+      loadDouble(getFd(opcode), pTransfer);
+   else if (TRANSFER_EXTENDED == nLength)
+      loadExtended(getFd(opcode), pTransfer);
+   else
+      nResult = 0;
+
+   if (WRITE_BACK(opcode))
+      writeRegister(nRn, (unsigned int)pEnd);
+
+   return nResult;
+}
+
+/* Emulate STF: store the floating point value in Fd to memory.
+   The rounding mode is set to round-to-nearest before anything else
+   (presumably for the conversions done by the store helpers).
+   Address calculation and write-back follow the same scheme as
+   PerformLDF.  Returns 1 on success, 0 for an unsupported transfer
+   length. */
+unsigned int PerformSTF(const unsigned int opcode)
+{
+   const unsigned int nRn = getRn(opcode);
+   unsigned int *pStart, *pEnd, *pTransfer;
+   unsigned int nLength, nResult = 1;
+
+   //fp_printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+   SetRoundingMode(ROUND_TO_NEAREST);
+
+   pStart = (unsigned int*)readRegister(nRn);
+   if (REG_PC == nRn)
+      pStart += 2;
+
+   pEnd = pStart;
+   if (BIT_UP_SET(opcode))
+      pEnd += getOffset(opcode);
+   else
+      pEnd -= getOffset(opcode);
+
+   pTransfer = PREINDEXED(opcode) ? pEnd : pStart;
+
+   nLength = opcode & MASK_TRANSFER_LENGTH;
+   if (TRANSFER_SINGLE == nLength)
+      storeSingle(getFd(opcode), pTransfer);
+   else if (TRANSFER_DOUBLE == nLength)
+      storeDouble(getFd(opcode), pTransfer);
+   else if (TRANSFER_EXTENDED == nLength)
+      storeExtended(getFd(opcode), pTransfer);
+   else
+      nResult = 0;
+
+   if (WRITE_BACK(opcode))
+      writeRegister(nRn, (unsigned int)pEnd);
+
+   return nResult;
+}
+
+/* Emulate LFM: load getRegisterCount(opcode) consecutive FP registers,
+   starting at Fd and wrapping from register 7 back to 0, from three
+   word images in memory.  Address calculation and write-back follow
+   the same scheme as PerformLDF.  Always returns 1. */
+unsigned int PerformLFM(const unsigned int opcode)
+{
+   const unsigned int nRn = getRn(opcode);
+   unsigned int nLeft, nReg;
+   unsigned int *pStart, *pEnd, *pCursor;
+
+   pStart = (unsigned int*)readRegister(nRn);
+   if (REG_PC == nRn)
+      pStart += 2;
+
+   pEnd = pStart;
+   if (BIT_UP_SET(opcode))
+      pEnd += getOffset(opcode);
+   else
+      pEnd -= getOffset(opcode);
+
+   pCursor = PREINDEXED(opcode) ? pEnd : pStart;
+
+   nReg = getFd(opcode);
+   nLeft = getRegisterCount(opcode);
+   while (nLeft > 0)
+   {
+      loadMultiple(nReg, pCursor);
+      pCursor += 3;		/* each register image is three words */
+      nReg++;
+      if (8 == nReg)
+         nReg = 0;
+      nLeft--;
+   }
+
+   if (WRITE_BACK(opcode))
+      writeRegister(nRn, (unsigned int)pEnd);
+
+   return 1;
+}
+
+/* Emulate SFM: store getRegisterCount(opcode) consecutive FP registers,
+   starting at Fd and wrapping from register 7 back to 0, as three
+   word images in memory.  Address calculation and write-back follow
+   the same scheme as PerformLDF.  Always returns 1. */
+unsigned int PerformSFM(const unsigned int opcode)
+{
+   const unsigned int nRn = getRn(opcode);
+   unsigned int nLeft, nReg;
+   unsigned int *pStart, *pEnd, *pCursor;
+
+   pStart = (unsigned int*)readRegister(nRn);
+   if (REG_PC == nRn)
+      pStart += 2;
+
+   pEnd = pStart;
+   if (BIT_UP_SET(opcode))
+      pEnd += getOffset(opcode);
+   else
+      pEnd -= getOffset(opcode);
+
+   pCursor = PREINDEXED(opcode) ? pEnd : pStart;
+
+   nReg = getFd(opcode);
+   nLeft = getRegisterCount(opcode);
+   while (nLeft > 0)
+   {
+      storeMultiple(nReg, pCursor);
+      pCursor += 3;		/* each register image is three words */
+      nReg++;
+      if (8 == nReg)
+         nReg = 0;
+      nLeft--;
+   }
+
+   if (WRITE_BACK(opcode))
+      writeRegister(nRn, (unsigned int)pEnd);
+
+   return 1;
+}
+
+#if 1
+/* Dispatch a coprocessor data transfer instruction to the matching
+   LDF/LFM/STF/SFM handler, testing the opcode classes in that order.
+   Returns the handler's result, or 0 when the opcode matches none of
+   them. */
+unsigned int EmulateCPDT(const unsigned int opcode)
+{
+  //fp_printk("EmulateCPDT(0x%08x)\n",opcode);
+
+  if (LDF_OP(opcode))
+    return PerformLDF(opcode);
+
+  if (LFM_OP(opcode))
+    return PerformLFM(opcode);
+
+  if (STF_OP(opcode))
+    return PerformSTF(opcode);
+
+  if (SFM_OP(opcode))
+    return PerformSFM(opcode);
+
+  return 0;
+}
+#endif
diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c
new file mode 100644
index 000000000..bfe13ba1f
--- /dev/null
+++ b/arch/arm/nwfpe/fpa11_cprt.c
@@ -0,0 +1,313 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell, 1999
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "milieu.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+#include "fpa11.inl"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+extern flag floatx80_is_nan(floatx80);
+extern flag float64_is_nan( float64);
+extern flag float32_is_nan( float32);
+
+void SetRoundingMode(const unsigned int opcode);
+
+unsigned int PerformFLT(const unsigned int opcode);
+unsigned int PerformFIX(const unsigned int opcode);
+
+static unsigned int
+PerformComparison(const unsigned int opcode);
+
+/* Emulate a coprocessor register transfer instruction.
+   Comparisons (bit 23 set) are handed to PerformComparison; the
+   remaining instructions are selected by bits 20-22.  FLT and FIX
+   return their handler's result, WFS and RFS copy the FPSR from/to an
+   ARM register and return 1, anything else returns 0. */
+unsigned int EmulateCPRT(const unsigned int opcode)
+{
+  unsigned int nRc = 1;
+
+  //fp_printk("EmulateCPRT(0x%08x)\n",opcode);
+
+  if (opcode & 0x800000)
+  {
+     /* This is some variant of a comparison (PerformComparison will
+	sort out which one).  Since most of the other CPRT
+	instructions are oddball cases of some sort or other it makes
+	sense to pull this out into a fast path.  */
+     return PerformComparison(opcode);
+  }
+
+  /* Hint to GCC that we'd like a jump table rather than a load of CMPs */
+  switch ((opcode & 0x700000) >> 20)
+  {
+    case  FLT_CODE >> 20: nRc = PerformFLT(opcode); break;
+    case  FIX_CODE >> 20: nRc = PerformFIX(opcode); break;
+    
+    case  WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break;
+    case  RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break;
+
+#if 0
+    /* ?? Not at all sure about the mode checks here.  Linux never
+       calls the emulator from a non-USR fault but we always run in SVC
+       mode.  Is there even any point trying to emulate the way FPA11
+       behaves in this respect?
+
+       No - and I quote: 'The FPCR may only be present in some
+       implementations: it is there to control the hardware in an
+       implementation-specific manner, ...  The user mode of the
+       ARM is not permitted to use this register, and the WFC and
+       RFC instructions will trap if tried from user mode.'
+       Therefore, we do not provide the RFC and WFC instructions.
+        (rmk, 3/05/1999)
+     */
+    case  WFC_CODE >> 20:
+    {
+       int mode = 0;
+       /* mode is written by the asm, so it must be an output operand
+          held in a register. */
+       __asm__ volatile ("mrs %0, cpsr; and %0, %0, #0x1f;" : "=r" (mode));
+       nRc = (0x13 == mode) ? 1 : 0;	/* in SVC processor mode? */
+       if (nRc) writeFPCR(readRegister(getRd(opcode)));
+    }
+    break;
+    
+    case  RFC_CODE >> 20:
+    {
+       int mode = 0;
+       /* mode is written by the asm, so it must be an output operand
+          held in a register. */
+       __asm__ volatile ("mrs %0, cpsr; and %0, %0, #0x1f;" : "=r" (mode));
+       nRc = (0x13 == mode) ? 1 : 0;	/* in SVC processor mode? */
+       if (nRc) writeRegister(getRd(opcode),readFPCR());
+    }
+    break;
+#endif
+
+    default: nRc = 0;
+  }
+  
+  return nRc;
+}
+
+/* Emulate FLT: convert the integer in ARM register Rd to floating
+   point and place it in FP register Fn.  The target format is chosen
+   by the rounding precision field of the opcode, and the register is
+   tagged with the matching type.  Returns 1 on success, 0 when the
+   precision field is not single, double or extended. */
+unsigned int PerformFLT(const unsigned int opcode)
+{
+   const unsigned int nDest = getFn(opcode);
+   const unsigned int nPrecision = opcode & MASK_ROUNDING_PRECISION;
+
+   SetRoundingMode(opcode);
+   SetRoundingPrecision(opcode);
+
+   if (ROUND_SINGLE == nPrecision)
+   {
+      fpa11->fpreg[nDest].fType = typeSingle;
+      fpa11->fpreg[nDest].fValue.fSingle =
+         int32_to_float32(readRegister(getRd(opcode)));
+      return 1;
+   }
+
+   if (ROUND_DOUBLE == nPrecision)
+   {
+      fpa11->fpreg[nDest].fType = typeDouble;
+      fpa11->fpreg[nDest].fValue.fDouble =
+         int32_to_float64(readRegister(getRd(opcode)));
+      return 1;
+   }
+
+   if (ROUND_EXTENDED == nPrecision)
+   {
+      fpa11->fpreg[nDest].fType = typeExtended;
+      fpa11->fpreg[nDest].fValue.fExtended =
+         int32_to_floatx80(readRegister(getRd(opcode)));
+      return 1;
+   }
+
+   return 0;
+}
+
+/* Emulate FIX: convert the value in FP register Fm to a 32-bit integer
+   using the rounding mode encoded in the opcode and write it to ARM
+   register Rd.  Returns 1 on success, 0 when the source register has
+   no recognised type. */
+unsigned int PerformFIX(const unsigned int opcode)
+{
+   const unsigned int nSrc = getFm(opcode);
+
+   SetRoundingMode(opcode);
+
+   if (typeSingle == fpa11->fpreg[nSrc].fType)
+   {
+      writeRegister(getRd(opcode),
+                    float32_to_int32(fpa11->fpreg[nSrc].fValue.fSingle));
+      return 1;
+   }
+
+   if (typeDouble == fpa11->fpreg[nSrc].fType)
+   {
+      writeRegister(getRd(opcode),
+                    float64_to_int32(fpa11->fpreg[nSrc].fValue.fDouble));
+      return 1;
+   }
+
+   if (typeExtended == fpa11->fpreg[nSrc].fType)
+   {
+      writeRegister(getRd(opcode),
+                    floatx80_to_int32(fpa11->fpreg[nSrc].fValue.fExtended));
+      return 1;
+   }
+
+   return 0;
+}
+
+   
+/* Compare two ordered extended values and write the result to the
+   condition codes: CC_NEGATIVE when Fn < Fm, CC_ZERO when Fn == Fm,
+   CC_CARRY when Fm < Fn.  Always returns 1.
+   NOTE(review): carry is only set for strictly greater here, not for
+   equal -- confirm against the FPA definition of the C flag. */
+static unsigned int __inline__
+PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
+{
+   unsigned int nzcv;
+
+   nzcv  = floatx80_lt(Fn,Fm) ? CC_NEGATIVE : 0;	/* less than */
+   nzcv |= floatx80_eq(Fn,Fm) ? CC_ZERO : 0;	/* equal */
+   nzcv |= floatx80_lt(Fm,Fn) ? CC_CARRY : 0;	/* greater than */
+
+   writeConditionCodes(nzcv);
+   return 1;
+}
+
+/* Emulate the FPA comparison instructions.
+   Both operands are widened to extended precision and compared; the
+   resulting N, Z, C, V flags are written to the saved user condition
+   codes through writeConditionCodes().  Fm may be one of the built-in
+   constants.  When n_flag is set the sign of Fm is flipped before the
+   comparison.  If either operand is a NaN the result is "unordered":
+   V is set, C is set as well when the AC bit of the FPSR is on, and an
+   invalid operation exception is raised for the E variants.
+   Returns 1 when the instruction was emulated, 0 when a source
+   register has no recognised type. */
+   
+static unsigned int PerformComparison(const unsigned int opcode)
+{
+   unsigned int Fn, Fm;
+   floatx80 rFn, rFm;
+   int e_flag = opcode & 0x400000;	/* 1 if CxFE */
+   int n_flag = opcode & 0x200000;	/* 1 if CNxx */
+   unsigned int flags = 0;
+
+   //fp_printk("PerformComparison(0x%08x)\n",opcode);
+
+   Fn = getFn(opcode);
+   Fm = getFm(opcode);
+
+   /* Check for unordered condition and convert all operands to 80-bit
+      format.
+      ?? Might be some mileage in avoiding this conversion if possible.
+      Eg, if both operands are 32-bit, detect this and do a 32-bit
+      comparison (cheaper than an 80-bit one).  */
+   switch (fpa11->fpreg[Fn].fType)
+   {
+      case typeSingle: 
+        //fp_printk("single.\n");
+	if (float32_is_nan(fpa11->fpreg[Fn].fValue.fSingle))
+	   goto unordered;
+        rFn = float32_to_floatx80(fpa11->fpreg[Fn].fValue.fSingle);
+      break;
+
+      case typeDouble: 
+        //fp_printk("double.\n");
+	if (float64_is_nan(fpa11->fpreg[Fn].fValue.fDouble))
+	   goto unordered;
+        rFn = float64_to_floatx80(fpa11->fpreg[Fn].fValue.fDouble);
+      break;
+      
+      case typeExtended: 
+        //fp_printk("extended.\n");
+	if (floatx80_is_nan(fpa11->fpreg[Fn].fValue.fExtended))
+	   goto unordered;
+        rFn = fpa11->fpreg[Fn].fValue.fExtended;
+      break;
+      
+      default: return 0;
+   }
+
+   if (CONSTANT_FM(opcode))
+   {
+     /* here Fm is an index into the constant table, not a register */
+     //fp_printk("Fm is a constant: #%d.\n",Fm);
+     rFm = getExtendedConstant(Fm);
+     if (floatx80_is_nan(rFm))
+        goto unordered;
+   }
+   else
+   {
+     //fp_printk("Fm = r%d which contains a ",Fm);
+      switch (fpa11->fpreg[Fm].fType)
+      {
+         case typeSingle: 
+           //fp_printk("single.\n");
+	   if (float32_is_nan(fpa11->fpreg[Fm].fValue.fSingle))
+	      goto unordered;
+           rFm = float32_to_floatx80(fpa11->fpreg[Fm].fValue.fSingle);
+         break;
+
+         case typeDouble: 
+           //fp_printk("double.\n");
+	   if (float64_is_nan(fpa11->fpreg[Fm].fValue.fDouble))
+	      goto unordered;
+           rFm = float64_to_floatx80(fpa11->fpreg[Fm].fValue.fDouble);
+         break;
+      
+         case typeExtended: 
+           //fp_printk("extended.\n");
+	   if (floatx80_is_nan(fpa11->fpreg[Fm].fValue.fExtended))
+	      goto unordered;
+           rFm = fpa11->fpreg[Fm].fValue.fExtended;
+         break;
+      
+         default: return 0;
+      }
+   }
+
+   if (n_flag)
+   {
+      /* flip bit 15 of the high half, i.e. negate Fm (CNF/CNFE) */
+      rFm.high ^= 0x8000;
+   }
+
+   return PerformComparisonOperation(rFn,rFm);
+
+ unordered:
+   /* ?? The FPA data sheet is pretty vague about this, in particular
+      about whether the non-E comparisons can ever raise exceptions.
+      This implementation is based on a combination of what it says in
+      the data sheet, observation of how the Acorn emulator actually
+      behaves (and how programs expect it to) and guesswork.  */
+   flags |= CC_OVERFLOW;
+
+   if (BIT_AC & readFPSR()) flags |= CC_CARRY;
+
+   if (e_flag) float_raise(float_flag_invalid);
+
+   writeConditionCodes(flags);
+   return 1;
+}
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
new file mode 100644
index 000000000..fb05fc6fb
--- /dev/null
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -0,0 +1,167 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+    (c) Philip Blundell, 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+
+#ifdef MODULE
+#include <linux/module.h>
+#include <linux/version.h>
+#else
+#define MOD_INC_USE_COUNT
+#define MOD_DEC_USE_COUNT
+#endif
+
+/* XXX */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/pgtable.h>
+/* XXX */
+
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpmodule.h"
+#include "fpa11.h"
+#include "fpa11.inl"
+
+/* external data */
+extern FPA11 *fpa11;
+
+/* kernel symbols required for signal handling */
+typedef struct task_struct*	PTASK;
+
+#ifdef MODULE
+int fp_printk(const char *,...);
+void fp_send_sig(unsigned long sig, PTASK p, int priv);
+#if LINUX_VERSION_CODE > 0x20115
+MODULE_AUTHOR("Scott Bambrough <scottb@corelcomputer.com>");
+MODULE_DESCRIPTION("NWFPE floating point emulator");
+#endif
+
+#else
+#define fp_printk	printk
+#define fp_send_sig	send_sig
+#define kern_fp_enter	fp_enter
+#endif
+
+/* kernel function prototypes required */
+void C_SYMBOL_NAME(fp_setup)(void);
+
+/* external declarations for saved kernel symbols */
+extern unsigned int C_SYMBOL_NAME(kern_fp_enter);
+
+/* forward declarations */
+extern void nwfpe_enter(void);
+
+/* Original value of fp_enter from kernel before patched by fpe_init. */ 
+static unsigned int orig_fp_enter;
+
+/* Address of user registers on the kernel stack. */
+unsigned int *userRegisters;
+
+/* Print the emulator banner (title, version, copyright) through
+   fp_printk in three separate calls.  Only the title string carries
+   the "<4>" log level prefix. */
+void __init C_SYMBOL_NAME(fpe_version)(void)
+{
+  static const char szTitle[] = "<4>NetWinder Floating Point Emulator ";
+  static const char szVersion[] = "V0.94.1 ";
+  static const char szCopyright[] = "(c) 1998 Corel Computer Corp.\n";
+  C_SYMBOL_NAME(fp_printk)(szTitle);
+  C_SYMBOL_NAME(fp_printk)(szVersion);
+  C_SYMBOL_NAME(fp_printk)(szCopyright);
+}
+
+/* Install the emulator: print the banner, remember the current value
+   of kern_fp_enter in orig_fp_enter and replace it with the address of
+   nwfpe_enter.  Always returns 0. */
+int __init fpe_init(void)
+{
+  /* Display title, version and copyright information. */
+  C_SYMBOL_NAME(fpe_version)();
+
+  /* Save pointer to the old FP handler and then patch ourselves in */
+  orig_fp_enter = C_SYMBOL_NAME(kern_fp_enter);
+  C_SYMBOL_NAME(kern_fp_enter) = (unsigned int)C_SYMBOL_NAME(nwfpe_enter);
+
+  return 0;
+}
+
+#ifdef MODULE
+/* Module entry point: installs the emulator through fpe_init() and
+   returns its result. */
+int init_module(void)
+{
+  return(fpe_init());
+}
+
+/* Module exit point: puts back the FP entry value that fpe_init()
+   saved in orig_fp_enter. */
+void cleanup_module(void)
+{
+  /* Restore the values we saved earlier. */
+  C_SYMBOL_NAME(kern_fp_enter) = orig_fp_enter;
+}
+#endif
+
+#define _ARM_pc 60
+#define _ARM_cpsr 64
+
+/*
+ScottB:  November 4, 1998
+
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c.  Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+
+[1/1/99: Not quite true any more unfortunately.  There is Linux-specific
+code to access data in user space in some other source files at the 
+moment.  --philb]
+
+float_exception_flags is a global variable in SoftFloat.
+
+This function is called by the SoftFloat routines to raise a floating
+point exception.  We check the trap enable byte in the FPSR, and raise
+a SIGFPE exception if necessary.  If not the relevant bits in the 
+cumulative exceptions flag byte are set and we return.
+*/
+
+/* Raise the floating point exception(s) given in `flags'.
+   The flags are always accumulated in float_exception_flags.  If the
+   FPSR has the trap enable bit set for any of them (the enable bits
+   sit 16 bits above the corresponding flag bits) SIGFPE is sent to
+   the current task; otherwise the flags are merged into the FPSR. */
+void float_raise(signed char flags)
+{
+#if 0
+  printk(KERN_DEBUG "NWFPE: exception %08x at %08x from %08x\n", flags,
+	 __builtin_return_address(0), userRegisters[15]);
+#endif
+
+  float_exception_flags |= flags;
+  if (readFPSR() & (flags << 16))
+  {
+    /* raise exception */
+    C_SYMBOL_NAME(fp_send_sig)(SIGFPE,C_SYMBOL_NAME(current),1);
+  }
+  else
+  {
+    /* Set the cumulative exception flags.  writeFPSR() is handed a
+       complete register value (as for WFS), so merge the new flags
+       into the current contents instead of overwriting the trap
+       enable bits and the flags accumulated so far. */
+    writeFPSR(readFPSR() | (unsigned char)flags);
+  }
+}
diff --git a/arch/arm/nwfpe/fpmodule.h b/arch/arm/nwfpe/fpmodule.h
new file mode 100644
index 000000000..39c762935
--- /dev/null
+++ b/arch/arm/nwfpe/fpmodule.h
@@ -0,0 +1,53 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPMODULE_H__
+#define __FPMODULE_H__
+
+#include <linux/config.h>
+
+/* Indices into the saved user register array (userRegisters).
+   With CONFIG_CPU_32 the CPSR has a slot of its own; otherwise
+   REG_CPSR is the same slot as REG_PC. */
+#ifdef CONFIG_CPU_32
+#define REG_ORIG_R0	17
+#define REG_CPSR	16
+#else
+#define REG_ORIG_R0	16
+#define REG_CPSR	15
+#endif
+
+#define REG_PC		15
+#define REG_LR		14
+#define REG_SP		13
+#define REG_IP		12
+#define REG_FP		11
+#define REG_R10		10
+#define REG_R9		9
+#define REG_R8		8
+#define REG_R7		7
+#define REG_R6		6
+#define REG_R5		5
+#define REG_R4		4
+#define REG_R3		3
+#define REG_R2		2
+#define REG_R1		1
+#define REG_R0		0
+
+#endif
diff --git a/arch/arm/nwfpe/fpmodule.inl b/arch/arm/nwfpe/fpmodule.inl
new file mode 100644
index 000000000..c76b7fd55
--- /dev/null
+++ b/arch/arm/nwfpe/fpmodule.inl
@@ -0,0 +1,88 @@
+/*
+   NetWinder Floating Point Emulator
+   (c) Corel Computer Corporation, 1998
+
+   Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Address of user registers on the kernel stack. */
+extern unsigned int *userRegisters;
+
+/* Return the saved user value of ARM register nReg.  The PC is
+   returned with 4 subtracted, see below. */
+extern __inline__
+unsigned int readRegister(const unsigned int nReg)
+{
+	if (REG_PC != nReg)
+		return userRegisters[nReg];
+
+	/* The CPU considers the trapping instruction done, so the saved
+	   PC already points 4 bytes past it.  Undo that here: LDF/STF
+	   with Rn = PC rely on a correct PC because they use PC+8 in
+	   their address calculations. */
+	return userRegisters[nReg] - 4;
+}
+
+/* Store val as the saved user value of ARM register nReg.  Unlike
+   readRegister, no adjustment is made for the PC. */
+extern __inline__
+void writeRegister(const unsigned int nReg, const unsigned int val)
+{
+	userRegisters[nReg] = val;
+}
+
+/* Return the saved user CPSR.  This goes through readRegister, so
+   when REG_CPSR equals REG_PC the value has 4 subtracted from it. */
+extern __inline__
+unsigned int readCPSR(void)
+{
+	return (readRegister(REG_CPSR));
+}
+
+/* Replace the saved user CPSR with val. */
+extern __inline__
+void writeCPSR(const unsigned int val)
+{
+	writeRegister(REG_CPSR, val);
+}
+
+/* Return the condition code bits (CC_MASK) of the saved user CPSR.
+   Builds with __FPEM_TEST__ defined always return 0. */
+extern __inline__
+unsigned int readConditionCodes(void)
+{
+#ifdef __FPEM_TEST__
+	return (0);
+#else
+	return (readCPSR() & CC_MASK);
+#endif
+}
+
+/* Replace the condition code bits (CC_MASK) of the saved user CPSR
+   with those of val; all other bits of the slot are preserved. */
+extern __inline__
+void writeConditionCodes(const unsigned int val)
+{
+	/*
+	 * userRegisters is accessed directly rather than through
+	 * readRegister/writeRegister because the CPSR slot may be
+	 * the PC register itself.
+	 */
+	const unsigned int kept = userRegisters[REG_CPSR] & ~CC_MASK;
+	const unsigned int updated = val & CC_MASK;
+
+	userRegisters[REG_CPSR] = kept | updated;
+}
+
+/* Return the word at pMem using a plain dereference (no get_user). */
+extern __inline__
+unsigned int readMemoryInt(unsigned int *pMem)
+{
+	return *pMem;
+}
diff --git a/arch/arm/nwfpe/fpopcode.c b/arch/arm/nwfpe/fpopcode.c
new file mode 100644
index 000000000..aa91e1e95
--- /dev/null
+++ b/arch/arm/nwfpe/fpopcode.c
@@ -0,0 +1,164 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpsr.h"
+#include "fpa11.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+/* The eight constants returned by getExtendedConstant, getDoubleConstant
+   and getSingleConstant, in each of the three formats.  The tables are
+   only ever read, hence const. */
+static const floatx80 floatx80Constant[] = {
+  { 0x0000, 0x0000000000000000ULL},	/* extended 0.0 */
+  { 0x3fff, 0x8000000000000000ULL},	/* extended 1.0 */
+  { 0x4000, 0x8000000000000000ULL},	/* extended 2.0 */
+  { 0x4000, 0xc000000000000000ULL},	/* extended 3.0 */
+  { 0x4001, 0x8000000000000000ULL},	/* extended 4.0 */
+  { 0x4001, 0xa000000000000000ULL},	/* extended 5.0 */
+  { 0x3ffe, 0x8000000000000000ULL},	/* extended 0.5 */
+  { 0x4002, 0xa000000000000000ULL}	/* extended 10.0 */
+};
+
+static const float64 float64Constant[] = {
+  0x0000000000000000ULL,		/* double 0.0 */
+  0x3ff0000000000000ULL,		/* double 1.0 */
+  0x4000000000000000ULL,		/* double 2.0 */
+  0x4008000000000000ULL,		/* double 3.0 */
+  0x4010000000000000ULL,		/* double 4.0 */
+  0x4014000000000000ULL,		/* double 5.0 */
+  0x3fe0000000000000ULL,		/* double 0.5 */
+  0x4024000000000000ULL			/* double 10.0 */
+};
+
+static const float32 float32Constant[] = {
+  0x00000000,				/* single 0.0 */
+  0x3f800000,				/* single 1.0 */
+  0x40000000,				/* single 2.0 */
+  0x40400000,				/* single 3.0 */
+  0x40800000,				/* single 4.0 */
+  0x40a00000,				/* single 5.0 */
+  0x3f000000,				/* single 0.5 */
+  0x41200000				/* single 10.0 */
+};
+
+/* Return constant number nIndex in extended precision.  nIndex is
+   not range checked; the table has eight entries. */
+floatx80 getExtendedConstant(const unsigned int nIndex)
+{
+   return floatx80Constant[nIndex];
+} 
+
+/* Return constant number nIndex in double precision.  nIndex is
+   not range checked; the table has eight entries. */
+float64 getDoubleConstant(const unsigned int nIndex)
+{
+   return float64Constant[nIndex];
+} 
+
+/* Return constant number nIndex in single precision.  nIndex is
+   not range checked; the table has eight entries. */
+float32 getSingleConstant(const unsigned int nIndex)
+{
+   return float32Constant[nIndex];
+} 
+
+/* Decode the transfer length field of a data transfer opcode.
+   Returns 1 for single, 2 for double, 3 for extended precision and
+   0 for any other encoding. */
+unsigned int getTransferLength(const unsigned int opcode)
+{
+  const unsigned int nField = opcode & MASK_TRANSFER_LENGTH;
+
+  if (0x00000000 == nField) return(1);	/* single precision */
+  if (0x00008000 == nField) return(2);	/* double precision */
+  if (0x00400000 == nField) return(3);	/* extended precision */
+
+  return(0);
+}
+
+/* Decode the register count field of an LFM/SFM opcode.  The all-zero
+   encoding stands for four registers; the others give 1, 2 and 3.
+   Returns 0 if the masked field matches none of the encodings. */
+unsigned int getRegisterCount(const unsigned int opcode)
+{
+  const unsigned int nField = opcode & MASK_REGISTER_COUNT;
+
+  if (0x00000000 == nField) return(4);
+  if (0x00008000 == nField) return(1);
+  if (0x00400000 == nField) return(2);
+  if (0x00408000 == nField) return(3);
+
+  return(0);
+}
+
+/* Decode the rounding precision field of an opcode into 1, 2 or 3
+   for the encodings 0x00000000, 0x00000080 and 0x00080000
+   respectively; any other encoding yields 0. */
+unsigned int getRoundingPrecision(const unsigned int opcode)
+{
+  const unsigned int nField = opcode & MASK_ROUNDING_PRECISION;
+
+  if (0x00000000 == nField) return(1);
+  if (0x00000080 == nField) return(2);
+  if (0x00080000 == nField) return(3);
+
+  return(0);
+}
+
+/* Decode the destination size field of an opcode into the matching
+   register type: typeSingle, typeDouble or typeExtended.  Any other
+   encoding yields typeNone. */
+unsigned int getDestinationSize(const unsigned int opcode)
+{
+  const unsigned int nField = opcode & MASK_DESTINATION_SIZE;
+
+  if (0x00000000 == nField) return(typeSingle);
+  if (0x00000080 == nField) return(typeDouble);
+  if (0x00080000 == nField) return(typeExtended);
+
+  return(typeNone);
+}
+
+/* Condition code lookup table.
+   The table is indexed by the condition field of an instruction
+   (EQ, NE, ... LT, GT, AL, NV).  Within each entry, bit number NZCV
+   (the four flags read as a 4-bit value, N most significant) is set
+   when the condition passes for that flag combination. */
+unsigned short aCC[16] = {
+    0xF0F0,	/* EQ: Z set */
+    0x0F0F,	/* NE: Z clear */
+    0xCCCC,	/* CS: C set */
+    0x3333,	/* CC: C clear */
+    0xFF00,	/* MI: N set */
+    0x00FF,	/* PL: N clear */
+    0xAAAA,	/* VS: V set */
+    0x5555,	/* VC: V clear */
+    0x0C0C,	/* HI: C set && Z clear */
+    0xF3F3,	/* LS: C clear || Z set */
+    0xAA55,	/* GE: N == V */
+    0x55AA,	/* LT: N != V */
+    0x0A05,	/* GT: !Z && (N == V) */
+    0xF5FA,	/* LE: Z || (N != V) */
+    0xFFFF,	/* AL: always */
+    0		/* NV: never */
+};
+
+/* Return 1 if the condition in the top four bits of opcode passes for
+   the flags in the top four bits of ccodes, 0 otherwise. */
+unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes)
+{
+  const unsigned int nCond  = opcode >> 28;
+  const unsigned int nFlags = ccodes >> 28;
+
+  return (aCC[nCond] >> nFlags) & 1;
+}
diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h
new file mode 100644
index 000000000..d6d7aa11a
--- /dev/null
+++ b/arch/arm/nwfpe/fpopcode.h
@@ -0,0 +1,376 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPOPCODE_H__
+#define __FPOPCODE_H__
+
+/*
+ARM Floating Point Instruction Classes
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+|c o n d|1 1 0 P|U|u|W|L|   Rn  |v|  Fd |0|0|0|1|  o f f s e t  | CPDT
+|c o n d|1 1 0 P|U|w|W|L|   Rn  |x|  Fd |0|0|0|1|  o f f s e t  | CPDT
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+|c o n d|1 1 1 0|a|b|c|d|e|  Fn |j|  Fd |0|0|0|1|f|g|h|0|i|  Fm | CPDO
+|c o n d|1 1 1 0|a|b|c|L|e|  Fn |   Rd  |0|0|0|1|f|g|h|1|i|  Fm | CPRT
+|c o n d|1 1 1 0|a|b|c|1|e|  Fn |1|1|1|1|0|0|0|1|f|g|h|1|i|  Fm | comparisons
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+
+CPDT		data transfer instructions
+		LDF, STF, LFM, SFM
+		
+CPDO		dyadic arithmetic instructions
+		ADF, MUF, SUF, RSF, DVF, RDF,
+		POW, RPW, RMF, FML, FDV, FRD, POL
+
+CPDO		monadic arithmetic instructions
+		MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP,
+		SIN, COS, TAN, ASN, ACS, ATN, URD, NRM
+		
+CPRT		joint arithmetic/data transfer instructions
+		FIX (arithmetic followed by load/store)
+		FLT (load/store followed by arithmetic)
+		CMF, CNF, CMFE, CNFE (comparisons)
+		WFS, RFS (write/read floating point status register)
+		WFC, RFC (write/read floating point control register)
+
+cond		condition codes
+P		pre/post index bit: 0 = postindex, 1 = preindex
+U		up/down bit: 0 = stack grows down, 1 = stack grows up
+W		write back bit: 1 = update base register (Rn)
+L		load/store bit: 0 = store, 1 = load
+Rn		base register
+Rd		destination/source register		
+Fd		floating point destination register
+Fn		floating point source register
+Fm		floating point source register or floating point constant
+
+uv		transfer length (TABLE 1)
+wx		register count (TABLE 2)
+abcd		arithmetic opcode (TABLES 3 & 4)
+ef		destination size (rounding precision) (TABLE 5)
+gh		rounding mode (TABLE 6)
+j		dyadic/monadic bit: 0 = dyadic, 1 = monadic
+i 		constant bit: 1 = constant (TABLE 6)
+*/
+
+/*
+TABLE 1
++-------------------------+---+---+---------+---------+
+|  Precision              | u | v | FPSR.EP | length  |
++-------------------------+---+---+---------+---------+
+| Single                  | 0 | 0 |    x    | 1 word  |
+| Double                  | 0 | 1 |    x    | 2 words |
+| Extended                | 1 | 0 |    x    | 3 words |
+| Packed decimal          | 1 | 1 |    0    | 3 words |
+| Expanded packed decimal | 1 | 1 |    1    | 4 words |
++-------------------------+---+---+---------+---------+
+Note: x = don't care
+*/
+
+/*
+TABLE 2
++---+---+---------------------------------+
+| w | x | Number of registers to transfer |
++---+---+---------------------------------+
+| 0 | 1 |  1                              |
+| 1 | 0 |  2                              |
+| 1 | 1 |  3                              |
+| 0 | 0 |  4                              |
++---+---+---------------------------------+
+*/
+
+/*
+TABLE 3: Dyadic Floating Point Opcodes
++---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
++---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | ADF      | Add                   | Fd := Fn + Fm         |
+| 0 | 0 | 0 | 1 | MUF      | Multiply              | Fd := Fn * Fm         |
+| 0 | 0 | 1 | 0 | SUF      | Subtract              | Fd := Fn - Fm         |
+| 0 | 0 | 1 | 1 | RSF      | Reverse subtract      | Fd := Fm - Fn         |
+| 0 | 1 | 0 | 0 | DVF      | Divide                | Fd := Fn / Fm         |
+| 0 | 1 | 0 | 1 | RDF      | Reverse divide        | Fd := Fm / Fn         |
+| 0 | 1 | 1 | 0 | POW      | Power                 | Fd := Fn ^ Fm         |
+| 0 | 1 | 1 | 1 | RPW      | Reverse power         | Fd := Fm ^ Fn         |
+| 1 | 0 | 0 | 0 | RMF      | Remainder             | Fd := IEEE rem(Fn/Fm) |
+| 1 | 0 | 0 | 1 | FML      | Fast Multiply         | Fd := Fn * Fm         |
+| 1 | 0 | 1 | 0 | FDV      | Fast Divide           | Fd := Fn / Fm         |
+| 1 | 0 | 1 | 1 | FRD      | Fast reverse divide   | Fd := Fm / Fn         |
+| 1 | 1 | 0 | 0 | POL      | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm)  |
+| 1 | 1 | 0 | 1 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 0 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 1 |          | undefined instruction | trap                  |
++---+---+---+---+----------+-----------------------+-----------------------+
+Note: POW, RPW, POL are deprecated, and are available for backwards
+      compatibility only.
+*/
+
+/*
+TABLE 4: Monadic Floating Point Opcodes
++---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
++---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | MVF      | Move                  | Fd := Fm              |
+| 0 | 0 | 0 | 1 | MNF      | Move negated          | Fd := - Fm            |
+| 0 | 0 | 1 | 0 | ABS      | Absolute value        | Fd := abs(Fm)         |
+| 0 | 0 | 1 | 1 | RND      | Round to integer      | Fd := int(Fm)         |
+| 0 | 1 | 0 | 0 | SQT      | Square root           | Fd := sqrt(Fm)        |
+| 0 | 1 | 0 | 1 | LOG      | Log base 10           | Fd := log10(Fm)       |
+| 0 | 1 | 1 | 0 | LGN      | Log base e            | Fd := ln(Fm)          |
+| 0 | 1 | 1 | 1 | EXP      | Exponent              | Fd := e ^ Fm          |
+| 1 | 0 | 0 | 0 | SIN      | Sine                  | Fd := sin(Fm)         |
+| 1 | 0 | 0 | 1 | COS      | Cosine                | Fd := cos(Fm)         |
+| 1 | 0 | 1 | 0 | TAN      | Tangent               | Fd := tan(Fm)         |
+| 1 | 0 | 1 | 1 | ASN      | Arc Sine              | Fd := arcsin(Fm)      |
+| 1 | 1 | 0 | 0 | ACS      | Arc Cosine            | Fd := arccos(Fm)      |
+| 1 | 1 | 0 | 1 | ATN      | Arc Tangent           | Fd := arctan(Fm)      |
+| 1 | 1 | 1 | 0 | URD      | Unnormalized round    | Fd := int(Fm)         |
+| 1 | 1 | 1 | 1 | NRM      | Normalize             | Fd := norm(Fm)        |
++---+---+---+---+----------+-----------------------+-----------------------+
+Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are
+      available for backwards compatibility only.
+*/
+
+/*
+TABLE 5
++-------------------------+---+---+
+|  Rounding Precision     | e | f |
++-------------------------+---+---+
+| IEEE Single precision   | 0 | 0 |
+| IEEE Double precision   | 0 | 1 |
+| IEEE Extended precision | 1 | 0 |
+| undefined (trap)        | 1 | 1 |
++-------------------------+---+---+
+*/
+
+/*
+TABLE 6
++---------------------------------+---+---+
+|  Rounding Mode                  | g | h |
++---------------------------------+---+---+
+| Round to nearest (default)      | 0 | 0 |
+| Round toward plus infinity      | 0 | 1 |
+| Round toward negative infinity  | 1 | 0 |
+| Round toward zero               | 1 | 1 |
++---------------------------------+---+---+
+*/
+
+/*
+===
+=== Definitions for load and store instructions
+===
+*/
+
+/* bit masks: the P, U, W and L bits of a CPDT opcode */
+#define BIT_PREINDEX	0x01000000	/* P: 0 = postindex, 1 = preindex */
+#define BIT_UP		0x00800000	/* U: up/down bit */
+#define BIT_WRITE_BACK	0x00200000	/* W: 1 = update base register (Rn) */
+#define BIT_LOAD	0x00100000	/* L: 0 = store, 1 = load */
+
+/* masks for load/store */
+#define MASK_CPDT		0x0c000000  /* data transfer opcode */
+#define MASK_OFFSET		0x000000ff	/* offset field, bits 7:0 */
+#define MASK_TRANSFER_LENGTH	0x00408000	/* bits 22 and 15 (u and v) */
+#define MASK_REGISTER_COUNT	MASK_TRANSFER_LENGTH	/* w and x share those bits */
+#define MASK_COPROCESSOR	0x00000f00	/* coprocessor number, bits 11:8 */
+
+/* Tests for transfer length: values of (opcode & MASK_TRANSFER_LENGTH) */
+#define TRANSFER_SINGLE		0x00000000	/* u = 0, v = 0 */
+#define TRANSFER_DOUBLE		0x00008000	/* u = 0, v = 1 */
+#define TRANSFER_EXTENDED	0x00400000	/* u = 1, v = 0 */
+#define TRANSFER_PACKED		MASK_TRANSFER_LENGTH	/* u = 1, v = 1 */
+
+/* Get the coprocessor number (bits 11:8) from the opcode. */
+#define getCoprocessorNumber(opcode)	(((opcode) & MASK_COPROCESSOR) >> 8)
+
+/* Get the offset (bits 7:0) from the opcode. */
+#define getOffset(opcode)		((opcode) & MASK_OFFSET)
+
+/* True when every bit of mask is set in opcode. */
+#define TEST_OPCODE(opcode,mask)	(((opcode) & (mask)) == (mask))
+
+#define LOAD_OP(opcode)   TEST_OPCODE((opcode),MASK_CPDT | BIT_LOAD)
+#define STORE_OP(opcode)  (((opcode) & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT)
+/* LDF/STF are matched on coprocessor number 1, LFM/SFM on number 2. */
+#define LDF_OP(opcode)	(LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define LFM_OP(opcode)	(LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+#define STF_OP(opcode)	(STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define SFM_OP(opcode)	(STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+/* Single-bit tests; the argument is parenthesised so expressions are safe. */
+#define PREINDEXED(opcode)		(((opcode) & BIT_PREINDEX) != 0)
+#define POSTINDEXED(opcode)		(((opcode) & BIT_PREINDEX) == 0)
+#define BIT_UP_SET(opcode)		(((opcode) & BIT_UP) != 0)
+#define BIT_UP_CLEAR(opcode)		(((opcode) & BIT_UP) == 0)
+#define WRITE_BACK(opcode)		(((opcode) & BIT_WRITE_BACK) != 0)
+#define LOAD(opcode)			(((opcode) & BIT_LOAD) != 0)
+#define STORE(opcode)			(((opcode) & BIT_LOAD) == 0)
+
+/*
+===
+=== Definitions for arithmetic instructions
+===
+*/
+/* bit masks: j (dyadic/monadic) and i (constant) bits of a CPDO opcode */
+#define BIT_MONADIC	0x00008000	/* 0 = dyadic, 1 = monadic */
+#define BIT_CONSTANT	0x00000008	/* 1 = Fm field names a constant */
+
+#define CONSTANT_FM(opcode)		(((opcode) & BIT_CONSTANT) != 0)
+#define MONADIC_INSTRUCTION(opcode)	(((opcode) & BIT_MONADIC) != 0)
+
+/* instruction identification masks */
+#define MASK_CPDO		0x0e000000  /* arithmetic opcode */
+#define MASK_ARITHMETIC_OPCODE	0x00f08000	/* bits 23:20 plus the monadic bit */
+#define MASK_DESTINATION_SIZE	0x00080080	/* bits 19 and 7 (e and f) */
+
+/* dyadic arithmetic opcodes: values of (opcode & MASK_ARITHMETIC_OPCODE). */
+#define ADF_CODE	0x00000000	/* add: Fd := Fn + Fm */
+#define MUF_CODE	0x00100000	/* multiply: Fd := Fn * Fm */
+#define SUF_CODE	0x00200000	/* subtract: Fd := Fn - Fm */
+#define RSF_CODE	0x00300000	/* reverse subtract: Fd := Fm - Fn */
+#define DVF_CODE	0x00400000	/* divide: Fd := Fn / Fm */
+#define RDF_CODE	0x00500000	/* reverse divide: Fd := Fm / Fn */
+#define POW_CODE	0x00600000	/* power: Fd := Fn ^ Fm */
+#define RPW_CODE	0x00700000	/* reverse power: Fd := Fm ^ Fn */
+#define RMF_CODE	0x00800000	/* remainder: Fd := IEEE rem(Fn/Fm) */
+#define FML_CODE	0x00900000	/* fast multiply: Fd := Fn * Fm */
+#define FDV_CODE	0x00a00000	/* fast divide: Fd := Fn / Fm */
+#define FRD_CODE	0x00b00000	/* fast reverse divide: Fd := Fm / Fn */
+#define POL_CODE	0x00c00000	/* polar angle: Fd := arctan2(Fn,Fm) */
+/* 0x00d00000 is an invalid dyadic arithmetic opcode */
+/* 0x00e00000 is an invalid dyadic arithmetic opcode */
+/* 0x00f00000 is an invalid dyadic arithmetic opcode */
+
+/* monadic arithmetic opcodes: same field, with BIT_MONADIC (0x00008000) set. */
+#define MVF_CODE	0x00008000	/* move: Fd := Fm */
+#define MNF_CODE	0x00108000	/* move negated: Fd := - Fm */
+#define ABS_CODE	0x00208000	/* absolute value: Fd := abs(Fm) */
+#define RND_CODE	0x00308000	/* round to integer: Fd := int(Fm) */
+#define SQT_CODE	0x00408000	/* square root: Fd := sqrt(Fm) */
+#define LOG_CODE	0x00508000	/* log base 10: Fd := log10(Fm) */
+#define LGN_CODE	0x00608000	/* log base e: Fd := ln(Fm) */
+#define EXP_CODE	0x00708000	/* exponent: Fd := e ^ Fm */
+#define SIN_CODE	0x00808000	/* sine: Fd := sin(Fm) */
+#define COS_CODE	0x00908000	/* cosine: Fd := cos(Fm) */
+#define TAN_CODE	0x00a08000	/* tangent: Fd := tan(Fm) */
+#define ASN_CODE	0x00b08000	/* arc sine: Fd := arcsin(Fm) */
+#define ACS_CODE	0x00c08000	/* arc cosine: Fd := arccos(Fm) */
+#define ATN_CODE	0x00d08000	/* arc tangent: Fd := arctan(Fm) */
+#define URD_CODE	0x00e08000	/* unnormalized round: Fd := int(Fm) */
+#define NRM_CODE	0x00f08000	/* normalize: Fd := norm(Fm) */
+
+/*
+===
+=== Definitions for register transfer and comparison instructions
+===
+*/
+
+#define MASK_CPRT		0x0e000010  /* register transfer opcode */
+#define MASK_CPRT_CODE		0x00f00000	/* CPRT operation, bits 23:20 */
+#define FLT_CODE		0x00000000	/* FLT: load/store followed by arithmetic */
+#define FIX_CODE		0x00100000	/* FIX: arithmetic followed by load/store */
+#define WFS_CODE		0x00200000	/* write floating point status register */
+#define RFS_CODE		0x00300000	/* read floating point status register */
+#define WFC_CODE		0x00400000	/* write floating point control register */
+#define RFC_CODE		0x00500000	/* read floating point control register */
+#define CMF_CODE		0x00900000	/* comparison */
+#define CNF_CODE		0x00b00000	/* comparison */
+#define CMFE_CODE		0x00d00000	/* comparison */
+#define CNFE_CODE		0x00f00000	/* comparison */
+
+/*
+===
+=== Common definitions
+===
+*/
+
+/* register masks: position of each register field inside the opcode */
+#define MASK_Rd		0x0000f000	/* bits 15:12 */
+#define MASK_Rn		0x000f0000	/* bits 19:16 */
+#define MASK_Fd		0x00007000	/* bits 14:12 */
+#define MASK_Fm		0x00000007	/* bits 2:0 */
+#define MASK_Fn		0x00070000	/* bits 18:16 */
+
+/* condition code masks */
+#define CC_MASK		0xf0000000	/* top four bits */
+#define CC_NEGATIVE	0x80000000	/* flag bits, in N Z C V order */
+#define CC_ZERO		0x40000000
+#define CC_CARRY	0x20000000
+#define CC_OVERFLOW	0x10000000
+#define CC_EQ		0x00000000	/* condition field values from here on */
+#define CC_NE		0x10000000
+#define CC_CS		0x20000000
+#define CC_HS		CC_CS		/* alias of CC_CS */
+#define CC_CC		0x30000000
+#define CC_LO		CC_CC		/* alias of CC_CC */
+#define CC_MI		0x40000000
+#define CC_PL		0x50000000
+#define CC_VS		0x60000000
+#define CC_VC		0x70000000
+#define CC_HI		0x80000000
+#define CC_LS		0x90000000
+#define CC_GE		0xa0000000
+#define CC_LT		0xb0000000
+#define CC_GT		0xc0000000
+#define CC_LE		0xd0000000
+#define CC_AL		0xe0000000
+#define CC_NV		0xf0000000
+
+/* rounding masks/values: rounding mode is the g/h field, bits 6:5 */
+#define MASK_ROUNDING_MODE	0x00000060
+#define ROUND_TO_NEAREST	0x00000000	/* g = 0, h = 0 */
+#define ROUND_TO_PLUS_INFINITY	0x00000020	/* g = 0, h = 1 */
+#define ROUND_TO_MINUS_INFINITY	0x00000040	/* g = 1, h = 0 */
+#define ROUND_TO_ZERO		0x00000060	/* g = 1, h = 1 */
+
+#define MASK_ROUNDING_PRECISION	0x00080080	/* e/f field, bits 19 and 7 */
+#define ROUND_SINGLE		0x00000000	/* e = 0, f = 0 */
+#define ROUND_DOUBLE		0x00000080	/* e = 0, f = 1 */
+#define ROUND_EXTENDED		0x00080000	/* e = 1, f = 0 */
+
+/* Get the condition code (bits 31:28) from the opcode. */
+#define getCondition(opcode)		((opcode) >> 28)
+
+/* Get the base register (Rn field) from the opcode. */
+#define getRn(opcode)			(((opcode) & MASK_Rn) >> 16)
+
+/* Get the destination floating point register from the opcode. */
+#define getFd(opcode)			(((opcode) & MASK_Fd) >> 12)
+
+/* Get the first source floating point register from the opcode. */
+#define getFn(opcode)		(((opcode) & MASK_Fn) >> 16)
+
+/* Get the second source floating point register from the opcode. */
+#define getFm(opcode)		((opcode) & MASK_Fm)
+
+/* Get the destination/source register (Rd field) from the opcode. */
+#define getRd(opcode)		(((opcode) & MASK_Rd) >> 12)
+
+/* Get the rounding mode (0-3, see the ROUND_TO_* values) from the opcode. */
+#define getRoundingMode(opcode)		(((opcode) & MASK_ROUNDING_MODE) >> 5)
+
+float32 getSingleConstant(const unsigned int nIndex);
+float64 getDoubleConstant(const unsigned int nIndex);
+floatx80 getExtendedConstant(const unsigned int nIndex);
+
+unsigned int getRegisterCount(const unsigned int opcode);
+unsigned int getDestinationSize(const unsigned int opcode);
+
+#endif
diff --git a/arch/arm/nwfpe/fpsr.h b/arch/arm/nwfpe/fpsr.h
new file mode 100644
index 000000000..f58994ac2
--- /dev/null
+++ b/arch/arm/nwfpe/fpsr.h
@@ -0,0 +1,108 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPSR_H__
+#define __FPSR_H__
+
+/*
+The FPSR is a 32 bit register consisting of 4 parts, each exactly
+one byte.
+
+	SYSTEM ID
+	EXCEPTION TRAP ENABLE BYTE
+	SYSTEM CONTROL BYTE
+	CUMULATIVE EXCEPTION FLAGS BYTE
+	
+The FPCR is a 32 bit register consisting of bit flags.
+*/
+
+/* SYSTEM ID
+------------
+Note: the system id byte is read only  */
+
+typedef unsigned int FPSR;  /* type for floating point status register */
+typedef unsigned int FPCR;  /* type for floating point control register */
+
+#define MASK_SYSID		0xff000000	/* system ID byte, bits 31:24 */
+#define BIT_HARDWARE		0x80000000	/* set in FP_ACCELERATOR, clear in FP_EMULATOR */
+#define FP_EMULATOR		0x01000000	/* System ID for emulator */ 
+#define FP_ACCELERATOR		0x81000000	/* System ID for FPA11 */
+
+/* EXCEPTION TRAP ENABLE BYTE
+----------------------------- */
+
+#define MASK_TRAP_ENABLE	0x00ff0000	/* whole byte, bits 23:16 */
+#define MASK_TRAP_ENABLE_STRICT	0x001f0000	/* only the five bits defined below */
+#define BIT_IXE		0x00100000   /* inexact exception enable */
+#define BIT_UFE		0x00080000   /* underflow exception enable */
+#define BIT_OFE		0x00040000   /* overflow exception enable */
+#define BIT_DZE		0x00020000   /* divide by zero exception enable */
+#define BIT_IOE		0x00010000   /* invalid operation exception enable */
+
+/* SYSTEM CONTROL BYTE (bits 15:8 of the FPSR)
+---------------------- */
+
+#define MASK_SYSTEM_CONTROL	0x0000ff00
+#define MASK_TRAP_STRICT	0x00001f00
+/* The five control bits below together make up MASK_TRAP_STRICT. */
+#define BIT_AC	0x00001000	/* use alternative C-flag definition
+				   for compares */
+#define BIT_EP	0x00000800	/* use expanded packed decimal format */
+#define BIT_SO	0x00000400	/* select synchronous operation of FPA */
+#define BIT_NE	0x00000200	/* NaN exception bit */
+#define BIT_ND	0x00000100	/* no denormalized numbers bit */
+
+/* CUMULATIVE EXCEPTION FLAGS BYTE
+---------------------------------- */
+
+#define MASK_EXCEPTION_FLAGS		0x000000ff	/* whole byte, bits 7:0 */
+#define MASK_EXCEPTION_FLAGS_STRICT	0x0000001f	/* only the five flags below */
+
+#define BIT_IXC		0x00000010	/* inexact exception flag */
+#define BIT_UFC		0x00000008	/* underflow exception flag */
+#define BIT_OFC		0x00000004	/* overflow exception flag */
+#define BIT_DZC		0x00000002	/* divide by zero exception flag */
+#define BIT_IOC		0x00000001	/* invalid operation exception flag */
+
+/* Floating Point Control Register
+----------------------------------*/
+
+#define BIT_RU		0x80000000	/* rounded up bit */
+#define BIT_IE		0x10000000	/* inexact bit */
+#define BIT_MO		0x08000000	/* mantissa overflow bit */
+#define BIT_EO		0x04000000	/* exponent overflow bit */
+#define BIT_SB		0x00000800	/* store bounce */
+#define BIT_AB		0x00000400	/* arithmetic bounce */
+#define BIT_RE		0x00000200	/* rounding exception */
+#define BIT_DA		0x00000100	/* disable FPA */
+
+#define MASK_OP		0x00f08010	/* AU operation code */
+#define MASK_PR		0x00080080	/* AU precision */
+#define MASK_S1		0x00070000	/* AU source register 1 */
+#define MASK_S2		0x00000007	/* AU source register 2 */
+#define MASK_DS		0x00007000	/* AU destination register */
+#define MASK_RM		0x00000060	/* AU rounding mode */
+#define MASK_ALU	0x9cfff2ff	/* only ALU can write these bits */
+#define MASK_RESET	0x00000d00	/* bits set on reset, all others cleared */
+#define MASK_WFC	MASK_RESET	/* BIT_SB | BIT_AB | BIT_DA; presumably the bits WFC may write - confirm */
+#define MASK_RFC	~MASK_RESET	/* complement of MASK_RESET; presumably the bits RFC reports - confirm */
+
+#endif
diff --git a/arch/arm/nwfpe/milieu.h b/arch/arm/nwfpe/milieu.h
new file mode 100644
index 000000000..a3892ab2d
--- /dev/null
+++ b/arch/arm/nwfpe/milieu.h
@@ -0,0 +1,48 @@
+
+/*
+===============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Include common integer types and flags.
+-------------------------------------------------------------------------------
+*/
+#include "ARM-gcc.h"
+
+/*
+-------------------------------------------------------------------------------
+Boolean constants: FALSE is 0 and TRUE is 1.
+-------------------------------------------------------------------------------
+*/
+enum {
+    FALSE,
+    TRUE
+};
+
diff --git a/arch/arm/nwfpe/single_cpdo.c b/arch/arm/nwfpe/single_cpdo.c
new file mode 100644
index 000000000..f8405ee57
--- /dev/null
+++ b/arch/arm/nwfpe/single_cpdo.c
@@ -0,0 +1,259 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Corel Computer Corporation, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@corelcomputer.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "config.h"
+#include "milieu.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.h"
+
+float32 getSingleConstant(unsigned int);
+
+float32 float32_exp(float32 Fm);
+float32 float32_ln(float32 Fm);
+float32 float32_sin(float32 rFm);
+float32 float32_cos(float32 rFm);
+float32 float32_arcsin(float32 rFm);
+float32 float32_arctan(float32 rFm);
+float32 float32_log(float32 rFm);
+float32 float32_tan(float32 rFm);
+float32 float32_arccos(float32 rFm);
+float32 float32_pow(float32 rFn,float32 rFm);
+float32 float32_pol(float32 rFn,float32 rFm);
+
+/*
+ * Emulate one single precision CPDO (arithmetic) instruction.
+ *
+ * The Fm operand comes from getSingleConstant() when the opcode's constant
+ * bit is set and from FP register Fm otherwise; dyadic opcodes also read
+ * FP register Fn.  A register operand must currently hold a typeSingle
+ * value.  The result is stored in FP register Fd, which is then tagged
+ * typeSingle.
+ *
+ * Returns 1 when the instruction was emulated, 0 when a register operand
+ * is not typeSingle or the opcode is not handled here, including the
+ * opcodes whose cases are compiled out below (Fd is left alone).
+ */
+unsigned int SingleCPDO(const unsigned int opcode)
+{
+   float32 rFm, rFn;
+   unsigned int Fd, Fm, Fn, nRc = 1;
+
+   /* second source operand: constant table entry or register Fm */
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+     rFm = getSingleConstant(Fm);
+   else if (typeSingle == fpa11->fpreg[Fm].fType)
+     rFm = fpa11->fpreg[Fm].fValue.fSingle;
+   else
+     return 0;
+
+   /* first source operand: only dyadic opcodes have one */
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      if (typeSingle != fpa11->fpreg[Fn].fType) return 0;
+      rFn = fpa11->fpreg[Fn].fValue.fSingle;
+   }
+
+   Fd = getFd(opcode);
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_add(rFn,rFm);
+      break;
+
+      case MUF_CODE:
+      case FML_CODE:
+        fpa11->fpreg[Fd].fValue.fSingle = float32_mul(rFn,rFm);
+      break;
+
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_sub(rFn,rFm);
+      break;
+
+      case RSF_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_sub(rFm,rFn);
+      break;
+
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_div(rFn,rFm);
+      break;
+
+      case RDF_CODE:
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_div(rFm,rFn);
+      break;
+
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_pow(rFn,rFm);
+      break;
+
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_pow(rFm,rFn);
+      break;
+#endif
+
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_rem(rFn,rFm);
+      break;
+
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_pol(rFn,rFm);
+      break;
+#endif
+
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = rFm;
+      break;
+
+      case MNF_CODE:
+         rFm ^= 0x80000000;
+         fpa11->fpreg[Fd].fValue.fSingle = rFm;
+      break;
+
+      case ABS_CODE:
+         rFm &= 0x7fffffff;
+         fpa11->fpreg[Fd].fValue.fSingle = rFm;
+      break;
+
+      case RND_CODE:
+      case URD_CODE:
+         /* round in floating point: an int32 round trip saturates large values */
+         fpa11->fpreg[Fd].fValue.fSingle = float32_round_to_int(rFm);
+      break;
+
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_sqrt(rFm);
+      break;
+
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_log(rFm);
+      break;
+
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_ln(rFm);
+      break;
+
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_exp(rFm);
+      break;
+
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_sin(rFm);
+      break;
+
+      case COS_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_cos(rFm);
+      break;
+
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_tan(rFm);
+      break;
+
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_arcsin(rFm);
+      break;
+
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_arccos(rFm);
+      break;
+
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fValue.fSingle = float32_arctan(rFm);
+      break;
+#endif
+
+      case NRM_CODE:
+         /* a value held in single format needs no normalizing: Fd := Fm */
+         fpa11->fpreg[Fd].fValue.fSingle = rFm;
+      break;
+
+      default:
+        nRc = 0;
+   }
+
+   if (0 != nRc) fpa11->fpreg[Fd].fType = typeSingle;
+   return nRc;
+}
+
+#if 0
+float32 float32_exp(float32 Fm)
+{
+// not implemented: placeholder for a series evaluation
+}
+
+float32 float32_ln(float32 Fm)
+{
+// not implemented: placeholder for a series evaluation
+}
+
+float32 float32_sin(float32 rFm)
+{
+// not implemented: placeholder for a series evaluation
+}
+
+float32 float32_cos(float32 rFm)
+{
+// not implemented: placeholder for a series evaluation
+}
+
+float32 float32_arcsin(float32 rFm)
+{
+// not implemented: placeholder for a series evaluation
+}
+
+float32 float32_arctan(float32 rFm)
+{
+  // not implemented: placeholder for a series evaluation
+}
+
+float32 float32_arccos(float32 rFm)
+{
+   //return float32_sub(halfPi,float32_arcsin(rFm));
+}
+
+float32 float32_log(float32 rFm)
+{ /* NOTE(review): log10(x) is ln(x)/ln(10); confirm constant 7 is ln(10) and not 10.0 */
+  return float32_div(float32_ln(rFm),getSingleConstant(7));
+}
+
+float32 float32_tan(float32 rFm)
+{ /* sin(Fm) / cos(Fm) */
+  return float32_div(float32_sin(rFm),float32_cos(rFm));
+}
+
+float32 float32_pow(float32 rFn,float32 rFm)
+{ /* exp(Fm * ln(Fn)) */
+  return float32_exp(float32_mul(rFm,float32_ln(rFn))); 
+}
+
+float32 float32_pol(float32 rFn,float32 rFm)
+{ /* NOTE(review): arctan(Fn/Fm) ignores the operand signs, unlike a true arctan2 - confirm */
+  return float32_arctan(float32_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm/nwfpe/softfloat-macros b/arch/arm/nwfpe/softfloat-macros
new file mode 100644
index 000000000..5469989f2
--- /dev/null
+++ b/arch/arm/nwfpe/softfloat-macros
@@ -0,0 +1,740 @@
+
+/*
+===============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is 32 or greater, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    /* 1 if any of the bits shifted out of `a' is nonzero, else 0. */
+    bits32 sticky;
+
+    if ( count == 0 ) {
+        /* No shift requested: pass the value through unchanged. */
+        *zPtr = a;
+        return;
+    }
+    if ( 32 <= count ) {
+        /* Every bit is shifted out; only the sticky bit can remain. */
+        *zPtr = ( a != 0 );
+        return;
+    }
+    /* The low `count' bits of `a' are exactly those moved to the top here. */
+    sticky = ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    *zPtr = ( a>>count ) | sticky;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is 64 or greater, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 result;
+
+    /* The two asm statements only drop marker comments into the assembly. */
+ __asm__("@shift64RightJamming -- start");
+    if ( 64 <= count ) {
+        /* Every bit is shifted out; only the sticky bit can remain. */
+        result = ( a != 0 );
+    }
+    else if ( count != 0 ) {
+        /* `lost' holds exactly the bits that fall off the right-hand end. */
+        bits64 lost = a<<( ( - count ) & 63 );
+        result = ( a>>count ) | ( lost != 0 );
+    }
+    else {
+        result = a;
+    }
+ __asm__("@shift64RightJamming -- end");
+    *zPtr = result;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+_plus_ the number of bits given in `count'.  The shifted result is at most
+64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+bits shifted off form a second 64-bit result as follows:  The _last_ bit
+shifted off is the most-significant bit of the extra result, and the other
+63 bits of the extra result are all zero if and only if _all_but_the_last_
+bits shifted off were all zero.  This extra result is stored in the location
+pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0' and `a1' are considered to form a
+fixed-point value with binary point between `a0' and `a1'.  This fixed-point
+value is shifted right by the number of bits given in `count', and the
+integer part of the result is returned at the location pointed to by
+`z0Ptr'.  The fractional part of the result may be slightly corrupted as
+described above, and is returned at the location pointed to by `z1Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 intPart, extra;
+    int8 leftShift = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        /* No shift: both words pass through unchanged. */
+        intPart = a0;
+        extra = a1;
+    }
+    else if ( count < 64 ) {
+        /* Bits leaving `a0' become the top of the extra word; any nonzero
+           bit of `a1' is folded into its least significant bit. */
+        intPart = a0>>count;
+        extra = ( a0<<leftShift ) | ( a1 != 0 );
+    }
+    else {
+        /* The whole of `a0' has left the integer part. */
+        intPart = 0;
+        extra =
+            ( count == 64 ) ? ( a0 | ( a1 != 0 ) ) : ( ( a0 | a1 ) != 0 );
+    }
+    /* Store the extra word first, then the integer part. */
+    *z1Ptr = extra;
+    *z0Ptr = intPart;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' can be arbitrarily large; in particular, if `count' is 128 or
+greater, the result will be 0.  The result is broken into two 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    /* Left-shift amount that extracts the bits of `a0' crossing into `z1'. */
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        /* Here 64 <= count.  For counts 64..127 the low word receives the
+           high word shifted by the remaining `count - 64' bits; from 128
+           upward everything has been shifted out.  The bound must be 128:
+           testing against 64 can never succeed in this branch and would
+           force the low word to 0 for every count of 64 or more. */
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    /* Store the low word first, then the high word. */
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  If any nonzero bits are shifted off, they
+are ``jammed'' into the least significant bit of the result by setting the
+least significant bit to 1.  The value of `count' can be arbitrarily large;
+in particular, if `count' is 128 or greater, the result will be either 0
+or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+nonzero.  The result is broken into two 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 hi, lo;
+    int8 leftShift = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        hi = a0;
+        lo = a1;
+    }
+    else if ( count < 64 ) {
+        /* Bits leaving `a1' are jammed into the least significant bit. */
+        hi = a0>>count;
+        lo = ( a0<<leftShift ) | ( a1>>count ) | ( ( a1<<leftShift ) != 0 );
+    }
+    else if ( count == 64 ) {
+        /* Exactly one word: `a0' moves down, `a1' survives only as sticky. */
+        hi = 0;
+        lo = a0 | ( a1 != 0 );
+    }
+    else if ( count < 128 ) {
+        /* Part of `a0' and all of `a1' are shifted out and jammed. */
+        hi = 0;
+        lo = ( a0>>( count & 63 ) ) | ( ( ( a0<<leftShift ) | a1 ) != 0 );
+    }
+    else {
+        /* Everything is shifted out; only the sticky bit can remain. */
+        hi = 0;
+        lo = ( ( a0 | a1 ) != 0 );
+    }
+    /* Store the low word first, then the high word. */
+    *z1Ptr = lo;
+    *z0Ptr = hi;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+by 64 _plus_ the number of bits given in `count'.  The shifted result is
+at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+off form a third 64-bit result as follows:  The _last_ bit shifted off is
+the most-significant bit of the extra result, and the other 63 bits of the
+extra result are all zero if and only if _all_but_the_last_ bits shifted off
+were all zero.  This extra result is stored in the location pointed to by
+`z2Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0', `a1', and `a2' are considered
+to form a fixed-point value with binary point between `a1' and `a2'.  This
+fixed-point value is shifted right by the number of bits given in `count',
+and the integer part of the result is returned at the locations pointed to
+by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+corrupted as described above, and is returned at the location pointed to by
+`z2Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 hi, mid, extra;
+    /* Accumulates every word that is reduced to a single sticky bit. */
+    bits64 sticky = a2;
+    int8 leftShift = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        /* No shift: all three words pass through, nothing is jammed. */
+        *z2Ptr = a2;
+        *z1Ptr = a1;
+        *z0Ptr = a0;
+        return;
+    }
+    if ( count < 64 ) {
+        extra = a1<<leftShift;
+        mid = ( a0<<leftShift ) | ( a1>>count );
+        hi = a0>>count;
+    }
+    else if ( count == 64 ) {
+        /* Exactly one word: each word moves down one position. */
+        extra = a1;
+        mid = a0;
+        hi = 0;
+    }
+    else {
+        /* More than one word: `a1' now contributes only to the sticky bit. */
+        sticky |= a1;
+        hi = 0;
+        if ( count < 128 ) {
+            extra = a0<<leftShift;
+            mid = a0>>( count & 63 );
+        }
+        else {
+            extra = ( count == 128 ) ? a0 : ( a0 != 0 );
+            mid = 0;
+        }
+    }
+    /* Jam the discarded words into the low bit of the extra result. */
+    extra |= ( sticky != 0 );
+    *z2Ptr = extra;
+    *z1Ptr = mid;
+    *z0Ptr = hi;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' must be less than 64.  The result is broken into two 64-bit
+pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* High word of the result; unchanged when no shift is requested. */
+    bits64 hi = a0;
+
+    if ( count != 0 ) {
+        /* Bring the top `count' bits of `a1' into the bottom of the high
+           word. */
+        hi = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+    }
+    *z1Ptr = a1<<count;
+    *z0Ptr = hi;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+by the number of bits given in `count'.  Any bits shifted off are lost.
+The value of `count' must be less than 64.  The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 lo = a2<<count;
+    bits64 mid = a1<<count;
+    bits64 hi = a0<<count;
+
+    if ( count > 0 ) {
+        /* Carry the bits that leave each word into the word above it. */
+        int8 rightShift = ( - count ) & 63;
+        mid |= a2>>rightShift;
+        hi |= a1>>rightShift;
+    }
+    /* Store from least to most significant word. */
+    *z2Ptr = lo;
+    *z1Ptr = mid;
+    *z0Ptr = hi;
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+any carry out is lost.  The result is broken into two 64-bit pieces which
+are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 lowSum = a1 + b1;
+    /* Unsigned wrap-around of the low sum signals a carry into the high
+       word. */
+    bits64 carry = ( lowSum < a1 );
+
+    *z1Ptr = lowSum;
+    *z0Ptr = a0 + b0 + carry;
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+modulo 2^192, so any carry out is lost.  The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 lo, mid;
+    bits64 carryFromLo, carryFromMid;
+
+    /* Low word and its carry out. */
+    lo = a2 + b2;
+    carryFromLo = ( lo < a2 );
+    /* Middle word: carries can arise from the raw sum and from adding the
+       incoming carry; both are accumulated for the high word. */
+    mid = a1 + b1;
+    carryFromMid = ( mid < a1 );
+    mid += carryFromLo;
+    carryFromMid += ( mid < carryFromLo );
+    /* Store from least to most significant word. */
+    *z2Ptr = lo;
+    *z1Ptr = mid;
+    *z0Ptr = a0 + b0 + carryFromMid;
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+2^128, so any borrow out (carry out) is lost.  The result is broken into two
+64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+`z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* A borrow from the high word is needed when the low subtraction wraps. */
+    bits64 borrow = ( a1 < b1 );
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - borrow;
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+result is broken into three 64-bit pieces which are stored at the locations
+pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 mid;
+    bits64 borrowFromMid, borrowFromHi;
+
+    /* Borrow needed by the low word, taken from the middle word. */
+    borrowFromMid = ( a2 < b2 );
+    /* Middle word: a borrow from the high word can arise from the raw
+       difference and from removing the outgoing borrow; both are
+       accumulated. */
+    mid = a1 - b1;
+    borrowFromHi = ( a1 < b1 );
+    borrowFromHi += ( mid < borrowFromMid );
+    mid -= borrowFromMid;
+    /* Store from least to most significant word. */
+    *z2Ptr = a2 - b2;
+    *z1Ptr = mid;
+    *z0Ptr = a0 - b0 - borrowFromHi;
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+into two 64-bit pieces which are stored at the locations pointed to by
+`z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* Split each operand into 32-bit halves. */
+    bits32 aLo = a, aHi = a>>32;
+    bits32 bLo = b, bHi = b>>32;
+    bits64 lowProd, highProd, cross, crossOther;
+
+    /* Four 32x32->64 partial products. */
+    lowProd = ( (bits64) aLo ) * bLo;
+    cross = ( (bits64) aLo ) * bHi;
+    crossOther = ( (bits64) aHi ) * bLo;
+    highProd = ( (bits64) aHi ) * bHi;
+
+    /* Sum the cross terms; a wrap-around is worth 2^32 in the high word. */
+    cross += crossOther;
+    highProd += ( ( (bits64) ( cross < crossOther ) )<<32 ) + ( cross>>32 );
+
+    /* The low half of the cross sum lands in the top of the low word. */
+    cross <<= 32;
+    lowProd += cross;
+    highProd += ( lowProd < cross );
+
+    *z1Ptr = lowProd;
+    *z0Ptr = highProd;
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
+obtain a 192-bit product.  The product is broken into three 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+`z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 hi, mid, lo, upperLow;
+
+    /* a0 * b gives the upper partial product (hi : upperLow). */
+    mul64To128( a0, b, &hi, &upperLow );
+    /* a1 * b gives the lower partial product (mid : lo). */
+    mul64To128( a1, b, &mid, &lo );
+    /* The partial products overlap in the middle word; add them there and
+       let the carry propagate into the high word. */
+    add128( hi, upperLow, 0, mid, &hi, &mid );
+    *z2Ptr = lo;
+    *z1Ptr = mid;
+    *z0Ptr = hi;
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+product.  The product is broken into four 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    /* w0 is the most significant result word, w3 the least. */
+    bits64 w0, w1, w2, w3;
+    /* Scratch words holding one partial product at a time. */
+    bits64 partHi, partLo;
+
+    /* a1 * b1 occupies words 2..3. */
+    mul64To128( a1, b1, &w2, &w3 );
+    /* a1 * b0 occupies words 1..2. */
+    mul64To128( a1, b0, &w1, &partLo );
+    add128( w1, partLo, 0, w2, &w1, &w2 );
+    /* a0 * b0 occupies words 0..1. */
+    mul64To128( a0, b0, &w0, &partHi );
+    add128( w0, partHi, 0, w1, &w0, &w1 );
+    /* a0 * b1 occupies words 1..2; its sum with word 2 is then carried
+       into words 0..1. */
+    mul64To128( a0, b1, &partHi, &partLo );
+    add128( partHi, partLo, 0, w2, &partHi, &w2 );
+    add128( w0, w1, 0, partHi, &w0, &w1 );
+    *z3Ptr = w3;
+    *z2Ptr = w2;
+    *z1Ptr = w1;
+    *z0Ptr = w0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the 64-bit integer quotient obtained by dividing
+`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+toward zero, the approximation returned lies between q and q + 2 inclusive.
+If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+unsigned integer is returned.
+-------------------------------------------------------------------------------
+*/
+static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+    /* Quotient would not fit in 64 bits: saturate to all ones. */
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    /* b0 is the upper 32 bits of the divisor. */
+    b0 = b>>32;
+    /* First guess at the upper 32 quotient bits, from a0 and b0 only;
+       clamped so those bits cannot exceed 0xFFFFFFFF. */
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    /* 128-bit remainder (rem0:rem1) = (a0:a1) - b * z. */
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    /* While the remainder is negative (top bit of rem0 set) the guess was
+       too large: lower it by 2^32 and add b * 2^32, expressed as the word
+       pair (b0:b1), back onto the remainder. */
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    /* Shift the remainder left 32 bits and estimate the lower 32 quotient
+       bits the same way, again clamped to 0xFFFFFFFF. */
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the square root of the 32-bit significand given
+by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+`aExp' (the least significant bit) is 1, the integer returned approximates
+2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+case, the approximation returned lies strictly within +/-2 of the exact
+value.
+-------------------------------------------------------------------------------
+*/
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    /* Correction tables indexed by bits 27..30 of `a'; one table is used
+       when bit 0 of `aExp' is set, the other when it is clear. */
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    /* Four-bit table index taken from bits 27..30 of the significand. */
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        /* Table-corrected first guess, then one refinement that combines
+           a / z with z. */
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        /* `a' is halved before the final step below. */
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        /* Clamp so that z<<15 cannot overflow 32 bits. */
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        /* Early exit: returns `a' shifted right one place as a signed
+           value, so the top bit is replicated. */
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    /* Final refinement: ( a * 2^31 ) / z + z / 2, with a 64-bit dividend. */
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'.  If `a' is zero, 32 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros32( bits32 a )
+{
+    /* Number of leading zero bits in an 8-bit value, indexed by that value
+       (entry 0 is 8, entries 128..255 are 0). */
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    /* Upper 16 bits all zero: count them and move the lower half up. */
+    if ( a < 0x10000 ) {
+        shiftCount += 16;
+        a <<= 16;
+    }
+    /* Upper 8 bits all zero: count them and move the next byte up. */
+    if ( a < 0x1000000 ) {
+        shiftCount += 8;
+        a <<= 8;
+    }
+    /* The top byte now decides the rest; a zero input reaches entry 0
+       (value 8), giving 16 + 8 + 8 = 32. */
+    shiftCount += countLeadingZerosHigh[ a>>24 ];
+    return shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'.  If `a' is zero, 64 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros64( bits64 a )
+{
+    if ( ( a>>32 ) == 0 ) {
+        /* Upper half is empty: 32 zeros plus those of the lower half. */
+        return 32 + countLeadingZeros32( a );
+    }
+    /* Otherwise the first set bit lies in the upper half. */
+    return countLeadingZeros32( a>>32 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* Equal only when both the high and the low words match. */
+    if ( a0 != b0 ) return 0;
+    return ( a1 == b1 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* The high words decide unless they are equal; then the low words do. */
+    if ( a0 != b0 ) return ( a0 < b0 );
+    return ( a1 <= b1 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* The high words decide unless they are equal; then the low words do. */
+    if ( a0 != b0 ) return ( a0 < b0 );
+    return ( a1 < b1 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* Different as soon as either pair of words differs. */
+    return ! ( ( a0 == b0 ) && ( a1 == b1 ) );
+}
+
diff --git a/arch/arm/nwfpe/softfloat-specialize b/arch/arm/nwfpe/softfloat-specialize
new file mode 100644
index 000000000..f03e5c6d4
--- /dev/null
+++ b/arch/arm/nwfpe/softfloat-specialize
@@ -0,0 +1,471 @@
+
+/*
+===============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Underflow tininess-detection mode, statically initialized to default value.
+(The declaration in `softfloat.h' must match the `int8' type here.)
+-------------------------------------------------------------------------------
+*/
+int8 float_detect_tininess = float_tininess_after_rounding;    /* read by the roundAndPackFloat* routines when computing isTiny */
+
+/*
+-------------------------------------------------------------------------------
+Raises the exceptions specified by `flags'.  Floating-point traps can be
+defined here if desired.  It is currently not possible for such a trap to
+substitute a result value.  If traps are not implemented, this routine
+should be simply `float_exception_flags |= flags;'.
+
+ScottB:  November 4, 1998
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c.  Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+-------------------------------------------------------------------------------
+void float_raise( int8 flags )
+{
+    float_exception_flags |= flags;
+}
+*/
+
+/*
+-------------------------------------------------------------------------------
+Internal canonical NaN format.
+-------------------------------------------------------------------------------
+*/
+typedef struct {
+    flag sign;          /* sign bit of the NaN */
+    bits64 high, low;   /* fraction, left-justified: its top bit is bit 63 of `high' */
+} commonNaNT;
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated single-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float32_default_nan 0xFFFFFFFF    /* sign, exponent and fraction bits all set */
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_nan( float32 a )
+{
+    /* Shift out the sign; a NaN then compares above the infinity pattern. */
+    bits32 magnitude = (bits32) ( a<<1 );
+
+    return ( magnitude > 0xFF000000 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_signaling_nan( float32 a )
+{
+    /* Bits 30..22 must read 0x1FE: exponent all ones, top fraction bit 0. */
+    if ( ( ( a>>22 ) & 0x1FF ) != 0x1FE ) return 0;
+    return ( ( a & 0x003FFFFF ) != 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT canonical;
+
+    /* Only a signaling input raises the invalid exception. */
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    canonical.high = ( (bits64) a )<<41;    /* fraction left-justified */
+    canonical.low = 0;
+    canonical.sign = a>>31;
+    return canonical;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the single-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+    /* Sign on top, quiet-NaN exponent/fraction pattern, then the payload. */
+    bits32 signField = ( (bits32) a.sign )<<31;
+
+    return signField | 0x7FC00000 | ( a.high>>41 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two single-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+
+    /* Classify both operands before they are quieted below. */
+    aNaN = float32_is_nan( a );
+    aSignaling = float32_is_signaling_nan( a );
+    bNaN = float32_is_nan( b );
+    bSignaling = float32_is_signaling_nan( b );
+    if ( aSignaling || bSignaling ) float_raise( float_flag_invalid );
+    /* Setting the top fraction bit makes a signaling NaN quiet. */
+    a |= 0x00400000;
+    b |= 0x00400000;
+    /* `a' wins when it is a NaN, unless it was signaling and `b' is also */
+    /* a NaN; every other case yields `b'. */
+    if ( ! aNaN ) return b;
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated double-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )    /* sign, exponent and fraction bits all set */
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_nan( float64 a )
+{
+    /* Shift out the sign; a NaN then compares above the infinity pattern. */
+    bits64 magnitude = (bits64) ( a<<1 );
+
+    return ( magnitude > LIT64( 0xFFE0000000000000 ) );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_signaling_nan( float64 a )
+{
+    /* Bits 62..51 must read 0xFFE: exponent all ones, top fraction bit 0. */
+    if ( ( ( a>>51 ) & 0xFFF ) != 0xFFE ) return 0;
+
+    /* At least one of the remaining fraction bits has to be set. */
+    return ( ( a & LIT64( 0x0007FFFFFFFFFFFF ) ) != 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT canonical;
+
+    /* Only a signaling input raises the invalid exception. */
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    canonical.high = a<<12;    /* fraction left-justified */
+    canonical.low = 0;
+    canonical.sign = a>>63;
+    return canonical;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the double-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+    /* Sign on top, quiet-NaN exponent/fraction pattern, then the payload. */
+    bits64 signField = ( (bits64) a.sign )<<63;
+    bits64 payload = a.high>>12;
+
+    return signField | LIT64( 0x7FF8000000000000 ) | payload;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two double-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+
+    /* Classify both operands before they are quieted below. */
+    aNaN = float64_is_nan( a );
+    aSignaling = float64_is_signaling_nan( a );
+    bNaN = float64_is_nan( b );
+    bSignaling = float64_is_signaling_nan( b );
+    if ( aSignaling || bSignaling ) float_raise( float_flag_invalid );
+    /* Setting the top fraction bit makes a signaling NaN quiet. */
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    /* `a' wins when it is a NaN, unless it was signaling and `b' is also */
+    /* a NaN; every other case yields `b'. */
+    if ( ! aNaN ) return b;
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated extended double-precision NaN.  The
+`high' and `low' values hold the most- and least-significant bits,
+respectively.
+-------------------------------------------------------------------------------
+*/
+#define floatx80_default_nan_high 0xFFFF    /* sign bit and 15-bit exponent field all set */
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )    /* 64-bit significand all set */
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_nan( floatx80 a )
+{
+    /* Exponent field all ones and a nonzero significand below bit 63. */
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;
+    return ( (bits64) ( a.low<<1 ) != 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 lowSansQuiet;
+
+    /* A NaN needs an exponent field of all ones. */
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;
+
+    /* Clear bit 62 of the significand; a signaling NaN must still have */
+    /* nonzero bits below bit 63 and must not have had bit 62 set. */
+    lowSansQuiet = a.low & ~ LIT64( 0x4000000000000000 );
+    if ( (bits64) ( lowSansQuiet<<1 ) == 0 ) return 0;
+    return ( a.low == lowSansQuiet );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT canonical;
+
+    /* Only a signaling input raises the invalid exception. */
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    canonical.high = a.low<<1;    /* drop bit 63, left-justify the rest */
+    canonical.low = 0;
+    canonical.sign = a.high>>15;
+    return canonical;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the extended
+double-precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 result;
+
+    /* Exponent all ones under the sign; top two significand bits forced on. */
+    result.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    result.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    return result;
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two extended double-precision floating-point values `a' and `b', one
+of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+
+    /* Classify both operands before they are quieted below. */
+    aNaN = floatx80_is_nan( a );
+    aSignaling = floatx80_is_signaling_nan( a );
+    bNaN = floatx80_is_nan( b );
+    bSignaling = floatx80_is_signaling_nan( b );
+    if ( aSignaling || bSignaling ) float_raise( float_flag_invalid );
+    /* Force the top two significand bits on, which quiets a signaling NaN. */
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    /* `a' wins when it is a NaN, unless it was signaling and `b' is also */
+    /* a NaN; every other case yields `b'. */
+    if ( ! aNaN ) return b;
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated quadruple-precision NaN.  The `high' and
+`low' values hold the most- and least-significant bits, respectively.
+-------------------------------------------------------------------------------
+*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )    /* upper 64 bits, all set */
+#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )    /* lower 64 bits, all set */
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float128_is_nan( float128 a )
+{
+    /* With the sign shifted out, the exponent field must be all ones ... */
+    if ( (bits64) ( a.high<<1 ) < LIT64( 0xFFFE000000000000 ) ) return 0;
+    /* ... and the fraction (low 48 bits of `high' plus `low') nonzero. */
+    if ( a.low ) return 1;
+    return ( ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) != 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float128_is_signaling_nan( float128 a )
+{
+    /* Bits 62..47 of `high': exponent all ones with top fraction bit 0. */
+    if ( ( ( a.high>>47 ) & 0xFFFF ) != 0xFFFE ) return 0;
+    /* Some fraction bit below that one must be set. */
+    if ( a.low ) return 1;
+    return ( ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) != 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT canonical;
+
+    /* Only a signaling input raises the invalid exception. */
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    shortShift128Left( a.high, a.low, 16, &canonical.high, &canonical.low );
+    canonical.sign = a.high>>63;
+    return canonical;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the quadruple-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 result;
+
+    shift128Right( a.high, a.low, 16, &result.high, &result.low );
+    result.high |= LIT64( 0x7FFF800000000000 );    /* exponent + quiet bit */
+    result.high |= ( (bits64) a.sign )<<63;        /* sign bit */
+    return result;
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two quadruple-precision floating-point values `a' and `b', one of
+which is a NaN, and returns the appropriate NaN result.  If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+
+    /* Classify both operands before they are quieted below. */
+    aNaN = float128_is_nan( a );
+    aSignaling = float128_is_signaling_nan( a );
+    bNaN = float128_is_nan( b );
+    bSignaling = float128_is_signaling_nan( b );
+    if ( aSignaling || bSignaling ) float_raise( float_flag_invalid );
+    /* Setting the top fraction bit makes a signaling NaN quiet. */
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    /* `a' wins when it is a NaN, unless it was signaling and `b' is also */
+    /* a NaN; every other case yields `b'. */
+    if ( ! aNaN ) return b;
+    if ( aSignaling && bNaN ) return b;
+    return a;
+}
+
+#endif
+
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c
new file mode 100644
index 000000000..a7fc76cc8
--- /dev/null
+++ b/arch/arm/nwfpe/softfloat.c
@@ -0,0 +1,4877 @@
+/*
+===============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+#include "milieu.h"
+#include "softfloat.h"
+
+/*
+-------------------------------------------------------------------------------
+Floating-point rounding mode, extended double-precision rounding precision,
+and exception flags.
+-------------------------------------------------------------------------------
+*/
+int8 float_rounding_mode = float_round_nearest_even;    /* rounding mode read by the roundAndPack* routines */
+int8 floatx80_rounding_precision = 80;    /* extended double-precision rounding precision */
+int8 float_exception_flags = 0;    /* exception flag bits, OR-ed in as they are raised */
+
+/*
+-------------------------------------------------------------------------------
+Primitive arithmetic functions, including multi-word arithmetic, and
+division and square root approximations.  (Can be specialized to target if
+desired.)
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-macros"
+
+/*
+-------------------------------------------------------------------------------
+Functions and definitions to determine:  (1) whether tininess for underflow
+is detected before or after rounding by default, (2) what (if anything)
+happens when exceptions are raised, (3) how signaling NaNs are distinguished
+from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+are propagated from function inputs to output.  These details are target-
+specific.
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-specialize"
+
+/*
+-------------------------------------------------------------------------------
+Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+and 7, and returns the properly rounded 32-bit integer corresponding to the
+input.  If `zSign' is nonzero, the input is negated before being converted
+to an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
+input is simply rounded to an integer, with the inexact exception raised if
+the input cannot be represented exactly as an integer.  If the fixed-point
+input is too large, however, the invalid exception is raised and the largest
+positive or negative integer is returned.
+-------------------------------------------------------------------------------
+*/
+static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    int32 z;
+    /* The low 7 bits of absZ are the fraction that gets rounded away. */
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x40;    /* half of one integer unit */
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;    /* truncate */
+        }
+        else {
+            roundIncrement = 0x7F;    /* any fraction rounds away from zero */
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = absZ & 0x7F;    /* fraction bits about to be dropped */
+    absZ = ( absZ + roundIncrement )>>7;
+    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );    /* exact tie: clear bit 0 */
+    z = absZ;
+    if ( zSign ) z = - z;
+    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {    /* does not fit in int32 */
+        float_exception_flags |= float_flag_invalid;
+        return zSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+    /* The low 23 bits of the encoding are the fraction field. */
+    bits32 fracField = a & 0x007FFFFF;
+
+    return fracField;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat32Exp( float32 a )
+{
+    /* The 8-bit exponent field sits just above the 23 fraction bits. */
+    int16 expField = ( a>>23 ) & 0xFF;
+
+    return expField;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat32Sign( float32 a )
+{
+    /* Bit 31 is the sign. */
+    flag signBit = a>>31;
+
+    return signBit;
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal single-precision floating-point value represented
+by the denormalized significand `aSig'.  The normalized exponent and
+significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+    /* Left shift: the leading-zero count of `aSig' less 8. */
+    int8 leftShift = countLeadingZeros32( aSig ) - 8;
+
+    *zSigPtr = aSig<<leftShift;
+    /* Each position shifted lowers the exponent, counting down from 1. */
+    *zExpPtr = 1 - leftShift;
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+single-precision floating-point value, returning the result.  After being
+shifted into the proper positions, the three fields are simply added
+together to form the result.  This means that any integer portion of `zSig'
+will be added into the exponent.  Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    /*
+     * zSign - sign bit, placed in bit 31.
+     * zExp  - biased exponent, placed at bit 23.
+     * zSig  - significand, added in at bit 0.
+     *
+     * The three fields are deliberately summed rather than OR-ed: any
+     * integer bit carried in `zSig' (bit 23 and up) adds into the exponent
+     * field.  Do not replace the additions with a bitwise OR (or with an
+     * `orr'-based assembly sequence); that would lose the carry.
+     */
+
+    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the single-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal single-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 30
+and 29, which is 7 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    flag isTiny;
+    /* The low 7 bits of zSig are extra precision that gets rounded off. */
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x40;    /* half of the last kept bit */
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;    /* truncate */
+        }
+        else {
+            roundIncrement = 0x7F;    /* any extra bit rounds away from zero */
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x7F;    /* bits about to be dropped */
+    if ( 0xFD <= (bits16) zExp ) {    /* too large, or negative (the cast presumably makes it compare large) */
+        if (    ( 0xFD < zExp )
+             || (    ( zExp == 0xFD )
+                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+           ) {    /* overflow: infinity, or the encoding just below it when roundIncrement is 0 */
+            float_raise( float_flag_overflow | float_flag_inexact );
+            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {    /* result will be subnormal */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < 0x80000000 );
+            shift32RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x7F;    /* recompute for the shifted significand */
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>7;
+    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );    /* exact tie: clear bit 0 */
+    if ( zSig == 0 ) zExp = 0;    /* a zero significand is packed with a zero exponent */
+    return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  This routine is just like
+`roundAndPackFloat32' except that `zSig' does not have to be normalized in
+any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+-------------------------------------------------------------------------------
+*/
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    /* Left shift: one less than the leading-zero count of `zSig'. */
+    int8 leftShift = countLeadingZeros32( zSig ) - 1;
+    zExp -= leftShift;
+    zSig <<= leftShift;
+    return roundAndPackFloat32( zSign, zExp, zSig );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+    /* The low 52 bits of the encoding are the fraction field. */
+    bits64 fracField = a & LIT64( 0x000FFFFFFFFFFFFF );
+
+    return fracField;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat64Exp( float64 a )
+{
+    /* The 11-bit exponent field sits just above the 52 fraction bits. */
+    int16 expField = ( a>>52 ) & 0x7FF;
+
+    return expField;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat64Sign( float64 a )
+{
+    /* Bit 63 is the sign. */
+    flag signBit = a>>63;
+
+    return signBit;
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision floating-point value represented
+by the denormalized significand `aSig'.  The normalized exponent and
+significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+    /* Left shift: the leading-zero count of `aSig' less 11. */
+    int8 leftShift = countLeadingZeros64( aSig ) - 11;
+
+    *zSigPtr = aSig<<leftShift;
+    /* Each position shifted lowers the exponent, counting down from 1. */
+    *zExpPtr = 1 - leftShift;
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+double-precision floating-point value, returning the result.  After being
+shifted into the proper positions, the three fields are simply added
+together to form the result.  This means that any integer portion of `zSig'
+will be added into the exponent.  Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    /* Fields are added, so an integer bit in `zSig' bumps the exponent. */
+    bits64 signField = ( (bits64) zSign )<<63;
+    bits64 expField = ( (bits64) zExp )<<52;
+    return signField + expField + zSig;
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper double-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the double-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal double-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 62
+and 61, which is 10 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int16 roundIncrement, roundBits;
+    flag isTiny;
+
+    /*
+     * Choose the value added to `zSig' before its low 10 bits are dropped:
+     * 0x200 (half of the dropped range) for round-to-nearest, 0 when the
+     * result must be truncated, 0x3FF when any dropped bit must round the
+     * magnitude up.
+     */
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x200;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x3FF;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    /* The 10 bits that will be discarded by the final shift. */
+    roundBits = zSig & 0x3FF;
+    /*
+     * The unsigned cast makes a negative `zExp' look huge, so this single
+     * comparison selects both the possible-overflow and the subnormal case.
+     */
+    if ( 0x7FD <= (bits16) zExp ) {
+        if (    ( 0x7FD < zExp )
+             || (    ( zExp == 0x7FD )
+                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            //register int lr;
+            //__asm__("mov %0, lr" :: "g" (lr));
+            //fp_printk("roundAndPackFloat64 called from 0x%08x\n",lr);
+            float_raise( float_flag_overflow | float_flag_inexact );
+            /*
+             * Infinity, or one bit pattern below it (the largest finite
+             * value) when the rounding increment is zero.
+             */
+            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            /*
+             * Subnormal result.  Tininess is decided before the significand
+             * is shifted; the shift "jams" lost bits into the lowest bit so
+             * inexactness is still visible in `roundBits'.
+             */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+            shift64RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x3FF;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    /* Round, then drop the 10 extra low-order bits. */
+    zSig = ( zSig + roundIncrement )>>10;
+    /* Exact tie in round-to-nearest mode: clear the low bit (ties to even). */
+    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper double-precision floating-
+point value corresponding to the abstract input.  This routine is just like
+`roundAndPackFloat64' except that `zSig' does not have to be normalized in
+any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+-------------------------------------------------------------------------------
+*/
+static float64
+ normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    int8 leftShift;
+
+    /* Leave exactly one zero bit above the leading 1 (leading 1 in bit 62). */
+    leftShift = countLeadingZeros64( zSig ) - 1;
+    zExp -= leftShift;
+    zSig <<= leftShift;
+    return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the extended double-precision floating-point
+value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+    bits64 frac;
+
+    /* The significand is stored whole in the `low' member. */
+    frac = a.low;
+    return frac;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the extended double-precision floating-point
+value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+    int32 expField;
+
+    /* Keep the low 15 bits of `high'; the bit above them is masked off. */
+    expField = a.high & 0x7FFF;
+    return expField;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the extended double-precision floating-point value
+`a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+    flag signBit;
+
+    /* Shift the 15 exponent bits away, leaving what sits above them. */
+    signBit = a.high>>15;
+    return signBit;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal extended double-precision floating-point value
+represented by the denormalized significand `aSig'.  The normalized exponent
+and significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 leadingZeros;
+
+    /* Shift the leading 1 up to bit 63 and lower the exponent to match. */
+    leadingZeros = countLeadingZeros64( aSig );
+    *zExpPtr = 1 - leadingZeros;
+    *zSigPtr = aSig<<leadingZeros;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+extended double-precision floating-point value, returning the result.
+-------------------------------------------------------------------------------
+*/
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+    floatx80 result;
+
+    /* Sign goes in bit 15 of `high', with the exponent added below it. */
+    result.high = ( ( (bits16) zSign )<<15 ) + zExp;
+    /* The significand is stored unchanged. */
+    result.low = zSig;
+    return result;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and extended significand formed by the concatenation of `zSig0' and `zSig1',
+and returns the proper extended double-precision floating-point value
+corresponding to the abstract input.  Ordinarily, the abstract value is
+rounded and packed into the extended double-precision format, with the
+inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal extended
+double-precision floating-point number.
+    If `roundingPrecision' is 32 or 64, the result is rounded to the same
+number of bits as single or double precision, respectively.  Otherwise, the
+result is rounded to the full precision of the extended double-precision
+format.
+    The input significand must be normalized or smaller.  If the input
+significand is not normalized, `zExp' must be 0; in that case, the result
+returned is a subnormal number, and it must not require rounding.  The
+handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80
+ roundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+    int64 roundIncrement, roundMask, roundBits;
+
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    /*
+     * Reduced precision: `roundMask' covers the low bits of `zSig0' that are
+     * rounded away (11 bits for 64, 40 bits for 32) and `roundIncrement' is
+     * half of that range.  Any other precision value is treated as 80.
+     */
+    if ( roundingPrecision == 80 ) goto precision80;
+    if ( roundingPrecision == 64 ) {
+        roundIncrement = LIT64( 0x0000000000000400 );
+        roundMask = LIT64( 0x00000000000007FF );
+    }
+    else if ( roundingPrecision == 32 ) {
+        roundIncrement = LIT64( 0x0000008000000000 );
+        roundMask = LIT64( 0x000000FFFFFFFFFF );
+    }
+    else {
+        goto precision80;
+    }
+    /* Fold any nonzero `zSig1' into the lowest bit of `zSig0' as a sticky bit. */
+    zSig0 |= ( zSig1 != 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = roundMask;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    /*
+     * The unsigned cast wraps `zExp' <= 0 around to a huge value, so this one
+     * test selects both the possible-overflow and the subnormal case.
+     */
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+        if (    ( 0x7FFE < zExp )
+             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+           ) {
+            goto overflow;
+        }
+        if ( zExp <= 0 ) {
+            /*
+             * Subnormal result.  The last term of `isTiny' is true when adding
+             * the rounding increment does not wrap `zSig0' around.
+             */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ( zSig0 <= zSig0 + roundIncrement );
+            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+            zExp = 0;
+            roundBits = zSig0 & roundMask;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( roundBits ) float_exception_flags |= float_flag_inexact;
+            zSig0 += roundIncrement;
+            /* Rounding carried into bit 63: the exponent becomes 1. */
+            if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            roundIncrement = roundMask + 1;
+            /* Exact tie in nearest-even mode: also clear the lowest kept bit. */
+            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+                roundMask |= roundIncrement;
+            }
+            zSig0 &= ~ roundMask;
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig0 += roundIncrement;
+    /* The addition wrapped: bump the exponent and restart at 0x8000... */
+    if ( zSig0 < roundIncrement ) {
+        ++zExp;
+        zSig0 = LIT64( 0x8000000000000000 );
+    }
+    roundIncrement = roundMask + 1;
+    /* Exact tie in nearest-even mode: also clear the lowest kept bit. */
+    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+        roundMask |= roundIncrement;
+    }
+    zSig0 &= ~ roundMask;
+    if ( zSig0 == 0 ) zExp = 0;
+    return packFloatx80( zSign, zExp, zSig0 );
+ precision80:
+    /*
+     * Full precision: all of `zSig0' is kept and `zSig1' holds the bits to be
+     * rounded away.  `increment' says whether `zSig0' is bumped by one; for
+     * round-to-nearest that is the top bit of `zSig1'.
+     */
+    increment = ( (sbits64) zSig1 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig1;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig1;
+            }
+        }
+    }
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+        if (    ( 0x7FFE < zExp )
+             || (    ( zExp == 0x7FFE )
+                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+                  && increment
+                )
+           ) {
+            /* With no bits masked, `~ roundMask' below is an all-ones significand. */
+            roundMask = 0;
+ overflow:
+            /*
+             * Reached from both the reduced- and full-precision paths.
+             * Returns the largest finite value at the active precision when
+             * the rounding mode forbids rounding away from zero, otherwise
+             * the infinity encoding.
+             */
+            float_raise( float_flag_overflow | float_flag_inexact );
+            if (    ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {
+                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+            }
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( zExp <= 0 ) {
+            /* Subnormal result: shift right, then recompute the increment. */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ! increment
+                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+            zExp = 0;
+            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+            if ( roundNearestEven ) {
+                increment = ( (sbits64) zSig1 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig1;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig1;
+                }
+            }
+            if ( increment ) {
+                ++zSig0;
+                /* `zSig1' doubled is zero only for an exact tie: round to even. */
+                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
+                if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            }
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+    if ( increment ) {
+        ++zSig0;
+        if ( zSig0 == 0 ) {
+            /* The significand wrapped: bump the exponent and restart at 0x8000... */
+            ++zExp;
+            zSig0 = LIT64( 0x8000000000000000 );
+        }
+        else {
+            /* `zSig1' doubled is zero only for an exact tie: round to even. */
+            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
+        }
+    }
+    else {
+        if ( zSig0 == 0 ) zExp = 0;
+    }
+    
+    return packFloatx80( zSign, zExp, zSig0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent
+`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+and returns the proper extended double-precision floating-point value
+corresponding to the abstract input.  This routine is just like
+`roundAndPackFloatx80' except that the input significand does not have to be
+normalized.
+-------------------------------------------------------------------------------
+*/
+static floatx80
+ normalizeRoundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 leftShift;
+
+    if ( zSig0 == 0 ) {
+        /* Upper word is empty: move the lower word up by 64 bit positions. */
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    /* Bring the leading 1 of the 128-bit significand up to bit 63 of zSig0. */
+    leftShift = countLeadingZeros64( zSig0 );
+    zExp -= leftShift;
+    shortShift128Left( zSig0, zSig1, leftShift, &zSig0, &zSig1 );
+    return
+        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Returns the least-significant 64 fraction bits of the quadruple-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloat128Frac1( float128 a )
+{
+    bits64 lowFrac;
+
+    /* The low fraction word is the `low' member, taken unchanged. */
+    lowFrac = a.low;
+    return lowFrac;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the most-significant 48 fraction bits of the quadruple-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloat128Frac0( float128 a )
+{
+    bits64 highFrac;
+
+    /* Keep the low 48 bits of `high'; the 16 bits above them are masked off. */
+    highFrac = a.high & LIT64( 0x0000FFFFFFFFFFFF );
+    return highFrac;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the quadruple-precision floating-point value
+`a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int32 extractFloat128Exp( float128 a )
+{
+    int32 expField;
+
+    /* Drop the 48 fraction bits of `high', then keep the next 15 bits. */
+    expField = ( a.high>>48 ) & 0x7FFF;
+    return expField;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the quadruple-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat128Sign( float128 a )
+{
+    flag signBit;
+
+    /* The sign is bit 63 of `high'. */
+    signBit = a.high>>63;
+    return signBit;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal quadruple-precision floating-point value
+represented by the denormalized significand formed by the concatenation of
+`aSig0' and `aSig1'.  The normalized exponent is stored at the location
+pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+significand are stored at the location pointed to by `zSig0Ptr', and the
+least significant 64 bits of the normalized significand are stored at the
+location pointed to by `zSig1Ptr'.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat128Subnormal(
+     bits64 aSig0,
+     bits64 aSig1,
+     int32 *zExpPtr,
+     bits64 *zSig0Ptr,
+     bits64 *zSig1Ptr
+ )
+{
+    int8 shiftCount;
+
+    if ( aSig0 != 0 ) {
+        /* Leading 1 is in the upper word: one short 128-bit left shift. */
+        shiftCount = countLeadingZeros64( aSig0 ) - 15;
+        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+        *zExpPtr = 1 - shiftCount;
+        return;
+    }
+    /* Upper word is zero: the result is built from the lower word alone. */
+    shiftCount = countLeadingZeros64( aSig1 ) - 15;
+    if ( 0 <= shiftCount ) {
+        /* Everything fits in the upper output word. */
+        *zSig0Ptr = aSig1<<shiftCount;
+        *zSig1Ptr = 0;
+    }
+    else {
+        /* Fewer than 15 leading zeros: split the word across both outputs. */
+        *zSig0Ptr = aSig1>>( - shiftCount );
+        *zSig1Ptr = aSig1<<( shiftCount & 63 );
+    }
+    *zExpPtr = - shiftCount - 63;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', the exponent `zExp', and the significand formed
+by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+floating-point value, returning the result.  After being shifted into the
+proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+added together to form the most significant 64 bits of the result.  This
+means that any integer portion of `zSig0' will be added into the exponent.
+Since a properly normalized significand will have an integer portion equal
+to 1, the `zExp' input should be 1 less than the desired result exponent
+whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float128
+ packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+    float128 result;
+    bits64 signField, expField;
+
+    /* Move the sign to bit 63 and the exponent to start at bit 48. */
+    signField = ( (bits64) zSign )<<63;
+    expField = ( (bits64) zExp )<<48;
+    /* Plain addition: an integer bit left in `zSig0' carries into the exponent. */
+    result.high = signField + expField + zSig0;
+    result.low = zSig1;
+    return result;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and extended significand formed by the concatenation of `zSig0', `zSig1',
+and `zSig2', and returns the proper quadruple-precision floating-point value
+corresponding to the abstract input.  Ordinarily, the abstract value is
+simply rounded and packed into the quadruple-precision format, with the
+inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal quadruple-
+precision floating-point number.
+    The input significand must be normalized or smaller.  If the input
+significand is not normalized, `zExp' must be 0; in that case, the result
+returned is a subnormal number, and it must not require rounding.  In the
+usual case that the input significand is normalized, `zExp' must be 1 less
+than the ``true'' floating-point exponent.  The handling of underflow and
+overflow follows the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float128
+ roundAndPackFloat128(
+     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+
+    /*
+     * `zSig2' holds the bits to be rounded away.  `increment' says whether
+     * the kept significand (zSig0:zSig1) is bumped by one; for
+     * round-to-nearest that is the top bit of `zSig2'.
+     */
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    increment = ( (sbits64) zSig2 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig2;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig2;
+            }
+        }
+    }
+    /*
+     * The unsigned cast makes a negative `zExp' look huge, so this single
+     * comparison selects both the possible-overflow and the subnormal case.
+     */
+    if ( 0x7FFD <= (bits32) zExp ) {
+        if (    ( 0x7FFD < zExp )
+             || (    ( zExp == 0x7FFD )
+                  && eq128(
+                         LIT64( 0x0001FFFFFFFFFFFF ),
+                         LIT64( 0xFFFFFFFFFFFFFFFF ),
+                         zSig0,
+                         zSig1
+                     )
+                  && increment
+                )
+           ) {
+            /*
+             * Overflow: return the largest finite value when the rounding
+             * mode forbids rounding away from zero, otherwise infinity.
+             */
+            float_raise( float_flag_overflow | float_flag_inexact );
+            if (    ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {
+                return
+                    packFloat128(
+                        zSign,
+                        0x7FFE,
+                        LIT64( 0x0000FFFFFFFFFFFF ),
+                        LIT64( 0xFFFFFFFFFFFFFFFF )
+                    );
+            }
+            return packFloat128( zSign, 0x7FFF, 0, 0 );
+        }
+        if ( zExp < 0 ) {
+            /*
+             * Subnormal result: decide tininess, shift the significand right
+             * by `- zExp' places, then recompute the increment from the new
+             * `zSig2'.
+             */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ! increment
+                || lt128(
+                       zSig0,
+                       zSig1,
+                       LIT64( 0x0001FFFFFFFFFFFF ),
+                       LIT64( 0xFFFFFFFFFFFFFFFF )
+                   );
+            shift128ExtraRightJamming(
+                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+            zExp = 0;
+            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
+            if ( roundNearestEven ) {
+                increment = ( (sbits64) zSig2 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig2;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig2;
+                }
+            }
+        }
+    }
+    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
+    if ( increment ) {
+        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+        /* `zSig2' doubled is zero only for an exact tie: round to even. */
+        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+    }
+    else {
+        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+    }
+    return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand formed by the concatenation of `zSig0' and `zSig1', and
+returns the proper quadruple-precision floating-point value corresponding to
+the abstract input.  This routine is just like `roundAndPackFloat128' except
+that the input significand has fewer bits and does not have to be normalized
+in any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+-------------------------------------------------------------------------------
+*/
+static float128
+ normalizeRoundAndPackFloat128(
+     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+    int8 shiftCount;
+    bits64 zSig2;
+
+    if ( zSig0 == 0 ) {
+        /* Upper word is empty: move the lower word up by 64 bit positions. */
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    /* Aim for exactly 15 leading zero bits in zSig0. */
+    shiftCount = countLeadingZeros64( zSig0 ) - 15;
+    zExp -= shiftCount;
+    if ( shiftCount < 0 ) {
+        /* Too many significant bits: shift right, spilling into zSig2. */
+        shift128ExtraRightJamming(
+            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+    }
+    else {
+        /* Shift left; nothing falls below zSig1, so there are no extra bits. */
+        zSig2 = 0;
+        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+    }
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the single-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 int32_to_float32( int32 a )
+{
+    flag zSign;
+
+    /* Zero maps to the all-zero bit pattern. */
+    if ( a == 0 ) return 0;
+    /* Handled up front, before any negation: packed directly as -1 * 2^31. */
+    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+    zSign = ( a < 0 );
+    if ( zSign ) a = - a;
+    return normalizeRoundAndPackFloat32( zSign, 0x9C, a );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the double-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 int32_to_float64( int32 a )
+{
+    flag aSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return 0;
+    aSign = ( a < 0 );
+    /*
+     * Take the magnitude in unsigned arithmetic.  Negating the most negative
+     * int32 as a signed value overflows, which is undefined behaviour in C;
+     * the unsigned subtraction is well defined and gives 0x80000000.
+     */
+    absA = aSign ? 0 - (uint32) a : (uint32) a;
+    /*
+     * Shift the leading 1 up to bit 52.  packFloat64 adds that bit into the
+     * exponent field, so the exponent passed is one less than the final one.
+     * No rounding step is performed: the value is packed directly.
+     */
+    shiftCount = countLeadingZeros32( absA ) + 21;
+    zSig = absA;
+    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a'
+to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 int32_to_floatx80( int32 a )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    /*
+     * Take the magnitude in unsigned arithmetic.  Negating the most negative
+     * int32 as a signed value overflows, which is undefined behaviour in C;
+     * the unsigned subtraction is well defined and gives 0x80000000.
+     */
+    absA = zSign ? 0 - (uint32) a : (uint32) a;
+    /*
+     * Shift the leading 1 up to bit 63 of the significand.  No rounding step
+     * is performed: the value is packed directly.
+     */
+    shiftCount = countLeadingZeros32( absA ) + 32;
+    zSig = absA;
+    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the quadruple-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 int32_to_float128( int32 a )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig0;
+
+    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+    zSign = ( a < 0 );
+    /*
+     * Take the magnitude in unsigned arithmetic.  Negating the most negative
+     * int32 as a signed value overflows, which is undefined behaviour in C;
+     * the unsigned subtraction is well defined and gives 0x80000000.
+     */
+    absA = zSign ? 0 - (uint32) a : (uint32) a;
+    /*
+     * Shift the leading 1 up to bit 48 of the high word.  packFloat128 adds
+     * that bit into the exponent field, so the exponent passed is one less
+     * than the final one.  The low word is zero and no rounding step is
+     * performed.
+     */
+    shiftCount = countLeadingZeros32( absA ) + 17;
+    zSig0 = absA;
+    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float32_to_int32( float32 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    bits64 zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    /*
+     * A NaN must convert to the largest positive integer, so discard its
+     * sign.  The all-ones exponent of a single-precision value is 0xFF, as
+     * tested by the other float32 routines in this file; 0x7FF is the
+     * double-precision value and could never match here.
+     */
+    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+    /* Nonzero exponent: make the implicit leading 1 explicit. */
+    if ( aExp ) aSig |= 0x00800000;
+    shiftCount = 0xAF - aExp;
+    zSig = aSig;
+    zSig <<= 32;
+    /* The jamming shift keeps discarded bits visible for the rounding step. */
+    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
+    return roundAndPackInt32( aSign, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float32_to_int32_round_to_zero( float32 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    int32 z;
+
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    shiftCount = aExp - 0x9E;
+    if ( 0 <= shiftCount ) {
+        /* Magnitude of 2^31 or more, infinity, or NaN. */
+        /* The one exact case, -2^31, is returned without raising anything. */
+        if ( a == 0xCF000000 ) return 0x80000000;
+        float_raise( float_flag_invalid );
+        /* Only a negative non-NaN saturates to the most negative integer. */
+        if ( aSign && ! ( ( aExp == 0xFF ) && aSig ) ) return 0x80000000;
+        return 0x7FFFFFFF;
+    }
+    if ( aExp <= 0x7E ) {
+        /* Magnitude below 1: the result is 0, inexact unless `a' is zero. */
+        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    /* Put the significand, with its implicit 1, at the top of the word. */
+    aSig = ( aSig | 0x00800000 )<<8;
+    /* Any bit that the right shift below discards makes the result inexact. */
+    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    z = aSig>>( - shiftCount );
+    return aSign ? - z : z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the double-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float32_to_float64( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    if ( aExp == 0xFF ) {
+        /* Infinity keeps its sign; a NaN goes through the common NaN form. */
+        if ( aSig == 0 ) return packFloat64( aSign, 0x7FF, 0 );
+        return commonNaNToFloat64( float32ToCommonNaN( a ) );
+    }
+    if ( aExp == 0 ) {
+        /* Signed zero, or a subnormal that must be normalized first. */
+        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    /* Rebias the exponent and widen the fraction by 29 bits. */
+    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float32_to_floatx80( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    if ( aExp == 0xFF ) {
+        /* Infinity keeps its sign; a NaN goes through the common NaN form. */
+        if ( aSig == 0 ) {
+            return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+    }
+    if ( aExp == 0 ) {
+        /* Signed zero, or a subnormal that must be normalized first. */
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    /* This format stores the leading 1 explicitly, so set it before widening. */
+    aSig |= 0x00800000;
+    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the quadruple-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float32_to_float128( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    if ( aExp == 0xFF ) {
+        /* Infinity keeps its sign; a NaN goes through the common NaN form. */
+        if ( aSig == 0 ) return packFloat128( aSign, 0x7FFF, 0, 0 );
+        return commonNaNToFloat128( float32ToCommonNaN( a ) );
+    }
+    if ( aExp == 0 ) {
+        /* Signed zero, or a subnormal that must be normalized first. */
+        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    /* Rebias the exponent and widen the fraction by 25 bits; low word is 0. */
+    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer, and
+returns the result as a single-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 a )
+{
+    flag negative;
+    int16 exponent;
+    bits32 unitBit, fractionMask;
+    int8 mode;
+    float32 rounded;
+
+    exponent = extractFloat32Exp( a );
+    if ( 0x96 <= exponent ) {
+        /* No fraction bits below the units position (or infinity/NaN):
+           only a NaN needs any processing. */
+        if ( ( exponent != 0xFF ) || ! extractFloat32Frac( a ) ) return a;
+        return propagateFloat32NaN( a, a );
+    }
+    if ( exponent <= 0x7E ) {
+        /* Magnitude below one: the answer is a signed zero or a signed
+           one.  Zeros pass straight through without raising inexact. */
+        if ( (bits32) ( a<<1 ) == 0 ) return a;
+        float_exception_flags |= float_flag_inexact;
+        negative = extractFloat32Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_down:
+            return negative ? 0xBF800000 : 0;
+         case float_round_up:
+            return negative ? 0x80000000 : 0x3F800000;
+         case float_round_nearest_even:
+            /* Only values strictly above one half round away from zero. */
+            if ( ( exponent == 0x7E ) && extractFloat32Frac( a ) ) {
+                return packFloat32( negative, 0x7F, 0 );
+            }
+            break;
+        }
+        return packFloat32( negative, 0, 0 );
+    }
+    /* `unitBit' selects the bit with weight one in the encoding;
+       `fractionMask' covers every bit below it. */
+    unitBit = (bits32) 1<<( 0x96 - exponent );
+    fractionMask = unitBit - 1;
+    rounded = a;
+    mode = float_rounding_mode;
+    if ( mode == float_round_nearest_even ) {
+        /* Add one half; an exact tie leaves the fraction bits clear, in
+           which case the units bit is cleared to land on an even value. */
+        rounded += unitBit>>1;
+        if ( ( rounded & fractionMask ) == 0 ) rounded &= ~ unitBit;
+    }
+    else if (    ( mode != float_round_to_zero )
+              && ( extractFloat32Sign( rounded )
+                       ^ ( mode == float_round_up ) ) ) {
+        /* Directed rounding away from zero for this sign. */
+        rounded += fractionMask;
+    }
+    rounded &= ~ fractionMask;
+    if ( rounded != a ) float_exception_flags |= float_flag_inexact;
+    return rounded;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 6;         /* both fractions are scaled up by 6 bits, so the */
+    bSig <<= 6;         /* implicit leading one (0x00800000) is 0x20000000 */
+    if ( 0 < expDiff ) {                /* `a' has the larger exponent */
+        if ( aExp == 0xFF ) {
+            if ( aSig ) return propagateFloat32NaN( a, b );    /* NaN */
+            return a;                                   /* infinity */
+        }
+        if ( bExp == 0 ) {
+            --expDiff;      /* zero/subnormal `b': no implicit one, and */
+        }                   /* one position less to shift */
+        else {
+            bSig |= 0x20000000;
+        }
+        shift32RightJamming( bSig, expDiff, &bSig );    /* align to `a' */
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {           /* `b' has the larger exponent */
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );    /* NaN */
+            return packFloat32( zSign, 0xFF, 0 );       /* infinity */
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;      /* zero/subnormal `a': mirror of the case above */
+        }
+        else {
+            aSig |= 0x20000000;
+        }
+        shift32RightJamming( aSig, - expDiff, &aSig );  /* align to `b' */
+        zExp = bExp;
+    }
+    else {                              /* equal exponents */
+        if ( aExp == 0xFF ) {
+            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+            return a;                   /* infinity plus infinity */
+        }
+        /* Both zero or subnormal: add the raw fractions and undo the
+           6-bit scaling; no rounding step is taken on this path. */
+        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+        zSig = 0x40000000 + aSig + bSig;    /* two implicit ones, summed */
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= 0x20000000; /* exponents differed: supply the one implicit one
+                           not ORed in above; only aSig + bSig is used */
+    zSig = ( aSig + bSig )<<1;
+    --zExp;
+    if ( (sbits32) zSig < 0 ) {         /* the sum had already reached bit
+                                           30: use it unshifted and restore
+                                           the exponent */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the single-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 7;         /* both fractions are scaled up by 7 bits, so the */
+    bSig <<= 7;         /* implicit leading one (0x00800000) is 0x40000000 */
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0xFF ) {               /* equal exponents from here on */
+        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+        float_raise( float_flag_invalid );      /* infinity - infinity */
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {  /* zero/subnormal operands are given exponent 1 */
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;    /* no implicit one is ORed in on */
+    if ( aSig < bSig ) goto bBigger;    /* these two paths */
+    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:            /* (above) exact zero: its sign bit is set only in
+                           round-down mode */
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign ^ 1, 0xFF, 0 );   /* infinity, sign
+                                                       flipped */
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;      /* zero/subnormal `a': no implicit one, and one
+                           position less to shift */
+    }
+    else {
+        aSig |= 0x40000000;
+    }
+    shift32RightJamming( aSig, - expDiff, &aSig );  /* align `a' to `b' */
+    bSig |= 0x40000000;
+ bBigger:               /* |b| > |a|: compute b - a and flip the sign */
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        return a;                       /* infinity minus a finite value */
+    }
+    if ( bExp == 0 ) {
+        --expDiff;      /* zero/subnormal `b': mirror of the case above */
+    }
+    else {
+        bSig |= 0x40000000;
+    }
+    shift32RightJamming( bSig, expDiff, &bSig );    /* align `b' to `a' */
+    aSig |= 0x40000000;
+ aBigger:               /* |a| > |b|: compute a - b, sign unchanged */
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the single-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_add( float32 a, float32 b )
+{
+    flag signOfA = extractFloat32Sign( a );
+
+    /* Operands of opposite sign are handled as a subtraction of
+       magnitudes; operands of like sign as an addition of magnitudes.
+       Either way the sign of `a' is handed on as the result sign. */
+    if ( signOfA != extractFloat32Sign( b ) ) {
+        return subFloat32Sigs( a, b, signOfA );
+    }
+    return addFloat32Sigs( a, b, signOfA );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sub( float32 a, float32 b )
+{
+    flag signOfA = extractFloat32Sign( a );
+
+    /* Subtracting an operand of opposite sign adds the magnitudes;
+       subtracting one of like sign subtracts them.  Either way the sign
+       of `a' is handed on as the result sign. */
+    if ( signOfA != extractFloat32Sign( b ) ) {
+        return addFloat32Sigs( a, b, signOfA );
+    }
+    return subFloat32Sigs( a, b, signOfA );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_mul( float32 a, float32 b )
+{
+    flag productSign;
+    int16 expA, expB, expZ;
+    bits32 sigA, sigB, sigZ;
+    bits64 wideProduct;
+
+    sigA = extractFloat32Frac( a );
+    expA = extractFloat32Exp( a );
+    sigB = extractFloat32Frac( b );
+    expB = extractFloat32Exp( b );
+    productSign = extractFloat32Sign( a ) ^ extractFloat32Sign( b );
+    if ( ( expA == 0xFF ) || ( expB == 0xFF ) ) {
+        /* At least one operand is a NaN or an infinity. */
+        if (    ( ( expA == 0xFF ) && sigA )
+             || ( ( expB == 0xFF ) && sigB ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        /* No NaN, so one operand is an infinity; infinity times zero is
+           an invalid operation. */
+        if ( ( ( expA | sigA ) == 0 ) || ( ( expB | sigB ) == 0 ) ) {
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        return packFloat32( productSign, 0xFF, 0 );
+    }
+    if ( expA == 0 ) {
+        if ( sigA == 0 ) return packFloat32( productSign, 0, 0 );
+        normalizeFloat32Subnormal( sigA, &expA, &sigA );
+    }
+    if ( expB == 0 ) {
+        if ( sigB == 0 ) return packFloat32( productSign, 0, 0 );
+        normalizeFloat32Subnormal( sigB, &expB, &sigB );
+    }
+    expZ = expA + expB - 0x7F;
+    /* Make the leading ones explicit and pre-shift (7 + 8 bits) so that
+       the upper word of the 64-bit product carries the result. */
+    sigA = ( sigA | 0x00800000 )<<7;
+    sigB = ( sigB | 0x00800000 )<<8;
+    shift64RightJamming( ( (bits64) sigA ) * sigB, 32, &wideProduct );
+    sigZ = wideProduct;
+    /* If bit 30 is clear, shift left once and lower the exponent. */
+    if ( 0 <= (sbits32) ( sigZ<<1 ) ) {
+        sigZ <<= 1;
+        --expZ;
+    }
+    return roundAndPackFloat32( productSign, expZ, sigZ );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the single-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_div( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {               /* `a' is a NaN or an infinity */
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            float_raise( float_flag_invalid );  /* infinity / infinity */
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );   /* infinity / finite */
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign, 0, 0 );      /* finite / infinity */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {              /* the divisor is zero */
+            if ( ( aExp | aSig ) == 0 ) {
+                float_raise( float_flag_invalid );      /* 0 / 0 */
+                return float32_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat32( zSign, 0xFF, 0 );   /* signed infinity */
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); /* 0 / nonzero */
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x7D;
+    aSig = ( aSig | 0x00800000 )<<7;    /* leading one now at bit 30 */
+    bSig = ( bSig | 0x00800000 )<<8;    /* leading one now at bit 31 */
+    if ( bSig <= ( aSig + aSig ) ) {    /* halve the dividend and bump the
+                                           exponent so that the quotient
+                                           below stays under 2^31 */
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+    if ( ( zSig & 0x3F ) == 0 ) {       /* low six bits all clear: set bit 0
+                                           if the division left a remainder,
+                                           so inexactness is not lost */
+        zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the single-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_rem( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits32 aSig, bSig;
+    bits32 q;
+    bits64 aSig64, bSig64, q64;
+    bits32 alternateASig;
+    sbits32 sigMean;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );    /* extracted but not used below */
+    if ( aExp == 0xFF ) {
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        float_raise( float_flag_invalid );  /* remainder of an infinity */
+        return float32_default_nan;
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return a;                       /* finite rem infinity is `a' */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            float_raise( float_flag_invalid );  /* remainder by zero */
+            return float32_default_nan;
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;      /* zero rem nonzero is `a' */
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig |= 0x00800000;
+    bSig |= 0x00800000;
+    if ( expDiff < 32 ) {               /* 32-bit working path */
+        aSig <<= 8;
+        bSig <<= 8;
+        if ( expDiff < 0 ) {
+            if ( expDiff < -1 ) return a;   /* |a| < |b|/2: `a' is already
+                                               the remainder */
+            aSig >>= 1;
+        }
+        q = ( bSig <= aSig );           /* first quotient bit */
+        if ( q ) aSig -= bSig;
+        if ( 0 < expDiff ) {
+            q = ( ( (bits64) aSig )<<32 ) / bSig;
+            q >>= 32 - expDiff;         /* keep the top expDiff bits */
+            bSig >>= 2;
+            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+        }
+        else {
+            aSig >>= 2;
+            bSig >>= 2;
+        }
+    }
+    else {                              /* large exponent gap: reduce in
+                                           64-bit steps of 62 bits each */
+        if ( bSig <= aSig ) aSig -= bSig;
+        aSig64 = ( (bits64) aSig )<<40;
+        bSig64 = ( (bits64) bSig )<<40;
+        expDiff -= 64;
+        while ( 0 < expDiff ) {
+            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+            q64 = ( 2 < q64 ) ? q64 - 2 : 0;    /* lower the estimate by 2,
+                                                   clamped at zero */
+            aSig64 = - ( ( bSig * q64 )<<38 );
+            expDiff -= 62;
+        }
+        expDiff += 64;
+        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+        q = q64>>( 64 - expDiff );
+        bSig <<= 6;
+        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    do {                                /* subtract bSig until the value
+                                           goes negative, remembering the
+                                           last non-negative candidate */
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits32) aSig );
+    sigMean = aSig + alternateASig;     /* the sign tells which of the two
+                                           candidates is smaller in size */
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+        aSig = alternateASig;           /* on a tie, depends on q's parity */
+    }
+    zSign = ( (sbits32) aSig < 0 );
+    if ( zSign ) aSig = - aSig;
+    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the single-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sqrt( float32 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig, zSig;
+    bits64 rem, term;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, 0 );
+        if ( ! aSign ) return a;            /* sqrt of +infinity */
+        float_raise( float_flag_invalid );  /* sqrt of -infinity */
+        return float32_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;   /* -0 is returned as is */
+        float_raise( float_flag_invalid );      /* negative and nonzero */
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;              /* +0 */
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;   /* halve the unbiased exponent */
+    aSig = ( aSig | 0x00800000 )<<8;        /* leading one now at bit 31 */
+    zSig = estimateSqrt32( aExp, aSig ) + 2;
+    if ( ( zSig & 0x7F ) <= 5 ) {       /* exact correction is done only
+                                           when the low 7 bits are this
+                                           small -- NOTE(review): presumably
+                                           tied to estimateSqrt32's error
+                                           bound; confirm */
+        if ( zSig < 2 ) {               /* the `+ 2' wrapped past 32 bits */
+            zSig = 0xFFFFFFFF;
+        }
+        else {
+            aSig >>= aExp & 1;
+            term = ( (bits64) zSig ) * zSig;
+            rem = ( ( (bits64) aSig )<<32 ) - term;
+            while ( (sbits64) rem < 0 ) {   /* estimate too large: step it
+                                               down, updating the remainder
+                                               by 2*zSig + 1 each time */
+                --zSig;
+                rem += ( ( (bits64) zSig )<<1 ) | 1;
+            }
+            zSig |= ( rem != 0 );       /* sticky bit for an inexact root */
+        }
+    }
+    shift32RightJamming( zSig, 1, &zSig );
+    return roundAndPackFloat32( 0, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq( float32 a, float32 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a );
+    bIsNaN = ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* A NaN never compares equal; only a signaling NaN raises the
+           invalid exception here. */
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* +0 and -0 differ only in the sign bit, which the shift discards. */
+    return (bits32) ( ( a | b )<<1 ) == 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le( float32 a, float32 b )
+{
+    flag aIsNaN, bIsNaN;
+    flag aNegative, bNegative;
+
+    aIsNaN = ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a );
+    bIsNaN = ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* Any NaN operand, quiet or signaling, raises invalid here. */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative != bNegative ) {
+        /* Opposite signs: true when `a' is the negative one, or when both
+           operands are zeros. */
+        if ( aNegative ) return 1;
+        return (bits32) ( ( a | b )<<1 ) == 0;
+    }
+    if ( a == b ) return 1;
+    /* Same sign: the raw-bit ordering is reversed for negative values. */
+    return ( aNegative ^ ( a < b ) ) != 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt( float32 a, float32 b )
+{
+    flag aIsNaN, bIsNaN;
+    flag aNegative, bNegative;
+
+    aIsNaN = ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a );
+    bIsNaN = ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* Any NaN operand, quiet or signaling, raises invalid here. */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative != bNegative ) {
+        /* Opposite signs: true only when `a' is the negative one and the
+           operands are not both zeros. */
+        if ( ! aNegative ) return 0;
+        return (bits32) ( ( a | b )<<1 ) != 0;
+    }
+    if ( a == b ) return 0;
+    /* Same sign: the raw-bit ordering is reversed for negative values. */
+    return ( aNegative ^ ( a < b ) ) != 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The invalid exception is raised
+if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq_signaling( float32 a, float32 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a );
+    bIsNaN = ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* Unlike float32_eq, every NaN operand raises invalid. */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* +0 and -0 differ only in the sign bit, which the shift discards. */
+    return (bits32) ( ( a | b )<<1 ) == 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le_quiet( float32 a, float32 b )
+{
+    flag aIsNaN, bIsNaN;
+    flag aNegative, bNegative;
+
+    aIsNaN = ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a );
+    bIsNaN = ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* The comparison is false for any NaN; only a signaling NaN
+           raises the invalid exception. */
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative != bNegative ) {
+        /* Opposite signs: true when `a' is the negative one, or when both
+           operands are zeros. */
+        if ( aNegative ) return 1;
+        return (bits32) ( ( a | b )<<1 ) == 0;
+    }
+    if ( a == b ) return 1;
+    /* Same sign: the raw-bit ordering is reversed for negative values. */
+    return ( aNegative ^ ( a < b ) ) != 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt_quiet( float32 a, float32 b )
+{
+    flag aIsNaN, bIsNaN;
+    flag aNegative, bNegative;
+
+    aIsNaN = ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a );
+    bIsNaN = ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* The comparison is false for any NaN; only a signaling NaN
+           raises the invalid exception. */
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aNegative = extractFloat32Sign( a );
+    bNegative = extractFloat32Sign( b );
+    if ( aNegative != bNegative ) {
+        /* Opposite signs: true only when `a' is the negative one and the
+           operands are not both zeros. */
+        if ( ! aNegative ) return 0;
+        return (bits32) ( ( a | b )<<1 ) != 0;
+    }
+    if ( a == b ) return 0;
+    /* Same sign: the raw-bit ordering is reversed for negative values. */
+    return ( aNegative ^ ( a < b ) ) != 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32( float64 a )
+{
+    bits64 sig = extractFloat64Frac( a );
+    int16 exponent = extractFloat64Exp( a );
+    flag negative = extractFloat64Sign( a );
+    int16 shift;
+
+    /* A NaN is converted as if it were positive. */
+    if ( exponent == 0x7FF ) {
+        if ( sig ) negative = 0;
+    }
+    /* Every nonzero exponent carries an implicit leading one. */
+    if ( exponent != 0 ) {
+        sig |= LIT64( 0x0010000000000000 );
+    }
+    shift = 0x42C - exponent;
+    if ( shift > 0 ) {
+        shift64RightJamming( sig, shift, &sig );
+    }
+    /* Rounding and range checking are left to roundAndPackInt32. */
+    return roundAndPackInt32( negative, sig );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32_round_to_zero( float64 a )
+{
+    flag negative;
+    int16 exponent, shift;
+    bits64 sig, fullSig;
+    int32 result;
+
+    sig = extractFloat64Frac( a );
+    exponent = extractFloat64Exp( a );
+    negative = extractFloat64Sign( a );
+    shift = 0x433 - exponent;
+    if ( shift < 21 ) {
+        /* Too large for 32 bits, or infinity/NaN.  A NaN is reported as a
+           positive overflow. */
+        if ( ( exponent == 0x7FF ) && sig ) negative = 0;
+        goto invalid;
+    }
+    if ( 52 < shift ) {
+        /* Magnitude below one truncates to zero; inexact unless `a' is
+           itself a zero. */
+        if ( exponent || sig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    sig |= LIT64( 0x0010000000000000 );
+    fullSig = sig;
+    sig >>= shift;
+    result = sig;
+    if ( negative ) result = - result;
+    /* A result whose sign disagrees with the input did not fit. */
+    if ( ( result < 0 ) ^ negative ) goto invalid;
+    /* Any bits shifted out make the conversion inexact. */
+    if ( ( sig<<shift ) != fullSig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return result;
+
+ invalid:
+    float_exception_flags |= float_flag_invalid;
+    return negative ? 0x80000000 : 0x7FFFFFFF;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit unsigned integer format.  The sign of `a' is ignored, so
+it is the magnitude of `a' that is converted.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, or if the
+conversion overflows, the largest positive integer is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_uint32( float64 a )
+{
+    int16 exponent, shift;
+    bits64 sig;
+
+    sig = extractFloat64Frac( a );
+    exponent = extractFloat64Exp( a );
+    /* Every nonzero exponent carries an implicit leading one. */
+    if ( exponent != 0 ) {
+        sig |= LIT64( 0x0010000000000000 );
+    }
+    shift = 0x42C - exponent;
+    if ( shift > 0 ) {
+        shift64RightJamming( sig, shift, &sig );
+    }
+    /* The sign of `a' is deliberately not consulted: the value is always
+       rounded and packed as a non-negative quantity.
+       NOTE(review): negative inputs therefore convert by magnitude, and
+       the range is whatever roundAndPackInt32 allows -- confirm that this
+       is what callers expect. */
+    return roundAndPackInt32( 0, sig );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.  Despite its name, this routine currently behaves exactly like
+float64_to_int32_round_to_zero.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_uint32_round_to_zero( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );    /* NOTE(review): the sign is honoured
+                                           here, unlike float64_to_uint32;
+                                           this body matches
+                                           float64_to_int32_round_to_zero
+                                           statement for statement */
+    shiftCount = 0x433 - aExp;
+    if ( shiftCount < 21 ) {            /* too large for 32 bits, or
+                                           infinity/NaN */
+        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; /* NaN: treat as
+                                                       positive */
+        goto invalid;
+    }
+    else if ( 52 < shiftCount ) {       /* magnitude below one: result 0,
+                                           inexact unless `a' is a zero */
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 );    /* implicit leading one */
+    savedASig = aSig;
+    aSig >>= shiftCount;                /* truncate toward zero */
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {          /* result sign disagrees with the
+                                           input sign: it did not fit */
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF; /* negative inputs yield
+                                                   0x80000000 */
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {  /* bits were shifted out */
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the single-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float64_to_float32( float64 a )
+{
+    bits64 wideSig = extractFloat64Frac( a );
+    int16 exponent = extractFloat64Exp( a );
+    flag negative = extractFloat64Sign( a );
+    bits32 sig;
+
+    if ( exponent == 0x7FF ) {
+        /* All-ones exponent: infinity when the fraction is zero, else NaN. */
+        if ( wideSig == 0 ) {
+            return packFloat32( negative, 0xFF, 0 );
+        }
+        return commonNaNToFloat32( float64ToCommonNaN( a ) );
+    }
+    /* Reduce the 52-bit fraction to 30 bits; the jamming shift keeps any
+       discarded bits visible to the later rounding step. */
+    shift64RightJamming( wideSig, 22, &wideSig );
+    sig = wideSig;
+    if ( ( exponent != 0 ) || ( sig != 0 ) ) {
+        /* Anything but a true zero gets the leading one at bit 30 and has
+           its exponent adjusted by 0x381. */
+        sig |= 0x40000000;
+        exponent -= 0x381;
+    }
+    return roundAndPackFloat32( negative, exponent, sig );
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float64_to_floatx80( float64 a )
+{
+    bits64 frac = extractFloat64Frac( a );
+    int16 exponent = extractFloat64Exp( a );
+    flag negative = extractFloat64Sign( a );
+    bits64 sig;
+
+    if ( exponent == 0x7FF ) {
+        /* All-ones exponent: infinity when the fraction is zero, else NaN. */
+        if ( frac == 0 ) {
+            return
+                packFloatx80(
+                    negative, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+    }
+    if ( exponent == 0 ) {
+        /* Zero converts to a zero of the same sign; a subnormal is
+           normalized first. */
+        if ( frac == 0 ) {
+            return packFloatx80( negative, 0, 0 );
+        }
+        normalizeFloat64Subnormal( frac, &exponent, &frac );
+    }
+    /* The extended format stores the leading one explicitly, so OR it in
+       before moving the 53-bit significand to the top of the word. */
+    sig = frac | LIT64( 0x0010000000000000 );
+    sig <<= 11;
+    return packFloatx80( negative, exponent + 0x3C00, sig );
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the quadruple-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float64_to_float128( float64 a )
+{
+    bits64 frac = extractFloat64Frac( a );
+    int16 exponent = extractFloat64Exp( a );
+    flag negative = extractFloat64Sign( a );
+    bits64 sigHigh, sigLow;
+
+    if ( exponent == 0x7FF ) {
+        /* All-ones exponent: infinity when the fraction is zero, else NaN. */
+        if ( frac == 0 ) {
+            return packFloat128( negative, 0x7FFF, 0, 0 );
+        }
+        return commonNaNToFloat128( float64ToCommonNaN( a ) );
+    }
+    if ( exponent == 0 ) {
+        if ( frac == 0 ) {
+            return packFloat128( negative, 0, 0, 0 );
+        }
+        /* Subnormal input: normalize, then step the exponent down by one.
+           NOTE(review): the decrement presumably compensates for the
+           leading one that normalization leaves in `frac' -- confirm
+           against normalizeFloat64Subnormal and packFloat128. */
+        normalizeFloat64Subnormal( frac, &exponent, &frac );
+        exponent -= 1;
+    }
+    /* Spread the fraction over the two 64-bit halves of the result. */
+    shift128Right( frac, 0, 4, &sigHigh, &sigLow );
+    return packFloat128( negative, exponent + 0x3C00, sigHigh, sigLow );
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the double-precision floating-point value `a' to an integer, and
+returns the result as a double-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float64 z;
+
+    aExp = extractFloat64Exp( a );
+    if ( 0x433 <= aExp ) {              /* no fraction bits below the units
+                                           position, or infinity/NaN */
+        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+            return propagateFloat64NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x3FE ) {              /* magnitude below one */
+        if ( (bits64) ( a<<1 ) == 0 ) return a; /* a zero: returned as is,
+                                                   no inexact flag */
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloat64Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+                return packFloat64( aSign, 0x3FF, 0 );  /* strictly above
+                                                           one half: a
+                                                           signed one */
+            }
+            break;
+         case float_round_down:
+            return aSign ? LIT64( 0xBFF0000000000000 ) : 0; /* -1 or +0 */
+         case float_round_up:
+            return
+            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+        }                               /* (above) -0 or +1 */
+        return packFloat64( aSign, 0, 0 );      /* signed zero otherwise */
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x433 - aExp;       /* the bit with weight one */
+    roundBitsMask = lastBitMask - 1;    /* every bit below it */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;            /* add one half */
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;   /* exact tie:
+                                           clear the units bit to land on
+                                           an even value */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;         /* directed rounding away from zero
+                                           for this sign */
+        }
+    }
+    z &= ~ roundBitsMask;               /* drop the fraction bits */
+    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the double-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+
+Both fractions are pre-shifted left by 9 bits, the operand with the smaller
+exponent is shifted right (with jamming) by the exponent difference, and the
+aligned sum is handed to `roundAndPackFloat64' for rounding and packing.
+-------------------------------------------------------------------------------
+*/
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 9;    /* the implicit leading bit now belongs at bit 61 */
+    bSig <<= 9;
+    if ( 0 < expDiff ) {    /* `a' has the larger exponent */
+        if ( aExp == 0x7FF ) {
+            if ( aSig ) return propagateFloat64NaN( a, b );    /* `a' is a NaN */
+            return a;    /* `a' is an infinity and `b' is finite */
+        }
+        if ( bExp == 0 ) {
+            --expDiff;    /* `b' is zero/subnormal: its effective exponent is 1 */
+        }
+        else {
+            bSig |= LIT64( 0x2000000000000000 );    /* implicit leading bit */
+        }
+        shift64RightJamming( bSig, expDiff, &bSig );    /* align `b' to `a' */
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {    /* `b' has the larger exponent */
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b );    /* `b' is a NaN */
+            return packFloat64( zSign, 0x7FF, 0 );    /* infinity with sign `zSign' */
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;    /* `a' is zero/subnormal: its effective exponent is 1 */
+        }
+        else {
+            aSig |= LIT64( 0x2000000000000000 );    /* implicit leading bit */
+        }
+        shift64RightJamming( aSig, - expDiff, &aSig );    /* align `a' to `b' */
+        zExp = bExp;
+    }
+    else {    /* equal exponents: no alignment shift is needed */
+        if ( aExp == 0x7FF ) {
+            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+            return a;    /* both operands are infinities */
+        }
+        /* Both zero/subnormal: add the raw fractions and pack without rounding. */
+        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;    /* two implicit bits sum to bit 62 */
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    /* Implicit bit of the larger operand; addition is symmetric, so it is
+       OR-ed into `aSig' whichever operand that actually was. */
+    aSig |= LIT64( 0x2000000000000000 );
+    zSig = ( aSig + bSig )<<1;    /* first assume the sum did not carry into bit 62 */
+    --zExp;
+    if ( (sbits64) zSig < 0 ) {    /* bit 63 set: the sum did reach bit 62, undo the shift */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+
+Both fractions are pre-shifted left by 10 bits.  The smaller-exponent operand
+is aligned with a jamming right shift, the smaller magnitude is subtracted
+from the larger one (flipping `zSign' when `b' is the larger), and the
+difference is handed to `normalizeRoundAndPackFloat64'.
+-------------------------------------------------------------------------------
+*/
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 10;    /* the implicit leading bit now belongs at bit 62 */
+    bSig <<= 10;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FF ) {    /* equal exponents, both maximal */
+        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+        float_raise( float_flag_invalid );    /* infinity minus infinity */
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {    /* both zero/subnormal: use effective exponent 1 */
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    /* Exact cancellation: the zero is negative only when rounding down. */
+    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b );    /* `b' is a NaN */
+        return packFloat64( zSign ^ 1, 0x7FF, 0 );    /* infinity with the opposite sign */
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;    /* `a' is zero/subnormal: its effective exponent is 1 */
+    }
+    else {
+        aSig |= LIT64( 0x4000000000000000 );    /* implicit leading bit */
+    }
+    shift64RightJamming( aSig, - expDiff, &aSig );    /* align `a' to `b' */
+    bSig |= LIT64( 0x4000000000000000 );
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;    /* |b| > |a|, so the difference has the opposite sign */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, b );    /* `a' is a NaN */
+        return a;    /* `a' is an infinity and `b' is finite */
+    }
+    if ( bExp == 0 ) {
+        --expDiff;    /* `b' is zero/subnormal: its effective exponent is 1 */
+    }
+    else {
+        bSig |= LIT64( 0x4000000000000000 );    /* implicit leading bit */
+    }
+    shift64RightJamming( bSig, expDiff, &bSig );    /* align `b' to `a' */
+    aSig |= LIT64( 0x4000000000000000 );
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;    /* NOTE(review): presumably compensates for the leading bit sitting at bit 62 -- confirm against normalizeRoundAndPackFloat64 */
+    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the double-precision floating-point values `a' and `b' and returns the
+sum.  Operands of like sign have their magnitudes added; operands of unlike
+sign have their magnitudes subtracted.  In both cases the sign of `a' is
+passed down as the sign of the result.  The operation is performed according
+to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_add( float64 a, float64 b )
+{
+    flag signOfA, signOfB;
+
+    signOfA = extractFloat64Sign( a );
+    signOfB = extractFloat64Sign( b );
+    if ( signOfA != signOfB ) return subFloat64Sigs( a, b, signOfA );
+    return addFloat64Sigs( a, b, signOfA );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the double-precision floating-point value `b' from `a' and returns
+the difference.  Operands of like sign have their magnitudes subtracted;
+operands of unlike sign have their magnitudes added.  In both cases the sign
+of `a' is passed down as the sign of the result.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sub( float64 a, float64 b )
+{
+    flag signOfA, signOfB;
+
+    signOfA = extractFloat64Sign( a );
+    signOfB = extractFloat64Sign( b );
+    if ( signOfA != signOfB ) return addFloat64Sigs( a, b, signOfA );
+    return subFloat64Sigs( a, b, signOfA );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the double-precision floating-point values `a' and `b' and
+returns the product.  NaN operands are propagated, infinity times zero
+raises the invalid exception and yields the default NaN, and the sign of
+any other result is the exclusive-or of the operand signs.  The operation
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_mul( float64 a, float64 b )
+{
+    flag productSign, aIsSpecial, bIsSpecial;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, prodHigh, prodLow;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    productSign = extractFloat64Sign( a ) ^ extractFloat64Sign( b );
+    aIsSpecial = ( aExp == 0x7FF );
+    bIsSpecial = ( bExp == 0x7FF );
+    if ( aIsSpecial || bIsSpecial ) {
+        /* At least one operand is an infinity or a NaN. */
+        if ( ( aIsSpecial && aSig ) || ( bIsSpecial && bSig ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        /* Infinity times zero is invalid; test the operand opposite the
+           infinity for being zero. */
+        if ( aIsSpecial ? ( ( bExp | bSig ) == 0 ) : ( ( aExp | aSig ) == 0 ) ) {
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( productSign, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( productSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloat64( productSign, 0, 0 );
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FF;
+    /* Make the leading bits explicit and left-justify both significands. */
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    mul64To128( aSig, bSig, &prodHigh, &prodLow );
+    /* Fold the discarded low word into a sticky bit. */
+    if ( prodLow != 0 ) prodHigh |= 1;
+    /* If bit 62 is clear the product is one position short: renormalize. */
+    if ( ( prodHigh & LIT64( 0x4000000000000000 ) ) == 0 ) {
+        prodHigh <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat64( productSign, zExp, prodHigh );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to
+the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+
+NaN operands are propagated.  Infinity divided by infinity and zero divided
+by zero raise the invalid exception and return the default NaN.  A nonzero
+finite value divided by zero raises the divide-by-zero exception and returns
+an infinity.  The sign of any non-NaN result is the exclusive-or of the
+operand signs.
+-------------------------------------------------------------------------------
+*/
+float64 float64_div( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    bits64 rem0, rem1;
+    bits64 term0, term1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {    /* `a' is an infinity or a NaN */
+        if ( aSig ) return propagateFloat64NaN( a, b );
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b );
+            float_raise( float_flag_invalid );    /* infinity / infinity */
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );    /* infinity / finite */
+    }
+    if ( bExp == 0x7FF ) {    /* `a' is finite, `b' is an infinity or a NaN */
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign, 0, 0 );    /* finite / infinity is a signed zero */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {    /* division by zero */
+            if ( ( aExp | aSig ) == 0 ) {
+                float_raise( float_flag_invalid );    /* zero / zero */
+                return float64_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat64( zSign, 0x7FF, 0 );
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );    /* zero / nonzero */
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FD;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;    /* leading bit at bit 62 */
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;    /* leading bit at bit 63 */
+    if ( bSig <= ( aSig + aSig ) ) {    /* significand of `a' >= that of `b': halve it, bump the exponent */
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = estimateDiv128To64( aSig, 0, bSig );
+    /* Low bits near zero: the estimate may straddle a rounding boundary, so
+       correct it with the exact remainder.  NOTE(review): the threshold of 2
+       presumably matches the error bound of estimateDiv128To64 -- confirm. */
+    if ( ( zSig & 0x1FF ) <= 2 ) {
+        mul64To128( bSig, zSig, &term0, &term1 );
+        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+        while ( (sbits64) rem0 < 0 ) {    /* estimate too large: step it down */
+            --zSig;
+            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+        }
+        zSig |= ( rem1 != 0 );    /* sticky bit for a nonzero remainder */
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+
+NaN operands are propagated.  An infinite `a' or a zero `b' raises the
+invalid exception and returns the default NaN.  An infinite `b' or a zero
+`a' returns `a' unchanged.  The sign of `b' does not affect the result and
+is therefore not extracted.
+-------------------------------------------------------------------------------
+*/
+float64 float64_rem( float64 a, float64 b )
+{
+    flag aSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits64 aSig, bSig;
+    bits64 q, alternateASig;
+    sbits64 sigMean;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    if ( aExp == 0x7FF ) {
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        /* Remainder of an infinity is invalid. */
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        /* Finite `a' with respect to an infinity is `a' itself. */
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            /* Remainder with respect to zero is invalid. */
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    /* Make the leading bits explicit and left-justify both significands. */
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    if ( expDiff < 0 ) {
+        /* `a' is less than half of `b' in magnitude: it is the remainder. */
+        if ( expDiff < -1 ) return a;
+        aSig >>= 1;
+    }
+    /* First quotient bit. */
+    q = ( bSig <= aSig );
+    if ( q ) aSig -= bSig;
+    /* Reduce 62 quotient bits per pass while more than 64 remain.  The
+       estimate is lowered by 2; NOTE(review): presumably so that it never
+       exceeds the true quotient -- confirm against estimateDiv128To64. */
+    expDiff -= 64;
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        aSig = - ( ( bSig>>2 ) * q );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        /* Final partial step for the remaining `expDiff' quotient bits. */
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        bSig >>= 2;
+        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    else {
+        aSig >>= 2;
+        bSig >>= 2;
+    }
+    /* Subtract `bSig' until the remainder goes negative, remembering the
+       last non-negative value in `alternateASig'. */
+    do {
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits64) aSig );
+    /* Keep whichever candidate is smaller in magnitude; on a tie keep the
+       one that goes with an even quotient. */
+    sigMean = aSig + alternateASig;
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits64) aSig < 0 );
+    if ( zSign ) aSig = - aSig;
+    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the double-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+
+NaNs are propagated, positive infinity and both zeros are returned as is,
+and any other negative operand raises the invalid exception and returns the
+default NaN.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sqrt( float64 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits64 aSig, zSig;
+    bits64 rem0, rem1, term0, term1;
+    /* rem0/rem1 and term0/term1 are used only on the exact-remainder path below. */
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, a );
+        if ( ! aSign ) return a;    /* positive infinity */
+        float_raise( float_flag_invalid );    /* negative infinity */
+        return float64_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;    /* negative zero is returned unchanged */
+        float_raise( float_flag_invalid );    /* negative nonzero operand */
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;    /* positive zero */
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;    /* halve the unbiased exponent, then re-bias */
+    aSig |= LIT64( 0x0010000000000000 );    /* implicit leading bit */
+    zSig = estimateSqrt32( aExp, aSig>>21 );    /* initial estimate from the top 32 significand bits */
+    zSig <<= 31;
+    aSig <<= 9 - ( aExp & 1 );    /* shift by 8 for an odd exponent, 9 for an even one */
+    zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
+    /* Low bits near zero: the estimate may straddle a rounding boundary, so
+       correct it with the exact remainder. */
+    if ( ( zSig & 0x3FF ) <= 5 ) {
+        if ( zSig < 2 ) {    /* NOTE(review): presumably the sum above wrapped past 2^64 -- saturate */
+            zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
+        }
+        else {
+            aSig <<= 2;
+            mul64To128( zSig, zSig, &term0, &term1 );
+            sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+            while ( (sbits64) rem0 < 0 ) {    /* zSig squared is too large: step down */
+                --zSig;
+                shortShift128Left( 0, zSig, 1, &term0, &term1 );
+                term1 |= 1;    /* term = 2*zSig + 1, the drop in the square from the decrement */
+                add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+            }
+            zSig |= ( ( rem0 | rem1 ) != 0 );    /* sticky bit for a nonzero remainder */
+        }
+    }
+    shift64RightJamming( zSig, 1, &zSig );
+    return roundAndPackFloat64( 0, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Compares the double-precision floating-point values `a' and `b' for
+equality, returning 1 if they are equal and 0 otherwise.  If either operand
+is a NaN the result is 0, and the invalid exception is raised only when one
+of them is a signaling NaN.  The comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq( float64 a, float64 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
+    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* Unordered: quiet NaNs compare unequal without an exception. */
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* Zeros of either sign are equal: drop the sign bits, test for zero. */
+    return ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Compares the double-precision floating-point values `a' and `b', returning
+1 if `a' is less than or equal to `b' and 0 otherwise.  If either operand
+is a NaN the invalid exception is raised and the result is 0.  The
+comparison is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_le( float64 a, float64 b )
+{
+    flag aSign, bSign, aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
+    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        if ( a == b ) return 1;
+        /* Same sign: the raw ordering is reversed for negative values. */
+        return aSign ? ! ( a < b ) : ( a < b );
+    }
+    /* Opposite signs: true if `a' is the negative one, or both are zero. */
+    if ( aSign ) return 1;
+    return ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Compares the double-precision floating-point values `a' and `b', returning
+1 if `a' is strictly less than `b' and 0 otherwise.  If either operand is
+a NaN the invalid exception is raised and the result is 0.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt( float64 a, float64 b )
+{
+    flag aSign, bSign, aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
+    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        if ( a == b ) return 0;
+        /* Same sign: the raw ordering is reversed for negative values. */
+        return aSign ? ! ( a < b ) : ( a < b );
+    }
+    /* Opposite signs: true only if `a' is negative and not both are zero. */
+    if ( ! aSign ) return 0;
+    return ( (bits64) ( ( a | b )<<1 ) != 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Compares the double-precision floating-point values `a' and `b' for
+equality, returning 1 if they are equal and 0 otherwise.  Unlike
+`float64_eq', any NaN operand (quiet or signaling) raises the invalid
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq_signaling( float64 a, float64 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
+    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* Zeros of either sign are equal: drop the sign bits, test for zero. */
+    return ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Compares the double-precision floating-point values `a' and `b', returning
+1 if `a' is less than or equal to `b' and 0 otherwise.  If either operand
+is a NaN the result is 0, and the invalid exception is raised only when one
+of them is a signaling NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_le_quiet( float64 a, float64 b )
+{
+    flag aSign, bSign, aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
+    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* Unordered: quiet NaNs give 0 without raising an exception. */
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        if ( a == b ) return 1;
+        /* Same sign: the raw ordering is reversed for negative values. */
+        return aSign ? ! ( a < b ) : ( a < b );
+    }
+    /* Opposite signs: true if `a' is the negative one, or both are zero. */
+    if ( aSign ) return 1;
+    return ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Compares the double-precision floating-point values `a' and `b', returning
+1 if `a' is strictly less than `b' and 0 otherwise.  If either operand is
+a NaN the result is 0, and the invalid exception is raised only when one of
+them is a signaling NaN.  Otherwise, the comparison is performed according
+to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt_quiet( float64 a, float64 b )
+{
+    flag aSign, bSign, aIsNaN, bIsNaN;
+
+    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
+    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
+    if ( aIsNaN || bIsNaN ) {
+        /* Unordered: quiet NaNs give 0 without raising an exception. */
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        if ( a == b ) return 0;
+        /* Same sign: the raw ordering is reversed for negative values. */
+        return aSign ? ! ( a < b ) : ( a < b );
+    }
+    /* Opposite signs: true only if `a' is negative and not both are zero. */
+    if ( ! aSign ) return 0;
+    return ( (bits64) ( ( a | b )<<1 ) != 0 );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Converts the extended double-precision floating-point value `a' to the
+32-bit two's complement integer format and returns the result.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic, so it is rounded according to the current
+rounding mode.  A NaN yields the largest positive integer; any other
+overflowing value yields the largest integer with the same sign as `a'.
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32( floatx80 a )
+{
+    flag negative;
+    int32 biasedExp, rightShift;
+    bits64 sig;
+
+    sig = extractFloatx80Frac( a );
+    biasedExp = extractFloatx80Exp( a );
+    negative = extractFloatx80Sign( a );
+    /* A NaN is converted as though it were positive. */
+    if ( ( biasedExp == 0x7FFF ) && (bits64) ( sig<<1 ) ) negative = 0;
+    /* The significand is always shifted right by at least one bit. */
+    rightShift = ( biasedExp < 0x4037 ) ? 0x4037 - biasedExp : 1;
+    shift64RightJamming( sig, rightShift, &sig );
+    return roundAndPackInt32( negative, sig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the 32-bit two's complement integer format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic, except that the conversion is always rounded
+toward zero.  If `a' is a NaN, the largest positive integer is returned.
+Otherwise, if the conversion overflows, the largest integer with the same
+sign as `a' is returned.
+
+The invalid exception is signalled through `float_raise', like every other
+invalid condition in this file; the inexact flag is set directly.
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    shiftCount = 0x403E - aExp;
+    if ( shiftCount < 32 ) {
+        /* More than 32 integer bits (or Inf/NaN): cannot fit.  A NaN is
+           reported as the largest positive integer. */
+        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+        goto invalid;
+    }
+    else if ( 63 < shiftCount ) {
+        /* Magnitude below one: truncates to zero, inexact unless `a' is 0. */
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) z = - z;
+    /* A result whose sign disagrees with `a' means the value overflowed. */
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_raise( float_flag_invalid );
+        return aSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    /* Any bits shifted out make the conversion inexact. */
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Converts the extended double-precision floating-point value `a' to the
+single-precision floating-point format and returns the result.  A NaN goes
+through the common NaN form and an infinity keeps its sign.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 floatx80_to_float32( floatx80 a )
+{
+    flag sign;
+    int32 biasedExp;
+    bits64 sig;
+
+    sig = extractFloatx80Frac( a );
+    biasedExp = extractFloatx80Exp( a );
+    sign = extractFloatx80Sign( a );
+    if ( biasedExp == 0x7FFF ) {
+        /* Any bit below the top significand bit marks a NaN. */
+        if ( (bits64) ( sig<<1 ) != 0 ) {
+            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+        }
+        return packFloat32( sign, 0xFF, 0 );
+    }
+    /* Narrow the significand, jamming the discarded bits into the low bit. */
+    shift64RightJamming( sig, 33, &sig );
+    /* Re-bias the exponent unless the value is a true zero. */
+    if ( ( biasedExp != 0 ) || ( sig != 0 ) ) biasedExp -= 0x3F81;
+    return roundAndPackFloat32( sign, biasedExp, sig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Converts the extended double-precision floating-point value `a' to the
+double-precision floating-point format and returns the result.  A NaN goes
+through the common NaN form and an infinity keeps its sign.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 floatx80_to_float64( floatx80 a )
+{
+    flag sign;
+    int32 biasedExp;
+    bits64 sig, narrowedSig;
+
+    sig = extractFloatx80Frac( a );
+    biasedExp = extractFloatx80Exp( a );
+    sign = extractFloatx80Sign( a );
+    if ( biasedExp == 0x7FFF ) {
+        /* Any bit below the top significand bit marks a NaN. */
+        if ( (bits64) ( sig<<1 ) != 0 ) {
+            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+        }
+        return packFloat64( sign, 0x7FF, 0 );
+    }
+    /* Drop one bit of the significand, jamming it into the low bit. */
+    shift64RightJamming( sig, 1, &narrowedSig );
+    /* Re-bias the exponent unless the (unshifted) value is a true zero. */
+    if ( ( biasedExp != 0 ) || ( sig != 0 ) ) biasedExp -= 0x3C01;
+    return roundAndPackFloat64( sign, biasedExp, narrowedSig );
+
+}
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Converts the extended double-precision floating-point value `a' to the
+quadruple-precision floating-point format and returns the result.  A NaN
+goes through the common NaN form; every other value keeps its sign and
+exponent field and has its significand repositioned.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 floatx80_to_float128( floatx80 a )
+{
+    flag sign;
+    int16 biasedExp;
+    bits64 sig, sigHigh, sigLow;
+
+    sig = extractFloatx80Frac( a );
+    biasedExp = extractFloatx80Exp( a );
+    sign = extractFloatx80Sign( a );
+    if ( biasedExp == 0x7FFF ) {
+        /* Any bit below the top significand bit marks a NaN. */
+        if ( (bits64) ( sig<<1 ) != 0 ) {
+            return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+        }
+    }
+    /* Discard the top significand bit, then move the rest into place. */
+    shift128Right( sig<<1, 0, 16, &sigHigh, &sigLow );
+    return packFloat128( sign, biasedExp, sigHigh, sigLow );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the extended double-precision floating-point value `a' to an integer,
+and returns the result as an extended double-precision floating-point
+value.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    floatx80 z;
+
+    aExp = extractFloatx80Exp( a );
+    if ( 0x403E <= aExp ) {    /* no fraction bits in the significand, or Inf/NaN */
+        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+            return propagateFloatx80NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x3FFE ) {    /* magnitude below one: result is a signed zero or one */
+        if (    ( aExp == 0 )
+             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+            return a;    /* zero exponent, nothing below the top significand bit: returned as is */
+        }
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloatx80Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+               ) {    /* strictly above one half: rounds to a signed one */
+                return
+                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+            break;
+         case float_round_down:    /* negative values go to -1, positive ones to +0 */
+            return
+                  aSign ?
+                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+                : packFloatx80( 0, 0, 0 );
+         case float_round_up:    /* negative values go to -0, positive ones to +1 */
+            return
+                  aSign ? packFloatx80( 1, 0, 0 )
+                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return packFloatx80( aSign, 0, 0 );    /* every remaining case gives a signed zero */
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp;    /* bit of `z.low' that holds the units place */
+    roundBitsMask = lastBitMask - 1;    /* all fraction bits below the units place */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z.low += lastBitMask>>1;    /* add one half */
+        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;    /* exact tie: force even */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z.low += roundBitsMask;    /* directed rounding away from zero */
+        }
+    }
+    z.low &= ~ roundBitsMask;    /* discard the fraction bits */
+    if ( z.low == 0 ) {    /* significand wrapped to zero: carry into the exponent */
+        ++z.high;
+        z.low = LIT64( 0x8000000000000000 );
+    }
+    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the extended double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the sum is
+negated before being returned.  `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+
+The operand with the smaller exponent is shifted right (with jamming) by the
+exponent difference, keeping the shifted-out bits in `zSig1'.  When both
+exponent fields are zero, a zero sum is returned directly as a signed zero
+rather than being passed to `normalizeFloatx80Subnormal', and a carry out of
+the 64-bit sum (possible only when an operand has its top significand bit
+set despite the zero exponent field) is renormalized instead of being lost.
+-------------------------------------------------------------------------------
+*/
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        /* `a' has the larger exponent. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return a;
+        }
+        /* A zero exponent field stands for an effective exponent of 1. */
+        if ( bExp == 0 ) --expDiff;
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        /* `b' has the larger exponent. */
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        /* A zero exponent field stands for an effective exponent of 1. */
+        if ( aExp == 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp = bExp;
+    }
+    else {
+        /* Equal exponents: no alignment shift is needed. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                return propagateFloatx80NaN( a, b );
+            }
+            return a;
+        }
+        zSig1 = 0;
+        zSig0 = aSig + bSig;
+        if ( aExp == 0 ) {
+            if ( zSig0 < aSig ) {
+                /* The 64-bit sum carried out: restore the lost bit through
+                   the common one-bit right shift, starting from the
+                   effective exponent of 1. */
+                zExp = 1;
+                goto shiftRight1;
+            }
+            /* Zero plus zero: normalizing a zero significand would need a
+               shift by the full word width, so return the zero directly. */
+            if ( zSig0 == 0 ) return packFloatx80( zSign, 0, 0 );
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        }
+        zExp = aExp;
+        goto shiftRight1;
+    }
+
+    zSig0 = aSig + bSig;
+
+    /* Top bit still set: the sum did not carry out of 64 bits. */
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+    /* The sum carried: shift right one bit and put the carry back on top. */
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= LIT64( 0x8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the extended
+double-precision floating-point values `a' and `b'.  If `zSign' is true,
+the difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;    /* zSig0:zSig1 is the difference handed to normalizeRoundAndPackFloatx80 */
+    int32 expDiff;
+    floatx80 z;    /* used only to build the default NaN */
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FFF ) {    /* equal exponent fields, both at the maximum */
+        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {    /* either operand has low significand bits set */
+            return propagateFloatx80NaN( a, b );
+        }
+        float_raise( float_flag_invalid );    /* otherwise raise invalid and return the default NaN */
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {    /* both exponent fields are zero: use 1 for both */
+        aExp = 1;
+        bExp = 1;
+    }
+    zSig1 = 0;    /* no alignment shift happened, so there are no extra bits */
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );    /* equal magnitudes: zero, sign bit set only in round-down mode */
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );    /* max exponent, top significand bit only, sign inverted */
+    }
+    if ( aExp == 0 ) ++expDiff;    /* a zero exponent field is aligned as if it were 1 (expDiff is negative here) */
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );    /* align aSig; presumably shifted-out bits land in zSig1 (helper not shown) */
+ bBigger:
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );    /* zSig0:zSig1 = bSig:0 - aSig:zSig1 */
+    zExp = bExp;
+    zSign ^= 1;    /* `b' is the larger magnitude, so the result sign flips */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;    /* otherwise `a' is returned unchanged */
+    }
+    if ( bExp == 0 ) --expDiff;    /* a zero exponent field is aligned as if it were 1 */
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );    /* zSig0:zSig1 = aSig:0 - bSig:zSig1 */
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the extended double-precision floating-point
+values `a' and `b'.  The operation is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+    
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign == bSign ) {    /* same sign: add the magnitudes, result keeps a's sign */
+        return addFloatx80Sigs( a, b, aSign );
+    }
+    else {    /* opposite signs: the sum is a difference of magnitudes */
+        return subFloatx80Sigs( a, b, aSign );
+    }
+    
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the extended double-precision floating-
+point values `a' and `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign == bSign ) {    /* same sign: subtract the magnitudes */
+        return subFloatx80Sigs( a, b, aSign );
+    }
+    else {    /* opposite signs: the difference is a sum of magnitudes with a's sign */
+        return addFloatx80Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the extended double-precision floating-
+point values `a' and `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;    /* zSig0:zSig1 receives the 128-bit product */
+    floatx80 z;    /* used only to build the default NaN */
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;    /* result sign is the XOR of the operand signs */
+    if ( aExp == 0x7FFF ) {    /* `a' has the maximum exponent field */
+        if (    (bits64) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {    /* either operand has max exponent with low significand bits set */
+            return propagateFloatx80NaN( a, b );
+        }
+        if ( ( bExp | bSig ) == 0 ) goto invalid;    /* `b' has zero exponent and zero significand: invalid */
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {    /* `b' has the maximum exponent field, `a' does not */
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {    /* `a' has zero exponent and zero significand */
+ invalid:
+            float_raise( float_flag_invalid );    /* raise invalid and return the default NaN */
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );    /* zero operand: signed zero result */
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );    /* non-zero significand with zero exponent field */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    if ( 0 < (sbits64) zSig0 ) {    /* high word non-zero with its top bit clear */
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );    /* shift the product left one bit ... */
+        --zExp;    /* ... and compensate in the exponent */
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the extended double-precision floating-point
+value `a' by the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;    /* zSig0:zSig1 is the quotient handed to roundAndPackFloatx80 */
+    bits64 rem0, rem1, rem2, term0, term1, term2;    /* multi-word remainder and product scratch */
+    floatx80 z;    /* used only to build the default NaN */
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;    /* result sign is the XOR of the operand signs */
+    if ( aExp == 0x7FFF ) {    /* `a' has the maximum exponent field */
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            goto invalid;    /* both have max exponent and no low significand bits: invalid */
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {    /* `b' has the maximum exponent field, `a' does not */
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign, 0, 0 );    /* signed zero result */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {    /* divisor is zero */
+            if ( ( aExp | aSig ) == 0 ) {    /* dividend is zero too: invalid */
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low = floatx80_default_nan_low;
+                z.high = floatx80_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero );    /* non-zero dividend over zero: raise divbyzero */
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );    /* zero dividend: signed zero result */
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    if ( bSig <= aSig ) {    /* make aSig smaller than bSig before dividing */
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );    /* halve aSig:rem1 ... */
+        ++zExp;    /* ... and compensate in the exponent */
+    }
+    zSig0 = estimateDiv128To64( aSig, rem1, bSig );    /* estimate of the high quotient word */
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );    /* remainder for that estimate */
+    while ( (sbits64) rem0 < 0 ) {    /* negative remainder: estimate too large, step it down */
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, bSig );    /* estimate of the low quotient word */
+    if ( (bits64) ( zSig1<<1 ) <= 8 ) {    /* refine only when the estimate, ignoring its top bit, is very small */
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        }
+        zSig1 |= ( ( rem1 | rem2 ) != 0 );    /* set the lowest bit if any remainder is left */
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the extended double-precision floating-point value
+`a' with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;    /* aSig0:aSig1 holds the running remainder */
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;    /* used only to build the default NaN */
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );    /* bSign is not read again in this function */
+    if ( aExp == 0x7FFF ) {    /* `a' has the maximum exponent field */
+        if (    (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        goto invalid;    /* max exponent without low significand bits in `a': invalid */
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;    /* `b' has max exponent and no low significand bits: `a' is returned unchanged */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {    /* divisor is zero: invalid */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;    /* nothing set below the top significand bit: return `a' as is */
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 );    /* force the top significand bit of the divisor on */
+    zSign = aSign;    /* the result starts with a's sign */
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a;    /* a's exponent is more than one below b's: return `a' unchanged */
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );    /* expDiff == -1: halve `a' and continue with expDiff 0 */
+        expDiff = 0;
+    }
+    q = ( bSig <= aSig0 );    /* first quotient bit */
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) {    /* each pass consumes 62 bits of exponent difference */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;    /* lower the estimate by 2, clamped at 0; presumably so it never exceeds the true quotient -- confirm against the helper */
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {    /* final partial step for the remaining expDiff bits */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;    /* keep only the top expDiff bits of the estimate */
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );    /* term0:term1 = bSig scaled to the remainder's position */
+        while ( le128( term0, term1, aSig0, aSig1 ) ) {    /* subtract the scaled divisor while it still fits, counting in q */
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );    /* alternate = divisor term minus remainder */
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) {    /* take the alternate when it is smaller, or on a tie when q is odd */
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;    /* taking the alternate flips the result sign */
+    }
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );    /* precision argument is fixed at 80, not floatx80_rounding_precision */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the extended double-precision floating-point
+value `a'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sqrt( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1;    /* zSig0:zSig1 is the root handed to roundAndPackFloatx80 */
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;    /* multi-word remainder and product scratch */
+    bits64 shiftedRem0, shiftedRem1;
+    floatx80 z;    /* used only to build the default NaN */
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {    /* maximum exponent field */
+        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+        if ( ! aSign ) return a;    /* sign clear: `a' is returned unchanged */
+        goto invalid;    /* sign set: invalid */
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 ) == 0 ) return a;    /* sign set with zero exponent and significand: returned unchanged */
+ invalid:
+        float_raise( float_flag_invalid );    /* raise invalid and return the default NaN */
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );    /* zero input: zero result with sign clear */
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;    /* halve the exponent's offset from 0x3FFF */
+    zSig0 = estimateSqrt32( aExp, aSig0>>32 );    /* initial estimate from the upper 32 significand bits */
+    zSig0 <<= 31;
+    aSig1 = 0;
+    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );    /* shift right by 2, or 3 when the exponent field is odd */
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;    /* refine the estimate with one division step */
+    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );    /* top bit clear after refinement: use all ones instead */
+    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );    /* rem = aSig - zSig0 squared */
+    while ( (sbits64) rem0 < 0 ) {    /* negative remainder: zSig0 too large, step it down */
+        --zSig0;
+        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
+        term1 |= 1;    /* term = 2*zSig0 + 1, the change in the square for one step */
+        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+    }
+    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
+    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );    /* estimate of the low root word */
+    if ( (bits64) ( zSig1<<1 ) <= 10 ) {    /* refine only when the estimate, ignoring its top bit, is very small */
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( zSig0, zSig1, &term1, &term2 );
+        shortShift128Left( term1, term2, 1, &term1, &term2 );    /* term = 2*zSig0*zSig1 */
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );    /* also subtract zSig1 squared */
+        while ( (sbits64) rem1 < 0 ) {    /* negative remainder: zSig1 too large, step it down */
+            --zSig1;
+            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
+            term3 |= 1;
+            add192(
+                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );    /* set the lowest bit if any remainder is left */
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );    /* result sign is always clear */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {    /* either operand has max exponent with low significand bits set */
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );    /* invalid is raised only for signaling NaNs */
+        }
+        return 0;    /* such operands never compare equal */
+    }
+    return
+           ( a.low == b.low )    /* significands must match ... */
+        && (    ( a.high == b.high )    /* ... and either the high words match too ... */
+             || (    ( a.low == 0 )
+                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )    /* ... or both are zero apart from the top bit of `high' */
+           );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+less than or equal to the corresponding value `b', and 0 otherwise.  The
+comparison is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {    /* either operand has max exponent with low significand bits set */
+        float_raise( float_flag_invalid );    /* raised unconditionally here, unlike floatx80_le_quiet */
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {    /* signs differ */
+        return
+               aSign    /* true when `a' carries the sign ... */
+            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );    /* ... or when both are zero apart from the top bit of `high' */
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )    /* sign set on both: compare with operands swapped */
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+less than the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {    /* either operand has max exponent with low significand bits set */
+        float_raise( float_flag_invalid );    /* raised unconditionally here, unlike floatx80_lt_quiet */
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {    /* signs differ */
+        return
+               aSign    /* true only when `a' carries the sign ... */
+            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );    /* ... and the operands are not both zero apart from the top bit of `high' */
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )    /* sign set on both: compare with operands swapped */
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is equal
+to the corresponding value `b', and 0 otherwise.  The invalid exception is
+raised if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {    /* either operand has max exponent with low significand bits set */
+        float_raise( float_flag_invalid );    /* raised unconditionally here, unlike floatx80_eq */
+        return 0;
+    }
+    return
+           ( a.low == b.low )    /* significands must match ... */
+        && (    ( a.high == b.high )    /* ... and either the high words match too ... */
+             || (    ( a.low == 0 )
+                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )    /* ... or both are zero apart from the top bit of `high' */
+           );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is less
+than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+do not cause an exception.  Otherwise, the comparison is performed according
+to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {    /* either operand has max exponent with low significand bits set */
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );    /* invalid is raised only for signaling NaNs */
+        }
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {    /* signs differ */
+        return
+               aSign    /* true when `a' carries the sign ... */
+            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );    /* ... or when both are zero apart from the top bit of `high' */
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )    /* sign set on both: compare with operands swapped */
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is less
+than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {    /* either operand has max exponent with low significand bits set */
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );    /* invalid is raised only for signaling NaNs */
+        }
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {    /* signs differ */
+        return
+               aSign    /* true only when `a' carries the sign ... */
+            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );    /* ... and the operands are not both zero apart from the top bit of `high' */
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )    /* sign set on both: compare with operands swapped */
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the 32-bit two's complement integer format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float128_to_int32( float128 a )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1;    /* high and low words of the fraction */
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;    /* max exponent with non-zero fraction: clear the sign */
+    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );    /* non-zero exponent field: set bit 48 above the high fraction word */
+    aSig0 |= ( aSig1 != 0 );    /* fold any low-word bits into the lowest bit of aSig0 */
+    shiftCount = 0x4028 - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );    /* right shift only when the count is positive */
+    return roundAndPackInt32( aSign, aSig0 );    /* rounding and range handling are left to roundAndPackInt32 (not shown here) */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the 32-bit two's complement integer format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float128_to_int32_round_to_zero( float128 a )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig0, aSig1, savedASig;    /* savedASig keeps the unshifted value for the inexact check */
+    int32 z;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    aSig0 |= ( aSig1 != 0 );    /* fold any low-word bits into the lowest bit of aSig0 */
+    shiftCount = 0x402F - aExp;
+    if ( shiftCount < 17 ) {    /* exponent too large for the result: invalid */
+        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;    /* max exponent with non-zero fraction: clear the sign first */
+        goto invalid;
+    }
+    else if ( 48 < shiftCount ) {    /* every significand bit would be shifted out: result is 0 */
+        if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;    /* inexact unless the input was zero */
+        return 0;
+    }
+    aSig0 |= LIT64( 0x0001000000000000 );    /* set bit 48 above the high fraction word */
+    savedASig = aSig0;
+    aSig0 >>= shiftCount;    /* plain shift, discarding the low bits */
+    z = aSig0;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {    /* sign of z disagrees with aSign */
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF;    /* value returned on invalid depends on aSign */
+    }
+    if ( ( aSig0<<shiftCount ) != savedASig ) {    /* bits were lost in the shift */
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the single-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float128_to_float32( float128 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;    /* high and low words of the fraction */
+    bits32 zSig;
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {    /* maximum exponent field */
+        if ( aSig0 | aSig1 ) {    /* non-zero fraction: convert through the common NaN form */
+            return commonNaNToFloat32( float128ToCommonNaN( a ) );
+        }
+        return packFloat32( aSign, 0xFF, 0 );    /* zero fraction: exponent 0xFF, zero fraction, same sign */
+    }
+    aSig0 |= ( aSig1 != 0 );    /* fold any low-word bits into the lowest bit of aSig0 */
+    shift64RightJamming( aSig0, 18, &aSig0 );
+    zSig = aSig0;    /* narrowed to 32 bits */
+    if ( aExp || zSig ) {    /* skipped only when exponent and shifted significand are both zero */
+        zSig |= 0x40000000;    /* set bit 30 */
+        aExp -= 0x3F81;
+    }
+    return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float128_to_float64( float128 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;    /* high and low words of the fraction */
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {    /* maximum exponent field */
+        if ( aSig0 | aSig1 ) {    /* non-zero fraction: convert through the common NaN form */
+            return commonNaNToFloat64( float128ToCommonNaN( a ) );
+        }
+        return packFloat64( aSign, 0x7FF, 0 );    /* zero fraction: exponent 0x7FF, zero fraction, same sign */
+    }
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );    /* move the fraction up 14 bits */
+    aSig0 |= ( aSig1 != 0 );    /* fold what is left in the low word into the lowest bit of aSig0 */
+    if ( aExp || aSig0 ) {    /* skipped only when exponent and significand are both zero */
+        aSig0 |= LIT64( 0x4000000000000000 );    /* set bit 62 */
+        aExp -= 0x3C01;
+    }
+    return roundAndPackFloat64( aSign, aExp, aSig0 );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point
+value `a' to the extended double-precision floating-point format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float128_to_floatx80( float128 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig0, aSig1;    /* high and low words of the fraction */
+
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {    /* maximum exponent field */
+        if ( aSig0 | aSig1 ) {    /* non-zero fraction: convert through the common NaN form */
+            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
+        }
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );    /* zero fraction: max exponent, top significand bit only */
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );    /* zero input: signed zero result */
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    else {
+        aSig0 |= LIT64( 0x0001000000000000 );    /* non-zero exponent field: set bit 48 above the high fraction word */
+    }
+    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );    /* shift left 15 so bit 48 becomes bit 63 */
+    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );    /* precision argument is fixed at 80 */
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the quadruple-precision floating-point value `a' to an integer, and
+returns the result as a quadruple-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_round_to_int( float128 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;    /* lastBitMask: one bit at the rounding position; roundBitsMask: all bits below it */
+    int8 roundingMode;
+    float128 z;
+
+    aExp = extractFloat128Exp( a );
+    if ( 0x402F <= aExp ) {    /* this branch masks and rounds within the low word */
+        if ( 0x406F <= aExp ) {    /* exponent this large: nothing is rounded */
+            if (    ( aExp == 0x7FFF )
+                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+               ) {    /* max exponent with non-zero fraction */
+                return propagateFloat128NaN( a, a );
+            }
+            return a;    /* otherwise `a' is returned unchanged */
+        }
+        lastBitMask = 1;
+        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;    /* two-step shift: a total of 64 (aExp == 0x402F) yields 0 without a 64-bit shift count */
+        roundBitsMask = lastBitMask - 1;    /* all ones when lastBitMask is 0 */
+        z = a;
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            if ( lastBitMask ) {    /* rounding position lies inside the low word */
+                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );    /* add half of the last bit */
+                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;    /* round bits now all zero: clear the last bit */
+            }
+            else {    /* lastBitMask is 0: the rounding position is the lowest bit of the high word */
+                if ( (sbits64) z.low < 0 ) {    /* top bit of the low word set */
+                    ++z.high;
+                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;    /* no other low-word bits set: clear bit 0 of `high' */
+                }
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {    /* directed rounding */
+            if (   extractFloat128Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {    /* sign set XOR round-up mode */
+                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );    /* add the full round mask before truncating */
+            }
+        }
+        z.low &= ~ roundBitsMask;    /* drop the bits below the rounding position */
+    }
+    else {
+        if ( aExp <= 0x3FFE ) {    /* small exponent: result comes from the fixed values below */
+            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;    /* zero apart from the top bit of `high': returned unchanged */
+            float_exception_flags |= float_flag_inexact;
+            aSign = extractFloat128Sign( a );
+            switch ( float_rounding_mode ) {
+             case float_round_nearest_even:
+                if (    ( aExp == 0x3FFE )
+                     && (   extractFloat128Frac0( a )
+                          | extractFloat128Frac1( a ) )
+                   ) {    /* exponent 0x3FFE with non-zero fraction */
+                    return packFloat128( aSign, 0x3FFF, 0, 0 );    /* exponent 0x3FFF, zero fraction, same sign */
+                }
+                break;    /* otherwise fall to the signed zero return below */
+             case float_round_down:
+                return
+                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )    /* sign set: exponent 0x3FFF, zero fraction */
+                    : packFloat128( 0, 0, 0, 0 );    /* sign clear: zero */
+             case float_round_up:
+                return
+                      aSign ? packFloat128( 1, 0, 0, 0 )    /* sign set: zero with the sign bit */
+                    : packFloat128( 0, 0x3FFF, 0, 0 );    /* sign clear: exponent 0x3FFF, zero fraction */
+            }
+            return packFloat128( aSign, 0, 0, 0 );    /* remaining cases: signed zero */
+        }
+        lastBitMask = 1;
+        lastBitMask <<= 0x402F - aExp;    /* rounding position lies inside the high word */
+        roundBitsMask = lastBitMask - 1;
+        z.low = 0;    /* the whole low word is below the rounding position */
+        z.high = a.high;
+        roundingMode = float_rounding_mode;
+        if ( roundingMode == float_round_nearest_even ) {
+            z.high += lastBitMask>>1;    /* add half of the last bit */
+            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {    /* round bits and low word all zero: clear the last bit */
+                z.high &= ~ lastBitMask;
+            }
+        }
+        else if ( roundingMode != float_round_to_zero ) {    /* directed rounding */
+            if (   extractFloat128Sign( z )
+                 ^ ( roundingMode == float_round_up ) ) {    /* sign set XOR round-up mode */
+                z.high |= ( a.low != 0 );    /* fold a non-zero low word into bit 0 of `high' */
+                z.high += roundBitsMask;
+            }
+        }
+        z.high &= ~ roundBitsMask;    /* drop the bits below the rounding position */
+    }
+    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {    /* any change to the value marks the result inexact */
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the quadruple-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int32 expDiff;
+    /* Shift the smaller-exponent operand right, add, then renormalise. */
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {  /* a has the larger exponent */
+        if ( aExp == 0x7FFF ) {  /* a is a NaN (nonzero fraction) or an infinity */
+            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {  /* b gets no leading bit; shift it one place less */
+            --expDiff;
+        }
+        else {
+            bSig0 |= LIT64( 0x0001000000000000 );  /* bit 48, presumably the implicit leading bit */
+        }
+        shift128ExtraRightJamming(
+            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {  /* b has the larger exponent: mirror of the above */
+        if ( bExp == 0x7FFF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+            return packFloat128( zSign, 0x7FFF, 0, 0 );  /* infinity signed by zSign */
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig0 |= LIT64( 0x0001000000000000 );
+        }
+        shift128ExtraRightJamming(
+            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+        zExp = bExp;
+    }
+    else {  /* equal exponents: nothing to align */
+        if ( aExp == 0x7FFF ) {
+            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+                return propagateFloat128NaN( a, b );
+            }
+            return a;
+        }
+        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+        zSig2 = 0;
+        zSig0 |= LIT64( 0x0002000000000000 );  /* two bit-48 leading bits sum to bit 49 */
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    aSig0 |= LIT64( 0x0001000000000000 );  /* NOTE(review): ORed into aSig0 on both paths; only the sum is used - confirm */
+    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    --zExp;  /* provisional; undone below if the sum reached bit 49 */
+    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+    ++zExp;
+ shiftRight1:
+    shift128ExtraRightJamming(
+        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the quadruple-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    int32 expDiff;
+    float128 z;
+    /* Subtract the smaller magnitude from the larger; flip zSign if b is larger. */
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    expDiff = aExp - bExp;
+    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );  /* offset by `zExp - 14' at the end */
+    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FFF ) {  /* equal exponents, both all ones */
+        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+            return propagateFloat128NaN( a, b );
+        }
+        float_raise( float_flag_invalid );  /* neither is a NaN: invalid, default NaN */
+        z.low = float128_default_nan_low;
+        z.high = float128_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {  /* both exponent fields are 0: use exponent 1 instead */
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig0 < aSig0 ) goto aBigger;  /* compare high words, then low words */
+    if ( aSig0 < bSig0 ) goto bBigger;
+    if ( bSig1 < aSig1 ) goto aBigger;
+    if ( aSig1 < bSig1 ) goto bBigger;  /* else equal: zero, negative only in round_down */
+    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );  /* infinity, sign inverted */
+    }
+    if ( aExp == 0 ) {  /* a gets no leading bit; shift it one place less */
+        ++expDiff;
+    }
+    else {
+        aSig0 |= LIT64( 0x4000000000000000 );  /* bit 62 (48 + 14), presumably the leading bit */
+    }
+    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+    bSig0 |= LIT64( 0x4000000000000000 );
+ bBigger:
+    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    zSign ^= 1;  /* b's magnitude is larger, so the sign is inverted */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {  /* a is a NaN (nonzero fraction) or an infinity */
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig0 |= LIT64( 0x4000000000000000 );
+    }
+    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+    aSig0 |= LIT64( 0x4000000000000000 );
+ aBigger:
+    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the quadruple-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_add( float128 a, float128 b )
+{
+    flag signA = extractFloat128Sign( a );
+    flag signB = extractFloat128Sign( b );
+
+    /* Opposite signs: the sum is a difference of magnitudes. */
+    if ( signA != signB ) {
+        return subFloat128Sigs( a, b, signA );
+    }
+
+    /* Matching signs: the magnitudes add; the sign of `a' is passed on. */
+    return addFloat128Sigs( a, b, signA );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the quadruple-precision floating-point
+values `a' and `b'.  The operation is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_sub( float128 a, float128 b )
+{
+    flag signA = extractFloat128Sign( a );
+    flag signB = extractFloat128Sign( b );
+
+    /* Opposite signs: a - b adds the magnitudes. */
+    if ( signA != signB ) {
+        return addFloat128Sigs( a, b, signA );
+    }
+
+    /* Matching signs: a - b subtracts the magnitudes. */
+    return subFloat128Sigs( a, b, signA );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the quadruple-precision floating-point
+values `a' and `b'.  The operation is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_mul( float128 a, float128 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+    float128 z;
+    /* Handle NaN, infinity and zero operands, then multiply the significands. */
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    bSign = extractFloat128Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {  /* a is a NaN or an infinity */
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat128NaN( a, b );
+        }
+        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;  /* infinity times zero */
+        return packFloat128( zSign, 0x7FFF, 0, 0 );
+    }
+    if ( bExp == 0x7FFF ) {  /* b is a NaN or an infinity; a is neither */
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {  /* zero times infinity */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = float128_default_nan_low;
+            z.high = float128_default_nan_high;
+            return z;
+        }
+        return packFloat128( zSign, 0x7FFF, 0, 0 );
+    }
+    if ( aExp == 0 ) {  /* a is zero (signed zero result) or is normalised */
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    if ( bExp == 0 ) {  /* same treatment for b */
+        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    zExp = aExp + bExp - 0x4000;
+    aSig0 |= LIT64( 0x0001000000000000 );  /* bit 48 is set on a only */
+    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );  /* NOTE(review): presumably b's leading-bit term - confirm */
+    zSig2 |= ( zSig3 != 0 );  /* fold the lowest word into zSig2 as a sticky bit */
+    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {  /* product reached bit 49: shift down one */
+        shift128ExtraRightJamming(
+            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+        ++zExp;
+    }
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the quadruple-precision floating-point value
+`a' by the corresponding value `b'.  The operation is performed according to
+the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_div( float128 a, float128 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    float128 z;
+    /* Special operands first; then two quotient words, each estimated and corrected. */
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    bSign = extractFloat128Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {  /* a is a NaN or an infinity */
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+        if ( bExp == 0x7FFF ) {
+            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+            goto invalid;  /* infinity divided by infinity */
+        }
+        return packFloat128( zSign, 0x7FFF, 0, 0 );
+    }
+    if ( bExp == 0x7FFF ) {  /* b is a NaN or an infinity; a is neither */
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+        return packFloat128( zSign, 0, 0, 0 );  /* signed zero */
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) {  /* divisor is zero */
+            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {  /* zero divided by zero */
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low = float128_default_nan_low;
+                z.high = float128_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero );  /* nonzero a: flag it, return infinity */
+            return packFloat128( zSign, 0x7FFF, 0, 0 );
+        }
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {  /* a is zero (signed zero result) or is normalised */
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = aExp - bExp + 0x3FFD;
+    shortShift128Left(
+        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+    shortShift128Left(
+        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {  /* presumably b <= a: halve a, bump exponent */
+        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+    while ( (sbits64) rem0 < 0 ) {  /* negative remainder: step zSig0 down, add b back */
+        --zSig0;
+        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+    }
+    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+    if ( ( zSig1 & 0x3FFF ) <= 4 ) {  /* exact remainder only when the low 14 bits are <= 4 */
+        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {  /* negative remainder: step zSig1 down */
+            --zSig1;
+            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  /* nonzero remainder sets bit 0 */
+    }
+    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the quadruple-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_rem( float128 a, float128 b )
+{
+    flag aSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig0, bSig1;
+    bits64 q, term0, term1, term2, allZero, alternateASig0, alternateASig1;
+    bits64 sigMean1;
+    sbits64 sigMean0;
+    float128 z;
+    /* Special operands first; then reduce a by multiples of b, 61 bits per pass. */
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    bSig1 = extractFloat128Frac1( b );
+    bSig0 = extractFloat128Frac0( b );
+    bExp = extractFloat128Exp( b );
+    /* The sign of `b' is never consulted below, so it is not extracted. */
+    if ( aExp == 0x7FFF ) {  /* a is a NaN or an infinity */
+        if (    ( aSig0 | aSig1 )
+             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+            return propagateFloat128NaN( a, b );
+        }
+        goto invalid;  /* a is an infinity and b is not a NaN */
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+        return a;  /* b is an infinity: a is returned unchanged */
+    }
+    if ( bExp == 0 ) {
+        if ( ( bSig0 | bSig1 ) == 0 ) {  /* b is zero */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = float128_default_nan_low;
+            z.high = float128_default_nan_high;
+            return z;
+        }
+        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+    }
+    if ( aExp == 0 ) {
+        if ( ( aSig0 | aSig1 ) == 0 ) return a;  /* a is zero */
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    expDiff = aExp - bExp;
+    if ( expDiff < -1 ) return a;  /* a is returned unchanged as the remainder */
+    shortShift128Left(
+        aSig0 | LIT64( 0x0001000000000000 ),
+        aSig1,
+        15 - ( expDiff < 0 ),
+        &aSig0,
+        &aSig1
+    );
+    shortShift128Left(
+        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+    q = le128( bSig0, bSig1, aSig0, aSig1 );
+    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    expDiff -= 64;
+    while ( 0 < expDiff ) {  /* each pass consumes 61 bits of exponent difference */
+        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+        q = ( 4 < q ) ? q - 4 : 0;  /* lower the estimate by 4, never below 0 */
+        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+        expDiff -= 61;
+    }
+    if ( -64 < expDiff ) {  /* a final, partial quotient word is still needed */
+        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+        q = ( 4 < q ) ? q - 4 : 0;
+        q >>= - expDiff;
+        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+        expDiff += 52;
+        if ( expDiff < 0 ) {
+            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+        }
+        else {
+            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+        }
+        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+    }
+    else {
+        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+    }
+    do {  /* subtract b until the running remainder turns negative */
+        alternateASig0 = aSig0;
+        alternateASig1 = aSig1;
+        ++q;
+        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+    } while ( 0 <= (sbits64) aSig0 );
+    add128( aSig0, aSig1, alternateASig0, alternateASig1,  /* sum of both candidates */
+            (bits64 *) &sigMean0, &sigMean1 );  /* cast: add128 is given bits64 pointers */
+    if (    ( sigMean0 < 0 )
+         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {  /* keep last non-negative value */
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+    }
+    zSign = ( (sbits64) aSig0 < 0 );
+    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );  /* negate to a magnitude */
+    return
+        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the quadruple-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float128 float128_sqrt( float128 a )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1, zSig2;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    bits64 shiftedRem0, shiftedRem1;
+    float128 z;
+    /* Special operands first; then two root words, each estimated and corrected. */
+    aSig1 = extractFloat128Frac1( a );
+    aSig0 = extractFloat128Frac0( a );
+    aExp = extractFloat128Exp( a );
+    aSign = extractFloat128Sign( a );
+    if ( aExp == 0x7FFF ) {  /* a is a NaN or an infinity */
+        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
+        if ( ! aSign ) return a;  /* positive infinity is returned unchanged */
+        goto invalid;  /* negative infinity */
+    }
+    if ( aSign ) {  /* negative operand */
+        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;  /* negative zero is returned unchanged */
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low = float128_default_nan_low;
+        z.high = float128_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {  /* a is zero (returns +0) or is normalised */
+        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+    }
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;  /* offset from 0x3FFF shifted right by one */
+    aSig0 |= LIT64( 0x0001000000000000 );
+    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+    zSig0 <<= 31;
+    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );  /* depends on exponent parity */
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
+    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );  /* top bit clear: use all ones */
+    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );  /* shifted aSig minus zSig0 squared */
+    while ( (sbits64) rem0 < 0 ) {  /* negative remainder: step zSig0 down */
+        --zSig0;
+        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
+        term1 |= 1;  /* term is now 2*zSig0 + 1 */
+        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+    }
+    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
+    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
+    if ( ( zSig1 & 0x3FFF ) <= 5 ) {  /* exact remainder only when the low 14 bits are <= 5 */
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( zSig0, zSig1, &term1, &term2 );
+        shortShift128Left( term1, term2, 1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {  /* negative remainder: step zSig1 down */
+            --zSig1;
+            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
+            term3 |= 1;
+            add192(
+                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  /* nonzero remainder sets bit 0 */
+    }
+    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );  /* sign argument is always 0 */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float128_eq( float128 a, float128 b )
+{
+    flag sawNaN;
+
+    sawNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
+             && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! sawNaN ) {
+        sawNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
+                 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( sawNaN ) {  /* only a signaling NaN raises invalid */
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* Unequal high words still match when both operands are zeros. */
+    return ( a.low == 0 ) && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float128_le( float128 a, float128 b )
+{
+    flag aSign, bSign, sawNaN;
+
+    sawNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
+             && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! sawNaN ) {
+        sawNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
+                 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( sawNaN ) {  /* any NaN operand raises invalid */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign == bSign ) {
+        /* Same sign: le128 on the raw words, operands swapped if negative. */
+        if ( aSign ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Mixed signs: true when `a' is the negative one or both are zeros. */
+    if ( aSign ) return 1;
+    return ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float128_lt( float128 a, float128 b )
+{
+    flag aSign, bSign, sawNaN;
+
+    sawNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
+             && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! sawNaN ) {
+        sawNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
+                 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( sawNaN ) {  /* any NaN operand raises invalid */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign == bSign ) {
+        /* Same sign: lt128 on the raw words, operands swapped if negative. */
+        if ( aSign ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Mixed signs: true when `a' is the negative one, unless both are zeros. */
+    if ( ! aSign ) return 0;
+    return ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is equal to
+the corresponding value `b', and 0 otherwise.  The invalid exception is
+raised if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float128_eq_signaling( float128 a, float128 b )
+{
+    flag sawNaN;
+
+    sawNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
+             && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! sawNaN ) {
+        sawNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
+                 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( sawNaN ) {  /* any NaN operand raises invalid */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* Unequal high words still match when both operands are zeros. */
+    return ( a.low == 0 ) && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is less than
+or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float128_le_quiet( float128 a, float128 b )
+{
+    flag aSign, bSign, sawNaN;
+
+    sawNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
+             && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! sawNaN ) {
+        sawNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
+                 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( sawNaN ) {  /* only a signaling NaN raises invalid */
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign == bSign ) {
+        /* Same sign: le128 on the raw words, operands swapped if negative. */
+        if ( aSign ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Mixed signs: true when `a' is the negative one or both are zeros. */
+    if ( aSign ) return 1;
+    return ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float128_lt_quiet( float128 a, float128 b )
+{
+    flag aSign, bSign, sawNaN;
+
+    sawNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
+             && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! sawNaN ) {
+        sawNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
+                 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( sawNaN ) {  /* only a signaling NaN raises invalid */
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign == bSign ) {
+        /* Same sign: lt128 on the raw words, operands swapped if negative. */
+        if ( aSign ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Mixed signs: true when `a' is the negative one, unless both are zeros. */
+    if ( ! aSign ) return 0;
+    return ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0;
+}
+
+#endif
+
diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h
new file mode 100644
index 000000000..26745a4cb
--- /dev/null
+++ b/arch/arm/nwfpe/softfloat.h
@@ -0,0 +1,290 @@
+
+/*
+===============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+#ifndef __SOFTFLOAT_H__
+#define __SOFTFLOAT_H__
+
+/*
+-------------------------------------------------------------------------------
+The macro `FLOATX80' must be defined to enable the extended double-precision
+floating-point format `floatx80'.  If this macro is not defined, the
+`floatx80' type will not be defined, and none of the functions that either
+input or output the `floatx80' type will be defined.  The same applies to
+the `FLOAT128' macro and the quadruple-precision format `float128'.
+-------------------------------------------------------------------------------
+*/
+#define FLOATX80
+/* #define FLOAT128 */
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point types.
+-------------------------------------------------------------------------------
+*/
+typedef unsigned long int float32;  /* NOTE(review): assumes a 32-bit long -- confirm */
+typedef unsigned long long float64; /* NOTE(review): assumes a 64-bit long long -- confirm */
+#ifdef FLOATX80
+typedef struct {
+    unsigned short high;
+    unsigned long long low;
+} floatx80;
+#endif /* FLOATX80 */
+#ifdef FLOAT128
+typedef struct {
+    unsigned long long high, low;  /* upper and lower halves of the value */
+} float128;
+#endif /* FLOAT128 */
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point underflow tininess-detection mode.
+-------------------------------------------------------------------------------
+*/
+extern signed char float_detect_tininess;  /* presumably one of the values below -- confirm */
+enum {
+    float_tininess_after_rounding  = 0,
+    float_tininess_before_rounding = 1
+};
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point rounding mode.
+-------------------------------------------------------------------------------
+*/
+extern signed char float_rounding_mode;  /* presumably one of the values below -- confirm */
+enum {
+    float_round_nearest_even = 0,
+    float_round_to_zero      = 1,
+    float_round_down         = 2,
+    float_round_up           = 3
+};
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point exception flags.
+-------------------------------------------------------------------------------
+extern signed char float_exception_flags;
+enum {
+    float_flag_inexact   =  1,
+    float_flag_underflow =  2,
+    float_flag_overflow  =  4,
+    float_flag_divbyzero =  8,
+    float_flag_invalid   = 16
+};
+The declaration above is the previous one, kept (commented out) for reference.
+ScottB: November 4, 1998
+Changed the enumeration to match the bit order in the FPA11.
+*/
+
+extern signed char float_exception_flags;  /* bit mask of the float_flag_* values below */
+enum {
+    float_flag_invalid   =  1,
+    float_flag_divbyzero =  2,
+    float_flag_overflow  =  4,
+    float_flag_underflow =  8,
+    float_flag_inexact   = 16
+};
+
+/*
+-------------------------------------------------------------------------------
+Routine to raise any or all of the software IEC/IEEE floating-point
+exception flags.
+-------------------------------------------------------------------------------
+*/
+void float_raise( signed char );
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE integer-to-floating-point conversion routines.
+-------------------------------------------------------------------------------
+*/
+float32 int32_to_float32( signed int );
+float64 int32_to_float64( signed int );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( signed int );
+#endif
+#ifdef FLOAT128
+float128 int32_to_float128( signed int );
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE single-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int float32_to_int32( float32 );
+signed int float32_to_int32_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+#endif
+#ifdef FLOAT128
+float128 float32_to_float128( float32 );
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE single-precision operations.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+char float32_eq( float32, float32 );
+char float32_le( float32, float32 );
+char float32_lt( float32, float32 );
+char float32_eq_signaling( float32, float32 );
+char float32_le_quiet( float32, float32 );
+char float32_lt_quiet( float32, float32 );
+char float32_is_signaling_nan( float32 );
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE double-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int float64_to_int32( float64 );
+signed int float64_to_int32_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+#endif
+#ifdef FLOAT128
+float128 float64_to_float128( float64 );
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE double-precision operations.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+char float64_eq( float64, float64 );
+char float64_le( float64, float64 );
+char float64_lt( float64, float64 );
+char float64_eq_signaling( float64, float64 );
+char float64_le_quiet( float64, float64 );
+char float64_lt_quiet( float64, float64 );
+char float64_is_signaling_nan( float64 );
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int floatx80_to_int32( floatx80 );
+signed int floatx80_to_int32_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+#ifdef FLOAT128
+float128 floatx80_to_float128( floatx80 );
+#endif /* FLOAT128 */
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision rounding precision.  Valid
+values are 32, 64, and 80.
+-------------------------------------------------------------------------------
+*/
+extern signed char floatx80_rounding_precision;
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision operations.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+char floatx80_eq( floatx80, floatx80 );
+char floatx80_le( floatx80, floatx80 );
+char floatx80_lt( floatx80, floatx80 );
+char floatx80_eq_signaling( floatx80, floatx80 );
+char floatx80_le_quiet( floatx80, floatx80 );
+char floatx80_lt_quiet( floatx80, floatx80 );
+char floatx80_is_signaling_nan( floatx80 );
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE quadruple-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int float128_to_int32( float128 );
+signed int float128_to_int32_round_to_zero( float128 );
+float32 float128_to_float32( float128 );
+float64 float128_to_float64( float128 );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 );
+#endif /* FLOATX80 */
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE quadruple-precision operations.
+-------------------------------------------------------------------------------
+*/
+float128 float128_round_to_int( float128 );
+float128 float128_add( float128, float128 );
+float128 float128_sub( float128, float128 );
+float128 float128_mul( float128, float128 );
+float128 float128_div( float128, float128 );
+float128 float128_rem( float128, float128 );
+float128 float128_sqrt( float128 );
+char float128_eq( float128, float128 );
+char float128_le( float128, float128 );
+char float128_lt( float128, float128 );
+char float128_eq_signaling( float128, float128 );
+char float128_le_quiet( float128, float128 );
+char float128_lt_quiet( float128, float128 );  /* 1 if a < b, else 0; defined returning `flag', presumably a char typedef -- confirm */
+char float128_is_signaling_nan( float128 );
+
+#endif /* FLOAT128 */
+
+#endif /* __SOFTFLOAT_H__ */
diff --git a/arch/arm/vmlinux-armv.lds b/arch/arm/vmlinux-armv.lds
index f0d4a86c5..681143172 100644
--- a/arch/arm/vmlinux-armv.lds
+++ b/arch/arm/vmlinux-armv.lds
@@ -7,50 +7,64 @@ OUTPUT_ARCH(arm)
 ENTRY(_start)
 SECTIONS
 {
-  _text = .;			/* Text and read-only data */
-  .text : {
+  _text = .;			/* Text and read-only data	*/
+  .text : { }			/* Set text start address	*/
+
+  __init_begin = .;		/* Init code and data		*/
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(4096);
+  __init_end = .;
+
+  __ebsa285_begin = .;
+  .text.ebsa285 : { *(.text.ebsa285) }
+  .data.ebsa285 : { *(.data.ebsa285) }
+  . = ALIGN(4096);
+  __ebsa285_end = .;
+
+  __netwinder_begin = .;
+  .text.netwinder : { *(.text.netwinder) }
+  .data.netwinder : { *(.data.netwinder) }
+  . = ALIGN(4096);
+  __netwinder_end = .;
+
+  .text.real : {		/* Real text segment		*/
 	*(.text)
 	*(.fixup)
 	*(.gnu.warning)
-	} = 0x9090
+	}
+
   .text.lock : { *(.text.lock) }	/* out-of-line lock text */
   .rodata : { *(.rodata) }
   .kstrtab : { *(.kstrtab) }
 
-  . = ALIGN(16);		/* Exception table */
+  . = ALIGN(16);		/* Exception table		*/
   __start___ex_table = .;
   __ex_table : { *(__ex_table) }
   __stop___ex_table = .;
 
-  __start___ksymtab = .;	/* Kernel symbol table */
+  __start___ksymtab = .;	/* Kernel symbol table		*/
   __ksymtab : { *(__ksymtab) }
   __stop___ksymtab = .;
 
-  _etext = .;			/* End of text section */
+  _etext = .;			/* End of text section		*/
 
   . = ALIGN(8192);
-  .data : {			/* Data */
+  .data : {			/* Data				*/
 	*(.init.task)
 	*(.data)
 	CONSTRUCTORS
 	}
 
-  _edata = .;			/* End of data section */
-
-  . = ALIGN(4096);		/* Init code and data */
-  __init_begin = .;
-  .text.init : { *(.text.init) }
-  .data.init : { *(.data.init) }
-  . = ALIGN(4096);
-  __init_end = .;
+  _edata = .;			/* End of data section		*/
 
-  __bss_start = .;		/* BSS */
+  __bss_start = .;		/* BSS				*/
   .bss : {
 	*(.bss)
 	}
   _end = . ;
 
-  /* Stabs debugging sections.  */
+				/* Stabs debugging sections.	*/
   .stab 0 : { *(.stab) }
   .stabstr 0 : { *(.stabstr) }
   .stab.excl 0 : { *(.stab.excl) }
author	Ralf Baechle <ralf@linux-mips.org>	1999-06-22 23:05:57 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	1999-06-22 23:05:57 +0000
commit	51d3b7814cdccef9188240fe0cbd8d97ff2c7470 (patch)
tree	5cbb01d0323d4f63ade66bdf48ba4a91aaa6df16 /arch/arm
parent	52273a23c9a84336b93a35e4847fc88fac7eb0e4 (diff)