[patches.git] / nios2-5.1.patch

the niosii-gnutools-src-5.1.tgz release from altera is missing a lot of
core gcc files, this patch adds them back in

--- ./gcc/gcc/config/nios2/crti.asm
+++ ./gcc/gcc/config/nios2/crti.asm
@@ -0,0 +1,88 @@
+/*
+  Copyright (C) 2003 
+ by Jonah Graham (jgraham@altera.com)
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.
+
+   As a special exception, if you link this library with files
+   compiled with GCC to produce an executable, this does not cause
+   the resulting executable to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+
+
+This file just makes a stack frame for the contents of the .fini and
+.init sections.  Users may put any desired instructions in those
+sections.
+
+
+While technically any code can be put in the init and fini sections
+most stuff will not work other than stuff which obeys the call frame
+and ABI. All the call-preserved registers are saved, the call clobbered
+registers should have been saved by the code calling init and fini.
+
+See crtstuff.c for an example of code that inserts itself in the 
+init and fini sections. 
+
+See crt0.s for the code that calls init and fini.
+*/
+
+	.file	"crti.asm"
+
+	.section	".init"	/* opening half of _init; crtn.asm supplies the restore-and-return half */
+	.align 2
+	.global	_init
+_init:
+	addi	sp, sp, -48	/* allocate a 48-byte frame */
+	stw	ra, 44(sp)	/* save the return address */
+	stw	r23, 40(sp)	/* save r23..r16 in 40(sp) down to 12(sp) */
+	stw	r22, 36(sp)
+	stw	r21, 32(sp)
+	stw	r20, 28(sp)
+	stw	r19, 24(sp)
+	stw	r18, 20(sp)
+	stw	r17, 16(sp)
+	stw	r16, 12(sp)
+	stw	fp, 8(sp)	/* save the caller's frame pointer; 0(sp) and 4(sp) are not written here */
+	mov	fp, sp	/* fp = base of the new frame; deliberately no ret, .init code from other objects follows */
+	
+	
+	.section	".fini"	/* opening half of _fini; crtn.asm supplies the restore-and-return half */
+	.align	2
+	.global	_fini
+_fini:
+	addi	sp, sp, -48	/* allocate a 48-byte frame */
+	stw	ra, 44(sp)	/* save the return address */
+	stw	r23, 40(sp)	/* save r23..r16 in 40(sp) down to 12(sp) */
+	stw	r22, 36(sp)
+	stw	r21, 32(sp)
+	stw	r20, 28(sp)
+	stw	r19, 24(sp)
+	stw	r18, 20(sp)
+	stw	r17, 16(sp)
+	stw	r16, 12(sp)
+	stw	fp, 8(sp)	/* save the caller's frame pointer; 0(sp) and 4(sp) are not written here */
+	mov	fp, sp	/* fp = base of the new frame; deliberately no ret, .fini code from other objects follows */
+	
+
--- ./gcc/gcc/config/nios2/crtn.asm
+++ ./gcc/gcc/config/nios2/crtn.asm
@@ -0,0 +1,70 @@
+/*
+  Copyright (C) 2003 
+ by Jonah Graham (jgraham@altera.com)
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.
+
+   As a special exception, if you link this library with files
+   compiled with GCC to produce an executable, this does not cause
+   the resulting executable to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+
+
+This file just makes sure that the .fini and .init sections do in
+fact return.  Users may put any desired instructions in those sections.
+This file is the last thing linked into any executable.
+*/	
+	.file	"crtn.asm"
+
+
+
+	.section	".init"	/* closing half of _init: undo the frame built in crti.asm and return */
+	ldw	ra, 44(sp)	/* reload the return address */
+	ldw	r23, 40(sp)	/* reload r23..r16 from the slots crti.asm stored them in */
+	ldw	r22, 36(sp)
+	ldw	r21, 32(sp)
+	ldw	r20, 28(sp)
+	ldw	r19, 24(sp)
+	ldw	r18, 20(sp)
+	ldw	r17, 16(sp)
+	ldw	r16, 12(sp)
+	ldw	fp, 8(sp)	/* reload the caller's frame pointer */
+	addi	sp, sp, 48	/* release the 48-byte frame (crti.asm did addi sp, sp, -48) */
+	ret
+	
+	.section	".fini"	/* closing half of _fini: undo the frame built in crti.asm and return */
+	ldw	ra, 44(sp)	/* reload the return address */
+	ldw	r23, 40(sp)	/* reload r23..r16 from the slots crti.asm stored them in */
+	ldw	r22, 36(sp)
+	ldw	r21, 32(sp)
+	ldw	r20, 28(sp)
+	ldw	r19, 24(sp)
+	ldw	r18, 20(sp)
+	ldw	r17, 16(sp)
+	ldw	r16, 12(sp)
+	ldw	fp, 8(sp)	/* reload the caller's frame pointer */
+	addi	sp, sp, 48	/* release the 48-byte frame (crti.asm did addi sp, sp, -48) */
+	ret
+	
--- ./gcc/gcc/config/nios2/lib2-divmod-hi.c
+++ ./gcc/gcc/config/nios2/lib2-divmod-hi.c
@@ -0,0 +1,123 @@
+
+/* We include auto-host.h here to get HAVE_GAS_HIDDEN.  This is
+   supposedly valid even though this is a "target" file.  */
+#include "auto-host.h"
+
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+
+/* Don't use `fancy_abort' here even if config.h says to use it.  */
+#ifdef abort
+#undef abort
+#endif
+
+
+#ifdef HAVE_GAS_HIDDEN
+#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
+#else
+#define ATTRIBUTE_HIDDEN
+#endif
+
+#include "libgcc2.h"
+
+extern HItype __modhi3 (HItype, HItype);
+extern HItype __divhi3 (HItype, HItype);
+extern HItype __umodhi3 (HItype, HItype);
+extern HItype __udivhi3 (HItype, HItype);
+
+static UHItype udivmodhi4(UHItype, UHItype, word_type);
+
+static UHItype	/* shift-and-subtract unsigned divide: returns num % den when modwanted is nonzero, else num / den */
+udivmodhi4(UHItype num, UHItype den, word_type modwanted)
+{
+  UHItype bit = 1;	/* quotient bit that corresponds to the current shift of den */
+  UHItype res = 0;	/* quotient accumulated so far */
+
+  while (den < num && bit && !(den & (1L<<15)))	/* scale den up until it reaches num, bit shifts out, or den's bit 15 is set */
+    {
+      den <<=1;
+      bit <<=1;
+    }
+  while (bit)	/* walk back down, subtracting den wherever it still fits into num */
+    {
+      if (num >= den)
+	{
+	  num -= den;
+	  res |= bit;
+	}
+      bit >>=1;
+      den >>=1;
+    }
+  if (modwanted) return num;	/* what is left of num is the remainder */
+  return res;
+}
+
+
+/* Signed HItype division: divide the magnitudes with udivmodhi4 and
+   negate the quotient when exactly one operand was negative.  */
+HItype
+__divhi3 (HItype a, HItype b)
+{
+  word_type negate = 0;
+  HItype quot;
+
+  /* Strip each operand's sign, toggling the flag per negative input.  */
+  if (a < 0)
+    {
+      negate = !negate;
+      a = -a;
+    }
+  if (b < 0)
+    {
+      negate = !negate;
+      b = -b;
+    }
+
+  quot = udivmodhi4 (a, b, 0);
+  if (negate)
+    quot = -quot;
+  return quot;
+}
+
+
+/* Signed HItype remainder.  The magnitudes are reduced with udivmodhi4
+   and the result takes the sign of the dividend A; the sign of B is
+   discarded.  */
+HItype
+__modhi3 (HItype a, HItype b)
+{
+  word_type dividend_negative = (a < 0);
+  HItype rem;
+
+  if (dividend_negative)
+    a = -a;
+  if (b < 0)
+    b = -b;
+
+  rem = udivmodhi4 (a, b, 1);
+
+  /* Give the remainder the dividend's sign.  */
+  if (dividend_negative)
+    rem = -rem;
+
+  return rem;
+}
+
+
+HItype	/* unsigned quotient a / b; the arguments are converted to UHItype by udivmodhi4's prototype */
+__udivhi3 (HItype a, HItype b)
+{
+  return udivmodhi4 (a, b, 0);
+}
+
+
+HItype	/* unsigned remainder a % b; the arguments are converted to UHItype by udivmodhi4's prototype */
+__umodhi3 (HItype a, HItype b)
+{
+  return udivmodhi4 (a, b, 1);
+}
+
--- ./gcc/gcc/config/nios2/lib2-divmod.c
+++ ./gcc/gcc/config/nios2/lib2-divmod.c
@@ -0,0 +1,126 @@
+
+/* We include auto-host.h here to get HAVE_GAS_HIDDEN.  This is
+   supposedly valid even though this is a "target" file.  */
+#include "auto-host.h"
+
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+
+/* Don't use `fancy_abort' here even if config.h says to use it.  */
+#ifdef abort
+#undef abort
+#endif
+
+
+#ifdef HAVE_GAS_HIDDEN
+#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
+#else
+#define ATTRIBUTE_HIDDEN
+#endif
+
+#include "libgcc2.h"
+
+extern SItype __modsi3 (SItype, SItype);
+extern SItype __divsi3 (SItype, SItype);
+extern SItype __umodsi3 (SItype, SItype);
+extern SItype __udivsi3 (SItype, SItype);
+
+static USItype udivmodsi4(USItype, USItype, word_type);
+
+/* 32-bit SI divide and modulo as used in NIOS */
+
+
+static USItype	/* shift-and-subtract unsigned divide: returns num % den when modwanted is nonzero, else num / den */
+udivmodsi4(USItype num, USItype den, word_type modwanted)
+{
+  USItype bit = 1;	/* quotient bit that corresponds to the current shift of den */
+  USItype res = 0;	/* quotient accumulated so far */
+
+  while (den < num && bit && !(den & (1L<<31)))	/* scale den up until it reaches num, bit shifts out, or den's bit 31 is set */
+    {
+      den <<=1;
+      bit <<=1;
+    }
+  while (bit)	/* walk back down, subtracting den wherever it still fits into num */
+    {
+      if (num >= den)
+	{
+	  num -= den;
+	  res |= bit;
+	}
+      bit >>=1;
+      den >>=1;
+    }
+  if (modwanted) return num;	/* what is left of num is the remainder */
+  return res;
+}
+
+
+/* Signed SItype division: divide the magnitudes with udivmodsi4 and
+   negate the quotient when exactly one operand was negative.  */
+SItype
+__divsi3 (SItype a, SItype b)
+{
+  word_type negate = 0;
+  SItype quot;
+
+  /* Strip each operand's sign, toggling the flag per negative input.  */
+  if (a < 0)
+    {
+      negate = !negate;
+      a = -a;
+    }
+  if (b < 0)
+    {
+      negate = !negate;
+      b = -b;
+    }
+
+  quot = udivmodsi4 (a, b, 0);
+  if (negate)
+    quot = -quot;
+  return quot;
+}
+
+
+/* Signed SItype remainder.  The magnitudes are reduced with udivmodsi4
+   and the result takes the sign of the dividend A; the sign of B is
+   discarded.  */
+SItype
+__modsi3 (SItype a, SItype b)
+{
+  word_type dividend_negative = (a < 0);
+  SItype rem;
+
+  if (dividend_negative)
+    a = -a;
+  if (b < 0)
+    b = -b;
+
+  rem = udivmodsi4 (a, b, 1);
+
+  /* Give the remainder the dividend's sign.  */
+  if (dividend_negative)
+    rem = -rem;
+
+  return rem;
+}
+
+
+SItype	/* unsigned quotient a / b; the arguments are converted to USItype by udivmodsi4's prototype */
+__udivsi3 (SItype a, SItype b)
+{
+  return udivmodsi4 (a, b, 0);
+}
+
+
+SItype	/* unsigned remainder a % b; the arguments are converted to USItype by udivmodsi4's prototype */
+__umodsi3 (SItype a, SItype b)
+{
+  return udivmodsi4 (a, b, 1);
+}
+
--- ./gcc/gcc/config/nios2/lib2-divtable.c
+++ ./gcc/gcc/config/nios2/lib2-divtable.c
@@ -0,0 +1,46 @@
+
+/* We include auto-host.h here to get HAVE_GAS_HIDDEN.  This is
+   supposedly valid even though this is a "target" file.  */
+#include "auto-host.h"
+
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+
+/* Don't use `fancy_abort' here even if config.h says to use it.  */
+#ifdef abort
+#undef abort
+#endif
+
+
+#ifdef HAVE_GAS_HIDDEN
+#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
+#else
+#define ATTRIBUTE_HIDDEN
+#endif
+
+#include "libgcc2.h"
+
+UQItype __divsi3_table[] =	/* 16 x 16 quotient table: entry [16 * n + d] is n / d for n, d in 0..15 */
+{	/* the first column (d == 0) is filled with 0 instead of a division */
+  0, 0/1, 0/2, 0/3, 0/4, 0/5, 0/6, 0/7, 0/8, 0/9, 0/10, 0/11, 0/12, 0/13, 0/14, 0/15,
+  0, 1/1, 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13, 1/14, 1/15,
+  0, 2/1, 2/2, 2/3, 2/4, 2/5, 2/6, 2/7, 2/8, 2/9, 2/10, 2/11, 2/12, 2/13, 2/14, 2/15,
+  0, 3/1, 3/2, 3/3, 3/4, 3/5, 3/6, 3/7, 3/8, 3/9, 3/10, 3/11, 3/12, 3/13, 3/14, 3/15,
+  0, 4/1, 4/2, 4/3, 4/4, 4/5, 4/6, 4/7, 4/8, 4/9, 4/10, 4/11, 4/12, 4/13, 4/14, 4/15,
+  0, 5/1, 5/2, 5/3, 5/4, 5/5, 5/6, 5/7, 5/8, 5/9, 5/10, 5/11, 5/12, 5/13, 5/14, 5/15,
+  0, 6/1, 6/2, 6/3, 6/4, 6/5, 6/6, 6/7, 6/8, 6/9, 6/10, 6/11, 6/12, 6/13, 6/14, 6/15,
+  0, 7/1, 7/2, 7/3, 7/4, 7/5, 7/6, 7/7, 7/8, 7/9, 7/10, 7/11, 7/12, 7/13, 7/14, 7/15,
+  0, 8/1, 8/2, 8/3, 8/4, 8/5, 8/6, 8/7, 8/8, 8/9, 8/10, 8/11, 8/12, 8/13, 8/14, 8/15,
+  0, 9/1, 9/2, 9/3, 9/4, 9/5, 9/6, 9/7, 9/8, 9/9, 9/10, 9/11, 9/12, 9/13, 9/14, 9/15,
+  0, 10/1, 10/2, 10/3, 10/4, 10/5, 10/6, 10/7, 10/8, 10/9, 10/10, 10/11, 10/12, 10/13, 10/14, 10/15,
+  0, 11/1, 11/2, 11/3, 11/4, 11/5, 11/6, 11/7, 11/8, 11/9, 11/10, 11/11, 11/12, 11/13, 11/14, 11/15,
+  0, 12/1, 12/2, 12/3, 12/4, 12/5, 12/6, 12/7, 12/8, 12/9, 12/10, 12/11, 12/12, 12/13, 12/14, 12/15,
+  0, 13/1, 13/2, 13/3, 13/4, 13/5, 13/6, 13/7, 13/8, 13/9, 13/10, 13/11, 13/12, 13/13, 13/14, 13/15,
+  0, 14/1, 14/2, 14/3, 14/4, 14/5, 14/6, 14/7, 14/8, 14/9, 14/10, 14/11, 14/12, 14/13, 14/14, 14/15,
+  0, 15/1, 15/2, 15/3, 15/4, 15/5, 15/6, 15/7, 15/8, 15/9, 15/10, 15/11, 15/12, 15/13, 15/14, 15/15,
+};
+
--- ./gcc/gcc/config/nios2/lib2-mul.c
+++ ./gcc/gcc/config/nios2/lib2-mul.c
@@ -0,0 +1,103 @@
+/* while we are debugging (ie compile outside of gcc build) 
+   disable gcc specific headers */
+#ifndef DEBUG_MULSI3
+
+
+/* We include auto-host.h here to get HAVE_GAS_HIDDEN.  This is
+   supposedly valid even though this is a "target" file.  */
+#include "auto-host.h"
+
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+
+/* Don't use `fancy_abort' here even if config.h says to use it.  */
+#ifdef abort
+#undef abort
+#endif
+
+
+#ifdef HAVE_GAS_HIDDEN
+#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
+#else
+#define ATTRIBUTE_HIDDEN
+#endif
+
+#include "libgcc2.h"
+
+#else
+#define SItype int
+#define USItype unsigned int
+#endif
+
+
+extern SItype __mulsi3 (SItype, SItype);
+
+/* Multiply A by B by shift-and-add: for every set bit of A, counted
+   from the least significant end, add the correspondingly shifted B
+   into the product.  The loop ends once no set bits of A remain.  */
+SItype
+__mulsi3 (SItype a, SItype b)
+{
+  USItype remaining = a;	/* unsigned copy, so >>= shifts in zeros */
+  SItype product = 0;
+
+  /* Each pass consumes the lowest bit of REMAINING and doubles B.  */
+  for (; remaining != 0; remaining >>= 1, b <<= 1)
+    {
+      if (remaining & 1)
+	product += b;
+    }
+
+  return product;
+}
+/*
+TODO: Choose best alternative implementation (the variant below is a multiply, despite its name).
+
+SItype
+__divsi3 (SItype a, SItype b)
+{
+  SItype res = 0;
+  USItype cnt = 0;
+  
+  while (cnt < 32)
+    {
+      if (a & (1L << cnt))
+        {
+	  res += b;	  
+	}
+      b <<= 1;
+      cnt++;
+    }
+    
+  return res;
+}
+*/
+
+
+#ifdef DEBUG_MULSI3
+
+int
+main ()
+{
+  extern int printf (const char *, ...);	/* no headers are included under DEBUG_MULSI3 */
+  int i, j, error = 0;
+
+  for (i = -1000; i < 1000; i++)	/* check __mulsi3 against the compiler's own multiply */
+    for (j = -1000; j < 1000; j++)
+      {
+	int expect = i * j;
+	int actual = __mulsi3 (i, j);	/* was A__divsi3, which is defined nowhere */
+	if (expect != actual)
+	  {
+	    printf ("error: %d * %d = %d not %d\n", i, j, expect, actual);
+	    error = 1;
+	  }
+      }
+
+  return error;	/* nonzero exit status when any product mismatched */
+}
+#endif
--- ./gcc/gcc/config/nios2/nios2-dp-bit.c
+++ ./gcc/gcc/config/nios2/nios2-dp-bit.c
@@ -0,0 +1,1652 @@
+
+/* This is a software floating point library which can be used
+   for targets without hardware floating point. 
+   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.  */
+
+/* This implements IEEE 754 format arithmetic, but does not provide a
+   mechanism for setting the rounding mode, or for generating or handling
+   exceptions.
+
+   The original code by Steve Chamberlain, hacked by Mark Eichin and Jim
+   Wilson, all of Cygnus Support.  */
+
+/* The intended way to use this file is to make two copies, add `#define FLOAT'
+   to one copy, then compile both copies and add them to libgcc.a.  */
+
+#include "tconfig.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "config/fp-bit.h"
+
+/* The following macros can be defined to change the behavior of this file:
+   FLOAT: Implement a `float', aka SFmode, fp library.  If this is not
+     defined, then this file implements a `double', aka DFmode, fp library.
+   FLOAT_ONLY: Used with FLOAT, to implement a `float' only library, i.e.
+     don't include float->double conversion which requires the double library.
+     This is useful only for machines which can't support doubles, e.g. some
+     8-bit processors.
+   CMPtype: Specify the type that floating point compares should return.
+     This defaults to SItype, aka int.
+   US_SOFTWARE_GOFAST: This makes all entry points use the same names as the
+     US Software goFast library.
+   _DEBUG_BITFLOAT: This makes debugging the code a little easier, by adding
+     two integers to the FLO_union_type.
+   NO_DENORMALS: Disable handling of denormals.
+   NO_NANS: Disable nan and infinity handling
+   SMALL_MACHINE: Useful when operations on QIs and HIs are faster
+     than on an SI */
+
+/* We don't currently support extended floats (long doubles) on machines
+   without hardware to deal with them.
+
+   These stubs are just to keep the linker from complaining about unresolved
+   references which can be pulled in from libio & libstdc++, even if the
+   user isn't using long doubles.  However, they may generate an unresolved
+   external to abort if abort is not used by the function, and the stubs
+   are referenced from within libc, since libgcc goes before and after the
+   system library.  */
+
+#ifdef DECLARE_LIBRARY_RENAMES
+  DECLARE_LIBRARY_RENAMES
+#endif
+
+#ifdef EXTENDED_FLOAT_STUBS
+extern void abort (void);
+void __extendsfxf2 (void) { abort(); }	/* "xf" entry points: each stub takes no arguments and only calls abort */
+void __extenddfxf2 (void) { abort(); }
+void __truncxfdf2 (void) { abort(); }
+void __truncxfsf2 (void) { abort(); }
+void __fixxfsi (void) { abort(); }
+void __floatsixf (void) { abort(); }
+void __addxf3 (void) { abort(); }
+void __subxf3 (void) { abort(); }
+void __mulxf3 (void) { abort(); }
+void __divxf3 (void) { abort(); }
+void __negxf2 (void) { abort(); }
+void __eqxf2 (void) { abort(); }
+void __nexf2 (void) { abort(); }
+void __gtxf2 (void) { abort(); }
+void __gexf2 (void) { abort(); }
+void __lexf2 (void) { abort(); }
+void __ltxf2 (void) { abort(); }
+
+void __extendsftf2 (void) { abort(); }	/* "tf" entry points: same set of stubs, same abort-only bodies */
+void __extenddftf2 (void) { abort(); }
+void __trunctfdf2 (void) { abort(); }
+void __trunctfsf2 (void) { abort(); }
+void __fixtfsi (void) { abort(); }
+void __floatsitf (void) { abort(); }
+void __addtf3 (void) { abort(); }
+void __subtf3 (void) { abort(); }
+void __multf3 (void) { abort(); }
+void __divtf3 (void) { abort(); }
+void __negtf2 (void) { abort(); }
+void __eqtf2 (void) { abort(); }
+void __netf2 (void) { abort(); }
+void __gttf2 (void) { abort(); }
+void __getf2 (void) { abort(); }
+void __letf2 (void) { abort(); }
+void __lttf2 (void) { abort(); }
+#else	/* !EXTENDED_FLOAT_STUBS, rest of file */
+
+/* IEEE "special" number predicates */
+
+#ifdef NO_NANS
+
+#define nan() 0
+#define isnan(x) 0
+#define isinf(x) 0
+#else
+
+#if   defined L_thenan_sf	/* the L_thenan_* builds define the shared NaN object for one format */
+const fp_number_type __thenan_sf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_df
+const fp_number_type __thenan_df = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_tf
+const fp_number_type __thenan_tf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined TFLOAT	/* every other build only declares the object matching its format */
+extern const fp_number_type __thenan_tf;
+#elif defined FLOAT
+extern const fp_number_type __thenan_sf;
+#else
+extern const fp_number_type __thenan_df;
+#endif
+
+INLINE
+static fp_number_type *
+nan (void)	/* returns a non-const pointer to the shared __thenan_* object for the format being built */
+{
+  /* Discard the const qualifier...  */
+#ifdef TFLOAT
+  return (fp_number_type *) (& __thenan_tf);
+#elif defined FLOAT  
+  return (fp_number_type *) (& __thenan_sf);
+#else
+  return (fp_number_type *) (& __thenan_df);
+#endif
+}
+
+INLINE
+static int
+isnan ( fp_number_type *  x)	/* nonzero when X is classed as a signalling or a quiet NaN */
+{
+  return x->class == CLASS_SNAN || x->class == CLASS_QNAN;
+}
+
+INLINE
+static int
+isinf ( fp_number_type *  x)	/* nonzero when X is classed as an infinity (the sign is not examined) */
+{
+  return x->class == CLASS_INFINITY;
+}
+
+#endif /* NO_NANS */
+
+INLINE
+static int
+iszero ( fp_number_type *  x)	/* nonzero when X is classed as zero (the sign is not examined) */
+{
+  return x->class == CLASS_ZERO;
+}
+
+INLINE 
+static void
+flip_sign ( fp_number_type *  x)	/* negates X in place by inverting its sign field; nothing else is touched */
+{
+  x->sign = !x->sign;
+}
+
+extern FLO_type pack_d ( fp_number_type * );
+
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
+FLO_type	/* pack_d: encode the unpacked number *SRC as a raw FLO_type value */
+pack_d ( fp_number_type *  src)
+{
+  FLO_union_type dst;
+  fractype fraction = src->fraction.ll;	/* wasn't unsigned before? */
+  int sign = src->sign;
+  int exp = 0;
+
+  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src)))
+    {
+      /* We can't represent these values accurately.  By using the
+	 largest possible magnitude, we guarantee that the conversion
+	 of infinity is at least as big as any finite number.  */
+      exp = EXPMAX;
+      fraction = ((fractype) 1 << FRACBITS) - 1;
+    }
+  else if (isnan (src))
+    {
+      exp = EXPMAX;
+      if (src->class == CLASS_QNAN || 1)	/* "|| 1" makes this always true: every NaN gets the quiet pattern */
+	{
+#ifdef QUIET_NAN_NEGATED
+	  fraction |= QUIET_NAN - 1;
+#else
+	  fraction |= QUIET_NAN;
+#endif
+	}
+    }
+  else if (isinf (src))
+    {
+      exp = EXPMAX;
+      fraction = 0;
+    }
+  else if (iszero (src))
+    {
+      exp = 0;
+      fraction = 0;
+    }
+  else if (fraction == 0)	/* not classed NaN/inf/zero, yet the fraction is empty: emit a zero exponent */
+    {
+      exp = 0;
+    }
+  else
+    {
+      if (src->normal_exp < NORMAL_EXPMIN)
+	{
+#ifdef NO_DENORMALS
+	  /* Go straight to a zero representation if denormals are not
+ 	     supported.  The denormal handling would be harmless but
+ 	     is unnecessary.  */
+	  exp = 0;
+	  fraction = 0;
+#else /* NO_DENORMALS */
+	  /* This number's exponent is too low to fit into the bits
+	     available in the number, so we'll store 0 in the exponent and
+	     shift the fraction to the right to make up for it.  */
+
+	  int shift = NORMAL_EXPMIN - src->normal_exp;
+
+	  exp = 0;
+
+	  if (shift > FRAC_NBITS - NGARDS)
+	    {
+	      /* No point shifting, since it's more than 64 out.  */
+	      fraction = 0;
+	    }
+	  else
+	    {
+	      int lowbit = (fraction & (((fractype)1 << shift) - 1)) ? 1 : 0;	/* sticky bit: 1 if any shifted-out bit was set */
+	      fraction = (fraction >> shift) | lowbit;
+	    }
+	  if ((fraction & GARDMASK) == GARDMSB)	/* guard bits equal GARDMSB: round up only if the bit at position NGARDS is set */
+	    {
+	      if ((fraction & (1 << NGARDS)))
+		fraction += GARDROUND + 1;
+	    }
+	  else
+	    {
+	      /* Add to the guards to round up.  */
+	      fraction += GARDROUND;
+	    }
+	  /* Perhaps the rounding means we now need to change the
+             exponent, because the fraction is no longer denormal.  */
+	  if (fraction >= IMPLICIT_1)
+	    {
+	      exp += 1;
+	    }
+	  fraction >>= NGARDS;
+#endif /* NO_DENORMALS */
+	}
+      else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
+	       && src->normal_exp > EXPBIAS)
+	{
+	  exp = EXPMAX;	/* exponent too large for the format: pack as infinity (EXPMAX, zero fraction) */
+	  fraction = 0;
+	}
+      else
+	{
+	  exp = src->normal_exp + EXPBIAS;
+	  if (!ROUND_TOWARDS_ZERO)
+	    {
+	      /* If the guard bits are all zero except the first, then we're
+		 half way between two numbers, choose the one which makes the
+		 lsb of the answer 0.  */
+	      if ((fraction & GARDMASK) == GARDMSB)
+		{
+		  if (fraction & (1 << NGARDS))
+		    fraction += GARDROUND + 1;
+		}
+	      else
+		{
+		  /* Add a one to the guards to round up */
+		  fraction += GARDROUND;
+		}
+	      if (fraction >= IMPLICIT_2)	/* rounding carried out of the top: renormalize */
+		{
+		  fraction >>= 1;
+		  exp += 1;
+		}
+	    }
+	  fraction >>= NGARDS;
+
+	  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX)
+	    {
+	      /* Saturate on overflow.  */
+	      exp = EXPMAX;
+	      fraction = ((fractype) 1 << FRACBITS) - 1;
+	    }
+	}
+    }
+
+  /* We previously used bitfields to store the number, but this doesn't
+     handle little/big endian systems conveniently, so use shifts and
+     masks */
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+  dst.bits.fraction = fraction;
+  dst.bits.exp = exp;
+  dst.bits.sign = sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low, unity;
+   int lowsign, lowexp;
+
+   unity = (halffractype) 1 << HALFFRACBITS;
+
+   /* Set HIGH to the high double's significand, masking out the implicit 1.
+      Set LOW to the low double's full significand.  */
+   high = (fraction >> (FRACBITS - HALFFRACBITS)) & (unity - 1);
+   low = fraction & (unity * 2 - 1);
+
+   /* Get the initial sign and exponent of the low double.  */
+   lowexp = exp - HALFFRACBITS - 1;
+   lowsign = sign;
+
+   /* HIGH should be rounded like a normal double, making |LOW| <=
+      0.5 ULP of HIGH.  Assume round-to-nearest.  */
+   if (exp < EXPMAX)
+     if (low > unity || (low == unity && (high & 1) == 1))
+       {
+	 /* Round HIGH up and adjust LOW to match.  */
+	 high++;
+	 if (high == unity)
+	   {
+	     /* May make it infinite, but that's OK.  */
+	     high = 0;
+	     exp++;
+	   }
+	 low = unity * 2 - low;
+	 lowsign ^= 1;
+       }
+
+   high |= (halffractype) exp << HALFFRACBITS;
+   high |= (halffractype) sign << (HALFFRACBITS + EXPBITS);
+
+   if (exp == EXPMAX || exp == 0 || low == 0)
+     low = 0;
+   else
+     {
+       while (lowexp > 0 && low < unity)	/* normalize LOW while its exponent stays positive */
+	 {
+	   low <<= 1;
+	   lowexp--;
+	 }
+
+       if (lowexp <= 0)	/* LOW ends up denormal: shift it right and round to nearest, ties to even */
+	 {
+	   halffractype roundmsb, round;
+	   int shift;
+
+	   shift = 1 - lowexp;
+	   roundmsb = (1 << (shift - 1));
+	   round = low & ((roundmsb << 1) - 1);
+
+	   low >>= shift;
+	   lowexp = 0;
+
+	   if (round > roundmsb || (round == roundmsb && (low & 1) == 1))
+	     {
+	       low++;
+	       if (low == unity)
+		 /* LOW rounds up to the smallest normal number.  */
+		 lowexp++;
+	     }
+	 }
+
+       low &= unity - 1;
+       low |= (halffractype) lowexp << HALFFRACBITS;
+       low |= (halffractype) lowsign << (HALFFRACBITS + EXPBITS);
+     }
+   dst.value_raw = ((fractype) high << HALFSHIFT) | low;
+ }
+# else
+  dst.value_raw = fraction & ((((fractype)1) << FRACBITS) - (fractype)1);
+  dst.value_raw |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << FRACBITS;
+  dst.value_raw |= ((fractype) (sign & 1)) << (FRACBITS | EXPBITS);	/* NOTE(review): unpack_d shifts by FRACBITS + EXPBITS; "|" equals "+" only if the two constants share no set bits - confirm */
+# endif
+#endif
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+#ifdef TFLOAT
+  {
+    qrtrfractype tmp1 = dst.words[0];	/* reverse the order of the four words */
+    qrtrfractype tmp2 = dst.words[1];
+    dst.words[0] = dst.words[3];
+    dst.words[1] = dst.words[2];
+    dst.words[2] = tmp2;
+    dst.words[3] = tmp1;
+  }
+#else
+  {
+    halffractype tmp = dst.words[0];	/* swap the two words */
+    dst.words[0] = dst.words[1];
+    dst.words[1] = tmp;
+  }
+#endif
+#endif
+
+  return dst.value;
+}
+#endif
+
+#if defined(L_unpack_df) || defined(L_unpack_sf) || defined(L_unpack_tf)
+void	/* unpack_d: split the packed value *SRC into class, sign, exponent and fraction in *DST */
+unpack_d (FLO_union_type * src, fp_number_type * dst)
+{
+  /* We previously used bitfields to store the number, but this doesn't
+     handle little/big endian systems conveniently, so use shifts and
+     masks */
+  fractype fraction;
+  int exp;
+  int sign;
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+  FLO_union_type swapped;	/* local copy with the word order reversed; *SRC itself is not modified */
+
+#ifdef TFLOAT
+  swapped.words[0] = src->words[3];
+  swapped.words[1] = src->words[2];
+  swapped.words[2] = src->words[1];
+  swapped.words[3] = src->words[0];
+#else
+  swapped.words[0] = src->words[1];
+  swapped.words[1] = src->words[0];
+#endif
+  src = &swapped;
+#endif
+  
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+  fraction = src->bits.fraction;
+  exp = src->bits.exp;
+  sign = src->bits.sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low;
+   
+   high = src->value_raw >> HALFSHIFT;
+   low = src->value_raw & (((fractype)1 << HALFSHIFT) - 1);
+
+   fraction = high & ((((fractype)1) << HALFFRACBITS) - 1);
+   fraction <<= FRACBITS - HALFFRACBITS;
+   exp = ((int)(high >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+   sign = ((int)(high >> (((HALFFRACBITS + EXPBITS))))) & 1;
+
+   if (exp != EXPMAX && exp != 0 && low != 0)	/* fold the low half into the fraction only for normal high halves */
+     {
+       int lowexp = ((int)(low >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+       int lowsign = ((int)(low >> (((HALFFRACBITS + EXPBITS))))) & 1;
+       int shift;
+       fractype xlow;
+
+       xlow = low & ((((fractype)1) << HALFFRACBITS) - 1);
+       if (lowexp)
+	 xlow |= (((halffractype)1) << HALFFRACBITS);	/* nonzero low exponent: restore the implicit leading 1 */
+       else
+	 lowexp = 1;
+       shift = (FRACBITS - HALFFRACBITS) - (exp - lowexp);	/* align XLOW to the scale of FRACTION */
+       if (shift > 0)
+	 xlow <<= shift;
+       else if (shift < 0)
+	 xlow >>= -shift;
+       if (sign == lowsign)
+	 fraction += xlow;
+       else if (fraction >= xlow)
+	 fraction -= xlow;
+       else
+	 {
+	   /* The high part is a power of two but the full number is lower.
+	      This code will leave the implicit 1 in FRACTION, but we'd
+	      have added that below anyway.  */
+	   fraction = (((fractype) 1 << FRACBITS) - xlow) << 1;
+	   exp--;
+	 }
+     }
+ }
+# else
+  fraction = src->value_raw & ((((fractype)1) << FRACBITS) - 1);
+  exp = ((int)(src->value_raw >> FRACBITS)) & ((1 << EXPBITS) - 1);
+  sign = ((int)(src->value_raw >> (FRACBITS + EXPBITS))) & 1;
+# endif
+#endif
+
+  dst->sign = sign;
+  if (exp == 0)
+    {
+      /* Hmm.  Looks like 0 */
+      if (fraction == 0
+#ifdef NO_DENORMALS
+	  || 1
+#endif
+	  )
+	{
+	  /* tastes like zero */
+	  dst->class = CLASS_ZERO;	/* only class and sign are set for a zero; normal_exp and fraction are left untouched */
+	}
+      else
+	{
+	  /* Zero exponent with nonzero fraction - it's denormalized,
+	     so there isn't a leading implicit one - we'll shift it so
+	     it gets one.  */
+	  dst->normal_exp = exp - EXPBIAS + 1;
+	  fraction <<= NGARDS;
+
+	  dst->class = CLASS_NUMBER;
+#if 1
+	  while (fraction < IMPLICIT_1)	/* shift left until the fraction reaches IMPLICIT_1, lowering the exponent per step */
+	    {
+	      fraction <<= 1;
+	      dst->normal_exp--;
+	    }
+#endif
+	  dst->fraction.ll = fraction;
+	}
+    }
+  else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp == EXPMAX)
+    {
+      /* Huge exponent*/
+      if (fraction == 0)
+	{
+	  /* Attached to a zero fraction - means infinity */
+	  dst->class = CLASS_INFINITY;	/* only class and sign are set for an infinity */
+	}
+      else
+	{
+	  /* Nonzero fraction, means nan */
+#ifdef QUIET_NAN_NEGATED
+	  if ((fraction & QUIET_NAN) == 0)
+#else
+	  if (fraction & QUIET_NAN)
+#endif
+	    {
+	      dst->class = CLASS_QNAN;
+	    }
+	  else
+	    {
+	      dst->class = CLASS_SNAN;
+	    }
+	  /* Keep the fraction part as the nan number */
+	  dst->fraction.ll = fraction;
+	}
+    }
+  else
+    {
+      /* Nothing strange about this number */
+      dst->normal_exp = exp - EXPBIAS;
+      dst->class = CLASS_NUMBER;
+      dst->fraction.ll = (fraction << NGARDS) | IMPLICIT_1;	/* make room for the guard bits and restore the implicit leading 1 */
+    }
+}
+#endif /* L_unpack_df || L_unpack_sf */
+
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
+static fp_number_type *	/* add unpacked A and B; returns A, B, the shared NaN, or TMP filled with the sum */
+_fpadd_parts (fp_number_type * a,
+	      fp_number_type * b,
+	      fp_number_type * tmp)
+{
+  intfrac tfraction;
+
+  /* Put commonly used fields in local variables.  */
+  int a_normal_exp;
+  int b_normal_exp;
+  fractype a_fraction;
+  fractype b_fraction;
+
+  if (isnan (a))	/* a NaN operand is returned unchanged; A takes priority over B */
+    {
+      return a;
+    }
+  if (isnan (b))
+    {
+      return b;
+    }
+  if (isinf (a))
+    {
+      /* Adding infinities with opposite signs yields a NaN.  */
+      if (isinf (b) && a->sign != b->sign)
+	return nan ();
+      return a;
+    }
+  if (isinf (b))
+    {
+      return b;
+    }
+  if (iszero (b))
+    {
+      if (iszero (a))
+	{
+	  *tmp = *a;
+	  tmp->sign = a->sign & b->sign;	/* zero + zero is negative only when both zeros are negative */
+	  return tmp;
+	}
+      return a;
+    }
+  if (iszero (a))
+    {
+      return b;
+    }
+
+  /* Got two numbers. shift the smaller and increment the exponent till
+     they're the same */
+  {
+    int diff;
+
+    a_normal_exp = a->normal_exp;
+    b_normal_exp = b->normal_exp;
+    a_fraction = a->fraction.ll;
+    b_fraction = b->fraction.ll;
+
+    diff = a_normal_exp - b_normal_exp;
+
+    if (diff < 0)
+      diff = -diff;
+    if (diff < FRAC_NBITS)
+      {
+	/* ??? This does shifts one bit at a time.  Optimize.  */
+	while (a_normal_exp > b_normal_exp)
+	  {
+	    b_normal_exp++;
+	    LSHIFT (b_fraction);	/* LSHIFT is defined in fp-bit.h; presumably a one-bit right shift that keeps a sticky bit - confirm */
+	  }
+	while (b_normal_exp > a_normal_exp)
+	  {
+	    a_normal_exp++;
+	    LSHIFT (a_fraction);
+	  }
+      }
+    else
+      {
+	/* Something's up.. choose the biggest */
+	if (a_normal_exp > b_normal_exp)
+	  {
+	    b_normal_exp = a_normal_exp;
+	    b_fraction = 0;
+	  }
+	else
+	  {
+	    a_normal_exp = b_normal_exp;
+	    a_fraction = 0;
+	  }
+      }
+  }
+
+  if (a->sign != b->sign)
+    {
+      if (a->sign)	/* signs differ: subtract the negative operand's fraction from the positive one's */
+	{
+	  tfraction = -a_fraction + b_fraction;
+	}
+      else
+	{
+	  tfraction = a_fraction - b_fraction;
+	}
+      if (tfraction >= 0)
+	{
+	  tmp->sign = 0;
+	  tmp->normal_exp = a_normal_exp;
+	  tmp->fraction.ll = tfraction;
+	}
+      else
+	{
+	  tmp->sign = 1;
+	  tmp->normal_exp = a_normal_exp;
+	  tmp->fraction.ll = -tfraction;
+	}
+      /* and renormalize it */
+
+      while (tmp->fraction.ll < IMPLICIT_1 && tmp->fraction.ll)	/* stops at once when the difference is exactly zero */
+	{
+	  tmp->fraction.ll <<= 1;
+	  tmp->normal_exp--;
+	}
+    }
+  else
+    {
+      tmp->sign = a->sign;
+      tmp->normal_exp = a_normal_exp;
+      tmp->fraction.ll = a_fraction + b_fraction;
+    }
+  tmp->class = CLASS_NUMBER;
+  /* Now the fraction is added, we have to shift down to renormalize the
+     number */
+
+  if (tmp->fraction.ll >= IMPLICIT_2)
+    {
+      LSHIFT (tmp->fraction.ll);
+      tmp->normal_exp++;
+    }
+  return tmp;
+
+}
+
+FLO_type
+add (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Unpack both operands, add the parts, and pack whatever comes back.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+  fp_number_type scratch;
+  fp_number_type *sum;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  sum = _fpadd_parts (&lhs, &rhs, &scratch);
+  return pack_d (sum);
+}
+
+FLO_type
+sub (FLO_type arg_a, FLO_type arg_b)
+{
+  /* a - b is computed as a + (-b): flip B's sign after unpacking.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+  fp_number_type scratch;
+  fp_number_type *diff;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+  rhs.sign ^= 1;
+
+  diff = _fpadd_parts (&lhs, &rhs, &scratch);
+
+  return pack_d (diff);
+}
+#endif /* L_addsub_sf || L_addsub_df */
+
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
+static inline __attribute__ ((__always_inline__)) fp_number_type *
+_fpmul_parts ( fp_number_type *  a,
+	       fp_number_type *  b,
+	       fp_number_type * tmp)
+{
+  fractype low = 0;
+  fractype high = 0;
+  /* Multiply *A by *B; returns A, B, TMP or the shared NaN object.  */
+  if (isnan (a))
+    {
+      a->sign = a->sign != b->sign;	/* result sign: 1 if signs differ */
+      return a;
+    }
+  if (isnan (b))
+    {
+      b->sign = a->sign != b->sign;
+      return b;
+    }
+  if (isinf (a))
+    {
+      if (iszero (b))
+	return nan ();
+      a->sign = a->sign != b->sign;
+      return a;
+    }
+  if (isinf (b))
+    {
+      if (iszero (a))
+	{
+	  return nan ();
+	}
+      b->sign = a->sign != b->sign;
+      return b;
+    }
+  if (iszero (a))
+    {
+      a->sign = a->sign != b->sign;
+      return a;
+    }
+  if (iszero (b))
+    {
+      b->sign = a->sign != b->sign;
+      return b;
+    }
+  /* Neither operand is a NaN, an infinity or a zero from here on.  */
+  /* Calculate the mantissa by multiplying both numbers to get a
+     twice-as-wide number.  */
+  {
+#if defined(NO_DI_MODE) || defined(TFLOAT)
+    {
+      fractype x = a->fraction.ll;
+      fractype ylow = b->fraction.ll;
+      fractype yhigh = 0;
+      int bit;
+
+      /* ??? This does multiplies one bit at a time.  Optimize.  */
+      for (bit = 0; bit < FRAC_NBITS; bit++)
+	{
+	  int carry;
+
+	  if (x & 1)
+	    {
+	      carry = (low += ylow) < ylow;	/* wrapped sum => carry out */
+	      high += yhigh + carry;
+	    }
+	  yhigh <<= 1;
+	  if (ylow & FRACHIGH)
+	    {
+	      yhigh |= 1;
+	    }
+	  ylow <<= 1;
+	  x >>= 1;
+	}
+    }
+#elif defined(FLOAT) 
+    /* Multiplying two USIs to get a UDI, we're safe.  */
+    {
+      UDItype answer = (UDItype)a->fraction.ll * (UDItype)b->fraction.ll;
+      
+      high = answer >> BITS_PER_SI;
+      low = answer;
+    }
+#else
+    /* fractype is DImode, but we need the result to be twice as wide.
+       Assuming a widening multiply from DImode to TImode is not
+       available, build one by hand.  */
+    {
+      USItype nl = a->fraction.ll;
+      USItype nh = a->fraction.ll >> BITS_PER_SI;
+      USItype ml = b->fraction.ll;
+      USItype mh = b->fraction.ll >> BITS_PER_SI;
+      UDItype pp_ll = (UDItype) ml * nl;
+      UDItype pp_hl = (UDItype) mh * nl;
+      UDItype pp_lh = (UDItype) ml * nh;
+      UDItype pp_hh = (UDItype) mh * nh;
+      UDItype res2 = 0;
+      UDItype res0 = 0;
+      UDItype ps_hh__ = pp_hl + pp_lh;
+      if (ps_hh__ < pp_hl)
+	res2 += (UDItype)1 << BITS_PER_SI;
+      pp_hl = (UDItype)(USItype)ps_hh__ << BITS_PER_SI;
+      res0 = pp_ll + pp_hl;
+      if (res0 < pp_ll)
+	res2++;
+      res2 += (ps_hh__ >> BITS_PER_SI) + pp_hh;
+      high = res2;
+      low = res0;
+    }
+#endif
+  }
+  /* HIGH:LOW holds the double-width product; bring HIGH into range.  */
+  tmp->normal_exp = a->normal_exp + b->normal_exp
+    + FRAC_NBITS - (FRACBITS + NGARDS);
+  tmp->sign = a->sign != b->sign;
+  while (high >= IMPLICIT_2)
+    {
+      tmp->normal_exp++;
+      if (high & 1)
+	{
+	  low >>= 1;
+	  low |= FRACHIGH;
+	}
+      high >>= 1;
+    }
+  while (high < IMPLICIT_1)
+    {
+      tmp->normal_exp--;
+
+      high <<= 1;
+      if (low & FRACHIGH)
+	high |= 1;
+      low <<= 1;
+    }
+  /* Rounding is tricky.  The disabled code tried to avoid rounding twice.  */
+#if 0
+  if (low & FRACHIGH2)
+    {
+      if (((high & GARDMASK) != GARDMSB)
+	  && (((high + 1) & GARDMASK) == GARDMSB))
+	{
+	  /* don't round, it gets done again later.  */
+	}
+      else
+	{
+	  high++;
+	}
+    }
+#endif
+  if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB)
+    {
+      if (high & (1 << NGARDS))
+	{
+	  /* half way, so round to even */
+	  high += GARDROUND + 1;
+	}
+      else if (low)
+	{
+	  /* but we really weren't half way */
+	  high += GARDROUND + 1;
+	}
+    }
+  tmp->fraction.ll = high;
+  tmp->class = CLASS_NUMBER;
+  return tmp;
+}
+
+FLO_type
+multiply (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Unpack both factors, multiply the parts, and pack the product.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+  fp_number_type scratch;
+  fp_number_type *product;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  product = _fpmul_parts (&lhs, &rhs, &scratch);
+  return pack_d (product);
+}
+#endif /* L_mul_sf || L_mul_df */
+
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
+static inline __attribute__ ((__always_inline__)) fp_number_type *
+_fpdiv_parts (fp_number_type * a,
+	      fp_number_type * b)
+{
+  fractype bit;
+  fractype numerator;
+  fractype denominator;
+  fractype quotient;
+  /* Divide *A by *B in place; returns A, B or the shared NaN object.  */
+  if (isnan (a))
+    {
+      return a;
+    }
+  if (isnan (b))
+    {
+      return b;
+    }
+  /* The quotient's sign is the XOR of the operand signs.  */
+  a->sign = a->sign ^ b->sign;
+
+  if (isinf (a) || iszero (a))
+    {
+      if (a->class == b->class)	/* inf/inf or 0/0 */
+	return nan ();
+      return a;
+    }
+
+  if (isinf (b))
+    {
+      a->fraction.ll = 0;	/* finite / infinity: zero fraction */
+      a->normal_exp = 0;
+      return a;
+    }
+  if (iszero (b))
+    {
+      a->class = CLASS_INFINITY;	/* nonzero finite / zero */
+      return a;
+    }
+
+  /* Calculate the mantissa by long division of the two fractions,
+     producing one quotient bit per loop iteration.  */
+  {
+    /* quotient =
+       ( numerator / denominator) * 2^(numerator exponent -  denominator exponent)
+     */
+
+    a->normal_exp = a->normal_exp - b->normal_exp;
+    numerator = a->fraction.ll;
+    denominator = b->fraction.ll;
+
+    if (numerator < denominator)
+      {
+	/* Fraction will be less than 1.0 */
+	numerator *= 2;
+	a->normal_exp--;
+      }
+    bit = IMPLICIT_1;
+    quotient = 0;
+    /* ??? Does divide one bit at a time.  Optimize.  */
+    while (bit)
+      {
+	if (numerator >= denominator)
+	  {
+	    quotient |= bit;
+	    numerator -= denominator;
+	  }
+	bit >>= 1;
+	numerator *= 2;
+      }
+    /* Guard bits show the halfway pattern: settle the tie here.  */
+    if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
+      {
+	if (quotient & (1 << NGARDS))
+	  {
+	    /* half way, so round to even */
+	    quotient += GARDROUND + 1;
+	  }
+	else if (numerator)
+	  {
+	    /* but we really weren't half way, more bits exist */
+	    quotient += GARDROUND + 1;
+	  }
+      }
+
+    a->fraction.ll = quotient;
+    return (a);
+  }
+}
+
+FLO_type
+divide (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Unpack dividend and divisor, divide the parts, pack the quotient.  */
+  FLO_union_type num_bits, den_bits;
+  fp_number_type num;
+  fp_number_type den;
+  fp_number_type *quot;
+
+  num_bits.value = arg_a;
+  unpack_d (&num_bits, &num);
+
+  den_bits.value = arg_b;
+  unpack_d (&den_bits, &den);
+
+  quot = _fpdiv_parts (&num, &den);
+  return pack_d (quot);
+}
+#endif /* L_div_sf || L_div_df */
+
+#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df) \
+    || defined(L_fpcmp_parts_tf)
+/* according to the demo, fpcmp returns a comparison with 0... thus
+   a<b -> -1
+   a==b -> 0
+   a>b -> +1
+ */
+
+int
+__fpcmp_parts (fp_number_type * a, fp_number_type * b)
+{
+  /* Three-way compare of two unpacked values: negative when a<b, zero
+     when a==b, positive when a>b.  A NaN on either side yields 1.  */
+
+  /* Answer when A decides the outcome; its sign picks the side.  */
+  int by_a = a->sign ? -1 : 1;
+  /* Answer when B decides the outcome; note the mirrored sense.  */
+  int by_b = b->sign ? 1 : -1;
+
+#if 0
+  /* either nan -> unordered. Must be checked outside of this routine.  */
+  if (isnan (a) && isnan (b))
+    {
+      return 1;			/* still unordered! */
+    }
+#endif
+
+  if (isnan (a) || isnan (b))
+    {
+      return 1;			/* how to indicate unordered compare? */
+    }
+
+  if (isinf (a))
+    {
+      if (isinf (b))
+	{
+	  /* Both infinite, so only the signs matter:
+	     b    \a| +inf(0)| -inf(1)
+	     _______+--------+--------
+	     +inf(0)| a==b(0)| a<b(-1)
+	     -inf(1)| a>b(1) | a==b(0)
+	     which is exactly b's sign minus a's sign.  */
+	  return b->sign - a->sign;
+	}
+      return by_a;
+    }
+  if (isinf (b))
+    {
+      return by_b;
+    }
+
+  if (iszero (a))
+    {
+      /* Two zeros compare equal regardless of sign.  */
+      if (iszero (b))
+	return 0;
+      return by_b;
+    }
+  if (iszero (b))
+    {
+      return by_a;
+    }
+
+  /* now both are "normal".  Opposite signs settle it at once.  */
+  if (a->sign != b->sign)
+    {
+      return by_a;
+    }
+
+  /* Same sign: a larger exponent, or failing that a larger fraction,
+     means further from zero, and A's sign says which way that counts.  */
+  if (a->normal_exp != b->normal_exp)
+    {
+      return a->normal_exp > b->normal_exp ? by_a : -by_a;
+    }
+
+  if (a->fraction.ll != b->fraction.ll)
+    {
+      return a->fraction.ll > b->fraction.ll ? by_a : -by_a;
+    }
+
+  /* after all that, they're equal.  */
+  return 0;
+}
+#endif
+
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf)
+CMPtype
+compare (FLO_type arg_a, FLO_type arg_b)
+{
+  fp_number_type a;
+  fp_number_type b;
+  FLO_union_type au, bu;
+  /* Three-way compare of two packed values by way of __fpcmp_parts.  */
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
+  /* NaN operands are not filtered here; __fpcmp_parts returns 1 for them.  */
+  return __fpcmp_parts (&a, &b);
+}
+#endif /* L_compare_sf || L_compare_df || L_compare_tf */
+
+#ifndef US_SOFTWARE_GOFAST
+
+/* These should be optimized for their specific tasks someday.  */
+
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
+CMPtype
+_eq_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Zero means "equal"; a NaN operand forces the nonzero answer 1.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;			/* false, truth == 0 */
+}
+#endif /* L_eq_sf || L_eq_df */
+
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
+CMPtype
+_ne_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Nonzero means "not equal"; a NaN operand always gives 1.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;			/* true, truth != 0 */
+}
+#endif /* L_ne_sf || L_ne_df */
+
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
+CMPtype
+_gt_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Positive means "greater"; a NaN operand gives -1, i.e. not greater.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return -1;			/* false, truth > 0 */
+}
+#endif /* L_gt_sf || L_gt_df */
+
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
+CMPtype
+_ge_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Non-negative means "greater or equal"; a NaN operand gives -1.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return -1;			/* false, truth >= 0 */
+}
+#endif /* L_ge_sf || L_ge_df */
+
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
+CMPtype
+_lt_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Negative means "less"; a NaN operand gives 1, i.e. not less.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;			/* false, truth < 0 */
+}
+#endif /* L_lt_sf || L_lt_df */
+
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
+CMPtype
+_le_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Non-positive means "less or equal"; a NaN operand gives 1.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  unpack_d (&lhs_bits, &lhs);
+
+  rhs_bits.value = arg_b;
+  unpack_d (&rhs_bits, &rhs);
+
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;			/* false, truth <= 0 */
+}
+#endif /* L_le_sf || L_le_df */
+
+#endif /* ! US_SOFTWARE_GOFAST */
+
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
+CMPtype
+_unord_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Nonzero exactly when at least one operand unpacks to a NaN.  */
+  FLO_union_type lhs_bits, rhs_bits;
+  fp_number_type lhs;
+  fp_number_type rhs;
+
+  lhs_bits.value = arg_a;
+  rhs_bits.value = arg_b;
+  unpack_d (&lhs_bits, &lhs);
+  unpack_d (&rhs_bits, &rhs);
+
+  return (isnan (&lhs) || isnan (&rhs)) ? 1 : 0;
+}
+#endif /* L_unord_sf || L_unord_df */
+
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
+FLO_type
+si_to_float (SItype arg_a)
+{
+  /* Convert a signed integer: build sign/magnitude, normalize, pack.  */
+  const fractype implicit_one = (fractype) 1 << (FRACBITS + NGARDS);
+  fp_number_type in;
+
+  in.sign = arg_a < 0;
+  if (arg_a == 0)
+    {
+      in.class = CLASS_ZERO;
+      return pack_d (&in);
+    }
+
+  /* The most negative integer has no positive counterpart, so its
+     magnitude cannot be formed below; convert it directly instead.  */
+  if (arg_a == (- MAX_SI_INT - 1))
+    return (FLO_type) (- MAX_SI_INT - 1);
+
+  in.class = CLASS_NUMBER;
+  in.normal_exp = FRACBITS + NGARDS;
+  if (in.sign)
+    in.fraction.ll = (-arg_a);
+  else
+    in.fraction.ll = arg_a;
+
+  /* Shift the magnitude up until its top set bit reaches the
+     implicit-one position, lowering the exponent to compensate.  */
+  while (in.fraction.ll < implicit_one)
+    {
+      in.fraction.ll <<= 1;
+      in.normal_exp -= 1;
+    }
+  return pack_d (&in);
+}
+#endif /* L_si_to_sf || L_si_to_df */
+
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
+FLO_type
+usi_to_float (USItype arg_a)
+{
+  /* Convert an unsigned integer to FLO_type.  A normalized fraction lies
+     in [2^(FRACBITS+NGARDS), 2^(FRACBITS+NGARDS+1)), so values at or above
+     the upper bound are shifted down and smaller ones shifted up.  */
+  fp_number_type in;
+  in.sign = 0;
+  if (!arg_a)
+    in.class = CLASS_ZERO;
+  else
+    {
+      in.class = CLASS_NUMBER;
+      in.normal_exp = FRACBITS + NGARDS;
+      in.fraction.ll = arg_a;
+      /* Keep every discarded bit as a sticky bit so pack_d rounds right.  */
+      while (in.fraction.ll >= ((fractype)1 << (FRACBITS + NGARDS + 1)))
+	{
+	  in.fraction.ll = (in.fraction.ll >> 1) | (in.fraction.ll & 1);
+	  in.normal_exp += 1;
+	}
+      while (in.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS)))
+	{
+	  in.fraction.ll <<= 1;
+	  in.normal_exp -= 1;
+	}
+    }
+  return pack_d (&in);
+}
+#endif
+
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
+SItype
+float_to_si (FLO_type arg_a)
+{
+  /* Truncating conversion.  Zero, NaN and negative exponents give 0;
+     infinities and exponents above BITS_PER_SI - 2 saturate.  */
+  FLO_union_type bits;
+  fp_number_type num;
+  SItype mag;
+
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+  if (iszero (&num) || isnan (&num))
+    return 0;
+  if (!isinf (&num))
+    {
+      if (num.normal_exp < 0)
+	return 0;
+      if (num.normal_exp <= BITS_PER_SI - 2)
+	{
+	  mag = num.fraction.ll >> ((FRACBITS + NGARDS) - num.normal_exp);
+	  return num.sign ? (-mag) : (mag);
+	}
+    }
+  return num.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT;
+}
+#endif /* L_sf_to_si || L_df_to_si */
+
+#if defined(L_sf_to_usi) || defined(L_df_to_usi) || defined(L_tf_to_usi)
+#if defined US_SOFTWARE_GOFAST || defined(L_tf_to_usi)
+/* While libgcc2.c defines its own __fixunssfsi and __fixunsdfsi routines,
+   we also define them for GOFAST because the ones in libgcc2.c have the
+   wrong names and I'd rather define these here and keep GOFAST CYG-LOC's
+   out of libgcc2.c.  We can't define these here if not GOFAST because then
+   there'd be duplicate copies.  */
+
+USItype
+float_to_usi (FLO_type arg_a)
+{
+  /* Truncating conversion to an unsigned integer.  Zero, NaN, negative
+     values and negative exponents give 0; +infinity and exponents above
+     BITS_PER_SI - 1 saturate to MAX_USI_INT.  */
+  const int frac_point = FRACBITS + NGARDS;
+  FLO_union_type bits;
+  fp_number_type num;
+
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+
+  if (iszero (&num) || isnan (&num) || num.sign)
+    return 0;
+  if (isinf (&num))
+    return MAX_USI_INT;
+
+  /* Finite and non-negative from here on.  */
+  if (num.normal_exp < 0)
+    return 0;
+  if (num.normal_exp > BITS_PER_SI - 1)
+    return MAX_USI_INT;
+
+  /* Move the binary point from FRAC_POINT down to bit 0 of the result.  */
+  if (num.normal_exp > frac_point)
+    return num.fraction.ll << (num.normal_exp - frac_point);
+  return num.fraction.ll >> (frac_point - num.normal_exp);
+}
+#endif /* US_SOFTWARE_GOFAST || L_tf_to_usi */
+#endif /* L_sf_to_usi || L_df_to_usi */
+
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
+FLO_type
+negate (FLO_type arg_a)
+{
+  /* Unpack, invert the sign flag, and pack again.  */
+  FLO_union_type bits;
+  fp_number_type num;
+
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+  flip_sign (&num);
+  return pack_d (&num);
+}
+#endif /* L_negate_sf || L_negate_df */
+
+#ifdef FLOAT
+
+#if defined(L_make_sf)
+SFtype
+__make_fp(fp_class_type class,
+	     unsigned int sign,
+	     int exp,
+	     USItype frac)
+{
+  /* Assemble an unpacked number from its four parts and pack it.  */
+  fp_number_type parts;
+  parts.fraction.ll = frac;
+  parts.normal_exp = exp;
+  parts.sign = sign;
+  parts.class = class;
+  return pack_d (&parts);
+}
+#endif /* L_make_sf */
+
+#ifndef FLOAT_ONLY
+
+/* This enables one to build an fp library that supports float but not double.
+   Otherwise, we would get an undefined reference to __make_dp.
+   This is needed for some 8-bit ports that can't handle well values that
+   are 8-bytes in size, so we just don't support double for them at all.  */
+
+#if defined(L_sf_to_df)
+DFtype
+sf_to_df (SFtype arg_a)
+{
+  /* Widen: the fraction moves up F_D_BITOFF bits; the rest is unchanged.  */
+  FLO_union_type bits;
+  fp_number_type num;
+  UDItype wide_frac;
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+  wide_frac = ((UDItype) num.fraction.ll) << F_D_BITOFF;
+  return __make_dp (num.class, num.sign, num.normal_exp, wide_frac);
+}
+#endif /* L_sf_to_df */
+
+#if defined(L_sf_to_tf) && defined(TMODES)
+TFtype
+sf_to_tf (SFtype arg_a)
+{
+  /* Widen: the fraction moves up F_T_BITOFF bits; the rest is unchanged.  */
+  FLO_union_type bits;
+  fp_number_type num;
+  UTItype wide_frac;
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+  wide_frac = ((UTItype) num.fraction.ll) << F_T_BITOFF;
+  return __make_tp (num.class, num.sign, num.normal_exp, wide_frac);
+}
+#endif /* L_sf_to_tf && TMODES */
+
+#endif /* ! FLOAT_ONLY */
+#endif /* FLOAT */
+
+#ifndef FLOAT
+
+extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype);
+
+#if defined(L_make_df)
+DFtype
+__make_dp (fp_class_type class, unsigned int sign, int exp, UDItype frac)
+{
+  /* Assemble an unpacked number from its four parts and pack it.  */
+  fp_number_type parts;
+  parts.fraction.ll = frac;
+  parts.normal_exp = exp;
+  parts.sign = sign;
+  parts.class = class;
+  return pack_d (&parts);
+}
+#endif /* L_make_df */
+
+#if defined(L_df_to_sf)
+SFtype
+df_to_sf (DFtype arg_a)
+{
+  /* Narrow: drop the low F_D_BITOFF fraction bits, remembering in the
+     lowest guard bit whether any of them was set.  */
+  const USItype dropped_mask = ((USItype) 1 << F_D_BITOFF) - 1;
+  FLO_union_type bits;
+  fp_number_type num;
+  USItype narrow_frac;
+
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+
+  narrow_frac = num.fraction.ll >> F_D_BITOFF;
+  if (num.fraction.ll & dropped_mask)
+    narrow_frac |= 1;
+
+  return __make_fp (num.class, num.sign, num.normal_exp, narrow_frac);
+}
+#endif /* L_df_to_sf */
+
+#if defined(L_df_to_tf) && defined(TMODES) \
+    && !defined(FLOAT) && !defined(TFLOAT)
+TFtype
+df_to_tf (DFtype arg_a)
+{
+  /* Widen: the fraction moves up D_T_BITOFF bits; the rest is unchanged.  */
+  FLO_union_type bits;
+  fp_number_type num;
+  UTItype wide_frac;
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+  wide_frac = ((UTItype) num.fraction.ll) << D_T_BITOFF;
+  return __make_tp (num.class, num.sign, num.normal_exp, wide_frac);
+}
+#endif /* L_df_to_tf && TMODES && !FLOAT && !TFLOAT */
+
+#ifdef TFLOAT
+#if defined(L_make_tf)
+TFtype
+__make_tp(fp_class_type class,
+	     unsigned int sign,
+	     int exp,
+	     UTItype frac)
+{
+  /* Assemble an unpacked number from its four parts and pack it.  */
+  fp_number_type parts;
+  parts.fraction.ll = frac;
+  parts.normal_exp = exp;
+  parts.sign = sign;
+  parts.class = class;
+  return pack_d (&parts);
+}
+#endif /* L_make_tf */
+
+#if defined(L_tf_to_df)
+DFtype
+tf_to_df (TFtype arg_a)
+{
+  /* Narrow: drop the low D_T_BITOFF fraction bits, remembering in the
+     lowest guard bit whether any of them was set.  */
+  const UTItype dropped_mask = ((UTItype) 1 << D_T_BITOFF) - 1;
+  FLO_union_type bits;
+  fp_number_type num;
+  UDItype narrow_frac;
+
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+
+  narrow_frac = num.fraction.ll >> D_T_BITOFF;
+  if (num.fraction.ll & dropped_mask)
+    narrow_frac |= 1;
+
+  return __make_dp (num.class, num.sign, num.normal_exp, narrow_frac);
+}
+#endif /* L_tf_to_df */
+
+#if defined(L_tf_to_sf)
+SFtype
+tf_to_sf (TFtype arg_a)
+{
+  /* Narrow: drop the low F_T_BITOFF fraction bits, remembering in the
+     lowest guard bit whether any of them was set.  */
+  const UTItype dropped_mask = ((UTItype) 1 << F_T_BITOFF) - 1;
+  FLO_union_type bits;
+  fp_number_type num;
+  USItype narrow_frac;
+
+  bits.value = arg_a;
+  unpack_d (&bits, &num);
+
+  narrow_frac = num.fraction.ll >> F_T_BITOFF;
+  if (num.fraction.ll & dropped_mask)
+    narrow_frac |= 1;
+
+  return __make_fp (num.class, num.sign, num.normal_exp, narrow_frac);
+}
+#endif /* L_tf_to_sf */
+#endif /* TFLOAT */
+
+#endif /* ! FLOAT */
+#endif /* !EXTENDED_FLOAT_STUBS */
--- ./gcc/gcc/config/nios2/nios2-fp-bit.c
+++ ./gcc/gcc/config/nios2/nios2-fp-bit.c
@@ -0,0 +1,1652 @@
+#define FLOAT
+/* This is a software floating point library which can be used
+   for targets without hardware floating point. 
+   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.  */
+
+/* This implements IEEE 754 format arithmetic, but does not provide a
+   mechanism for setting the rounding mode, or for generating or handling
+   exceptions.
+
+   The original code by Steve Chamberlain, hacked by Mark Eichin and Jim
+   Wilson, all of Cygnus Support.  */
+
+/* The intended way to use this file is to make two copies, add `#define FLOAT'
+   to one copy, then compile both copies and add them to libgcc.a.  */
+
+#include "tconfig.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "config/fp-bit.h"
+
+/* The following macros can be defined to change the behavior of this file:
+   FLOAT: Implement a `float', aka SFmode, fp library.  If this is not
+     defined, then this file implements a `double', aka DFmode, fp library.
+   FLOAT_ONLY: Used with FLOAT, to implement a `float' only library, i.e.
+     don't include float->double conversion which requires the double library.
+     This is useful only for machines which can't support doubles, e.g. some
+     8-bit processors.
+   CMPtype: Specify the type that floating point compares should return.
+     This defaults to SItype, aka int.
+   US_SOFTWARE_GOFAST: This makes all entry points use the same names as the
+     US Software goFast library.
+   _DEBUG_BITFLOAT: This makes debugging the code a little easier, by adding
+     two integers to the FLO_union_type.
+   NO_DENORMALS: Disable handling of denormals.
+   NO_NANS: Disable nan and infinity handling
+   SMALL_MACHINE: Useful when operations on QIs and HIs are faster
+     than on an SI */
+
+/* We don't currently support extended floats (long doubles) on machines
+   without hardware to deal with them.
+
+   These stubs are just to keep the linker from complaining about unresolved
+   references which can be pulled in from libio & libstdc++, even if the
+   user isn't using long doubles.  However, they may generate an unresolved
+   external to abort if abort is not used by the function, and the stubs
+   are referenced from within libc, since libgcc goes before and after the
+   system library.  */
+
+#ifdef DECLARE_LIBRARY_RENAMES
+  DECLARE_LIBRARY_RENAMES
+#endif
+
+#ifdef EXTENDED_FLOAT_STUBS
+extern void abort (void);	/* every stub below does nothing but call this */
+void __extendsfxf2 (void) { abort(); }
+void __extenddfxf2 (void) { abort(); }
+void __truncxfdf2 (void) { abort(); }
+void __truncxfsf2 (void) { abort(); }
+void __fixxfsi (void) { abort(); }
+void __floatsixf (void) { abort(); }
+void __addxf3 (void) { abort(); }
+void __subxf3 (void) { abort(); }
+void __mulxf3 (void) { abort(); }
+void __divxf3 (void) { abort(); }
+void __negxf2 (void) { abort(); }
+void __eqxf2 (void) { abort(); }
+void __nexf2 (void) { abort(); }
+void __gtxf2 (void) { abort(); }
+void __gexf2 (void) { abort(); }
+void __lexf2 (void) { abort(); }
+void __ltxf2 (void) { abort(); }
+/* The same set of entry points again, for the "tf" names.  */
+void __extendsftf2 (void) { abort(); }
+void __extenddftf2 (void) { abort(); }
+void __trunctfdf2 (void) { abort(); }
+void __trunctfsf2 (void) { abort(); }
+void __fixtfsi (void) { abort(); }
+void __floatsitf (void) { abort(); }
+void __addtf3 (void) { abort(); }
+void __subtf3 (void) { abort(); }
+void __multf3 (void) { abort(); }
+void __divtf3 (void) { abort(); }
+void __negtf2 (void) { abort(); }
+void __eqtf2 (void) { abort(); }
+void __netf2 (void) { abort(); }
+void __gttf2 (void) { abort(); }
+void __getf2 (void) { abort(); }
+void __letf2 (void) { abort(); }
+void __lttf2 (void) { abort(); }
+#else	/* !EXTENDED_FLOAT_STUBS, rest of file */
+
+/* IEEE "special" number predicates */
+
+#ifdef NO_NANS
+
+#define nan() 0
+#define isnan(x) 0
+#define isinf(x) 0
+#else
+
+#if   defined L_thenan_sf
+const fp_number_type __thenan_sf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_df
+const fp_number_type __thenan_df = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_tf
+const fp_number_type __thenan_tf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined TFLOAT
+extern const fp_number_type __thenan_tf;
+#elif defined FLOAT
+extern const fp_number_type __thenan_sf;
+#else
+extern const fp_number_type __thenan_df;
+#endif
+
+INLINE
+static fp_number_type *
+nan (void)
+{
+  /* The shared NaN object is const; hand callers a non-const view.  */
+#if defined TFLOAT
+  return (fp_number_type *) &__thenan_tf;
+#elif defined FLOAT
+  return (fp_number_type *) &__thenan_sf;
+#else
+  return (fp_number_type *) &__thenan_df;
+#endif
+}
+
+INLINE
+static int
+isnan (fp_number_type *x)
+{
+  return !(x->class != CLASS_QNAN && x->class != CLASS_SNAN);	/* either NaN */
+}
+
+INLINE
+static int
+isinf (fp_number_type *x)
+{
+  return (x->class == CLASS_INFINITY) ? 1 : 0;	/* the sign is not examined */
+}
+
+#endif /* NO_NANS */
+
+INLINE
+static int
+iszero (fp_number_type *x)
+{
+  return (x->class == CLASS_ZERO) ? 1 : 0;	/* true for +0 and -0 alike */
+}
+
+INLINE
+static void
+flip_sign (fp_number_type *x)
+{
+  x->sign = x->sign ? 0 : 1;	/* toggle between 0 and 1 */
+}
+
+extern FLO_type pack_d ( fp_number_type * );
+
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
+FLO_type
+pack_d ( fp_number_type *  src)
+{
+  FLO_union_type dst;
+  fractype fraction = src->fraction.ll;	/* wasn't unsigned before? */
+  int sign = src->sign;
+  int exp = 0;
+  /* Round *SRC, encode it in the target bit layout, and return the value.  */
+  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src)))
+    {
+      /* We can't represent these values accurately.  By using the
+	 largest possible magnitude, we guarantee that the conversion
+	 of infinity is at least as big as any finite number.  */
+      exp = EXPMAX;
+      fraction = ((fractype) 1 << FRACBITS) - 1;
+    }
+  else if (isnan (src))
+    {
+      exp = EXPMAX;
+      if (src->class == CLASS_QNAN || 1)
+	{
+#ifdef QUIET_NAN_NEGATED
+	  fraction |= QUIET_NAN - 1;
+#else
+	  fraction |= QUIET_NAN;
+#endif
+	}
+    }
+  else if (isinf (src))
+    {
+      exp = EXPMAX;
+      fraction = 0;
+    }
+  else if (iszero (src))
+    {
+      exp = 0;
+      fraction = 0;
+    }
+  else if (fraction == 0)
+    {
+      exp = 0;
+    }
+  else
+    {
+      if (src->normal_exp < NORMAL_EXPMIN)
+	{
+#ifdef NO_DENORMALS
+	  /* Go straight to a zero representation if denormals are not
+	     supported.  The denormal handling would be harmless but
+	     is unnecessary.  */
+	  exp = 0;
+	  fraction = 0;
+#else /* NO_DENORMALS */
+	  /* This number's exponent is too low to fit into the bits
+	     available in the number, so we'll store 0 in the exponent and
+	     shift the fraction to the right to make up for it.  */
+
+	  int shift = NORMAL_EXPMIN - src->normal_exp;
+
+	  exp = 0;
+
+	  if (shift > FRAC_NBITS - NGARDS)
+	    {
+	      /* No point shifting, since every bit would be shifted out.  */
+	      fraction = 0;
+	    }
+	  else
+	    {
+	      int lowbit = (fraction & (((fractype)1 << shift) - 1)) ? 1 : 0;
+	      fraction = (fraction >> shift) | lowbit;
+	    }
+	  if ((fraction & GARDMASK) == GARDMSB)
+	    {
+	      if ((fraction & (1 << NGARDS)))
+		fraction += GARDROUND + 1;
+	    }
+	  else
+	    {
+	      /* Add to the guards to round up.  */
+	      fraction += GARDROUND;
+	    }
+	  /* Perhaps the rounding means we now need to change the
+             exponent, because the fraction is no longer denormal.  */
+	  if (fraction >= IMPLICIT_1)
+	    {
+	      exp += 1;
+	    }
+	  fraction >>= NGARDS;
+#endif /* NO_DENORMALS */
+	}
+      else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
+	       && src->normal_exp > EXPBIAS)
+	{
+	  exp = EXPMAX;
+	  fraction = 0;
+	}
+      else
+	{
+	  exp = src->normal_exp + EXPBIAS;
+	  if (!ROUND_TOWARDS_ZERO)
+	    {
+	      /* If the guard bits are all zero except the first, then we're
+		 half way between two numbers; choose the one which makes
+		 the lsb of the answer 0.  */
+	      if ((fraction & GARDMASK) == GARDMSB)
+		{
+		  if (fraction & (1 << NGARDS))
+		    fraction += GARDROUND + 1;
+		}
+	      else
+		{
+		  /* Add a one to the guards to round up */
+		  fraction += GARDROUND;
+		}
+	      if (fraction >= IMPLICIT_2)
+		{
+		  fraction >>= 1;
+		  exp += 1;
+		}
+	    }
+	  fraction >>= NGARDS;
+
+	  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX)
+	    {
+	      /* Saturate on overflow.  */
+	      exp = EXPMAX;
+	      fraction = ((fractype) 1 << FRACBITS) - 1;
+	    }
+	}
+    }
+
+  /* We previously used bitfields to store the number, but this doesn't
+     handle little/big endian systems conveniently, so use shifts and
+     masks: fraction lowest, exponent above it, sign bit on top.  */
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+  dst.bits.fraction = fraction;
+  dst.bits.exp = exp;
+  dst.bits.sign = sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low, unity;
+   int lowsign, lowexp;
+
+   unity = (halffractype) 1 << HALFFRACBITS;
+
+   /* Set HIGH to the high double's significand, masking out the implicit 1.
+      Set LOW to the low double's full significand.  */
+   high = (fraction >> (FRACBITS - HALFFRACBITS)) & (unity - 1);
+   low = fraction & (unity * 2 - 1);
+
+   /* Get the initial sign and exponent of the low double.  */
+   lowexp = exp - HALFFRACBITS - 1;
+   lowsign = sign;
+
+   /* HIGH should be rounded like a normal double, making |LOW| <=
+      0.5 ULP of HIGH.  Assume round-to-nearest.  */
+   if (exp < EXPMAX)
+     if (low > unity || (low == unity && (high & 1) == 1))
+       {
+	 /* Round HIGH up and adjust LOW to match.  */
+	 high++;
+	 if (high == unity)
+	   {
+	     /* May make it infinite, but that's OK.  */
+	     high = 0;
+	     exp++;
+	   }
+	 low = unity * 2 - low;
+	 lowsign ^= 1;
+       }
+
+   high |= (halffractype) exp << HALFFRACBITS;
+   high |= (halffractype) sign << (HALFFRACBITS + EXPBITS);
+
+   if (exp == EXPMAX || exp == 0 || low == 0)
+     low = 0;
+   else
+     {
+       while (lowexp > 0 && low < unity)
+	 {
+	   low <<= 1;
+	   lowexp--;
+	 }
+
+       if (lowexp <= 0)
+	 {
+	   halffractype roundmsb, round;
+	   int shift;
+
+	   shift = 1 - lowexp;
+	   roundmsb = (1 << (shift - 1));
+	   round = low & ((roundmsb << 1) - 1);
+
+	   low >>= shift;
+	   lowexp = 0;
+
+	   if (round > roundmsb || (round == roundmsb && (low & 1) == 1))
+	     {
+	       low++;
+	       if (low == unity)
+		 /* LOW rounds up to the smallest normal number.  */
+		 lowexp++;
+	     }
+	 }
+
+       low &= unity - 1;
+       low |= (halffractype) lowexp << HALFFRACBITS;
+       low |= (halffractype) lowsign << (HALFFRACBITS + EXPBITS);
+     }
+   dst.value_raw = ((fractype) high << HALFSHIFT) | low;
+ }
+# else
+  dst.value_raw = fraction & ((((fractype)1) << FRACBITS) - (fractype)1);
+  dst.value_raw |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << FRACBITS;
+  dst.value_raw |= ((fractype) (sign & 1)) << (FRACBITS + EXPBITS);
+# endif
+#endif
+  /* Swap the words of the result when FLOAT_WORD_ORDER_MISMATCH is set.  */
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+#ifdef TFLOAT
+  {
+    qrtrfractype tmp1 = dst.words[0];
+    qrtrfractype tmp2 = dst.words[1];
+    dst.words[0] = dst.words[3];
+    dst.words[1] = dst.words[2];
+    dst.words[2] = tmp2;
+    dst.words[3] = tmp1;
+  }
+#else
+  {
+    halffractype tmp = dst.words[0];
+    dst.words[0] = dst.words[1];
+    dst.words[1] = tmp;
+  }
+#endif
+#endif
+
+  return dst.value;
+}
+#endif
+
+#if defined(L_unpack_df) || defined(L_unpack_sf) || defined(L_unpack_tf)
+void
+unpack_d (FLO_union_type * src, fp_number_type * dst)
+{
+  /* Decode the raw encoding in *SRC into a sign, exponent and fraction
+     using shifts and masks (bitfields do not handle little/big endian
+     systems conveniently), then classify the value and fill in *DST.  */
+  fractype fraction;
+  int exp;
+  int sign;
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+  FLO_union_type swapped;	/* Copy of *SRC with its words reversed.  */
+
+#ifdef TFLOAT
+  swapped.words[0] = src->words[3];
+  swapped.words[1] = src->words[2];
+  swapped.words[2] = src->words[1];
+  swapped.words[3] = src->words[0];
+#else
+  swapped.words[0] = src->words[1];
+  swapped.words[1] = src->words[0];
+#endif
+  src = &swapped;		/* Decode the reordered copy from here on.  */
+#endif
+  
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+  fraction = src->bits.fraction;
+  exp = src->bits.exp;
+  sign = src->bits.sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low;	/* Raw value split at bit HALFSHIFT.  */
+   
+   high = src->value_raw >> HALFSHIFT;
+   low = src->value_raw & (((fractype)1 << HALFSHIFT) - 1);
+   /* Start with the fraction, exponent and sign fields of HIGH.  */
+   fraction = high & ((((fractype)1) << HALFFRACBITS) - 1);
+   fraction <<= FRACBITS - HALFFRACBITS;
+   exp = ((int)(high >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+   sign = ((int)(high >> (((HALFFRACBITS + EXPBITS))))) & 1;
+   /* Fold LOW in unless HIGH's exponent is 0 or EXPMAX, or LOW is 0.  */
+   if (exp != EXPMAX && exp != 0 && low != 0)
+     {
+       int lowexp = ((int)(low >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+       int lowsign = ((int)(low >> (((HALFFRACBITS + EXPBITS))))) & 1;
+       int shift;
+       fractype xlow;
+
+       xlow = low & ((((fractype)1) << HALFFRACBITS) - 1);
+       if (lowexp)
+	 xlow |= (((halffractype)1) << HALFFRACBITS);
+       else
+	 lowexp = 1;
+       shift = (FRACBITS - HALFFRACBITS) - (exp - lowexp);
+       if (shift > 0)
+	 xlow <<= shift;
+       else if (shift < 0)
+	 xlow >>= -shift;
+       if (sign == lowsign)
+	 fraction += xlow;
+       else if (fraction >= xlow)
+	 fraction -= xlow;
+       else
+	 {
+	   /* The high part is a power of two but the full number is lower.
+	      This code will leave the implicit 1 in FRACTION, but we'd
+	      have added that below anyway.  */
+	   fraction = (((fractype) 1 << FRACBITS) - xlow) << 1;
+	   exp--;
+	 }
+     }
+ }
+# else
+  fraction = src->value_raw & ((((fractype)1) << FRACBITS) - 1);
+  exp = ((int)(src->value_raw >> FRACBITS)) & ((1 << EXPBITS) - 1);
+  sign = ((int)(src->value_raw >> (FRACBITS + EXPBITS))) & 1;
+# endif
+#endif
+
+  dst->sign = sign;
+  if (exp == 0)
+    {
+      /* Zero exponent field: either a zero or a denormal.  */
+      if (fraction == 0
+#ifdef NO_DENORMALS
+	  || 1
+#endif
+	  )
+	{
+	  /* Zero fraction, or denormals disabled by NO_DENORMALS: a zero.  */
+	  dst->class = CLASS_ZERO;
+	}
+      else
+	{
+	  /* Zero exponent with nonzero fraction - it's denormalized,
+	     so there isn't a leading implicit one - we'll shift it so
+	     it gets one.  */
+	  dst->normal_exp = exp - EXPBIAS + 1;
+	  fraction <<= NGARDS;
+
+	  dst->class = CLASS_NUMBER;
+#if 1
+	  while (fraction < IMPLICIT_1)
+	    {
+	      fraction <<= 1;
+	      dst->normal_exp--;
+	    }
+#endif
+	  dst->fraction.ll = fraction;
+	}
+    }
+  else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp == EXPMAX)
+    {
+      /* Exponent field is EXPMAX: infinity or NaN.  */
+      if (fraction == 0)
+	{
+	  /* Attached to a zero fraction - means infinity */
+	  dst->class = CLASS_INFINITY;
+	}
+      else
+	{
+	  /* Nonzero fraction means NaN; QUIET_NAN tells quiet from signalling.  */
+#ifdef QUIET_NAN_NEGATED
+	  if ((fraction & QUIET_NAN) == 0)
+#else
+	  if (fraction & QUIET_NAN)
+#endif
+	    {
+	      dst->class = CLASS_QNAN;
+	    }
+	  else
+	    {
+	      dst->class = CLASS_SNAN;
+	    }
+	  /* Keep the fraction part as the nan number */
+	  dst->fraction.ll = fraction;
+	}
+    }
+  else
+    {
+      /* Ordinary number: unbias, add guard bits, OR in IMPLICIT_1.  */
+      dst->normal_exp = exp - EXPBIAS;
+      dst->class = CLASS_NUMBER;
+      dst->fraction.ll = (fraction << NGARDS) | IMPLICIT_1;
+    }
+}
+#endif /* L_unpack_df || L_unpack_sf */
+
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
+/* Add the unpacked numbers A and B.  Returns A or B themselves when one
+   operand alone decides the answer, the result of nan () for the sum of
+   opposite-signed infinities, and otherwise TMP, filled in with the sum
+   (no rounding is done here).  */
+static fp_number_type *
+_fpadd_parts (fp_number_type * a,
+	      fp_number_type * b,
+	      fp_number_type * tmp)
+{
+  /* Signed difference of the aligned fractions (mixed-sign case only).  */
+  intfrac signed_sum;
+
+  /* Working copies of both operands' exponents and fractions.  */
+  int exp_a, exp_b;
+  fractype frac_a, frac_b;
+
+  /* Absolute difference of the two exponents.  */
+  int gap;
+
+  /* A NaN operand is handed straight back, A taking precedence.  */
+  if (isnan (a))
+    return a;
+  if (isnan (b))
+    return b;
+
+  /* Infinities: an infinite operand is the answer, unless both are
+     infinite and disagree in sign.  */
+  if (isinf (a))
+    {
+      /* Adding infinities with opposite signs yields a NaN.  */
+      if (isinf (b) && a->sign != b->sign)
+	return nan ();
+      return a;
+    }
+  if (isinf (b))
+    return b;
+
+  /* Zeros: adding zero leaves the other operand unchanged, except that
+     0 + 0 needs its sign worked out.  */
+  if (iszero (b))
+    {
+      if (!iszero (a))
+	return a;
+
+      /* 0 + 0: a copy of A whose sign is set only when both signs are.  */
+      *tmp = *a;
+      tmp->sign = a->sign & b->sign;
+      return tmp;
+    }
+  if (iszero (a))
+    return b;
+
+  /* Got two numbers.  */
+  exp_a = a->normal_exp;
+  exp_b = b->normal_exp;
+  frac_a = a->fraction.ll;
+  frac_b = b->fraction.ll;
+
+  gap = exp_a - exp_b;
+  if (gap < 0)
+    gap = -gap;
+
+  if (gap >= FRAC_NBITS)
+    {
+      /* The exponents are at least FRAC_NBITS apart: keep only the
+	 operand with the larger exponent and zero the other fraction.  */
+      if (exp_a > exp_b)
+	{
+	  frac_b = 0;
+	  exp_b = exp_a;
+	}
+      else
+	{
+	  frac_a = 0;
+	  exp_a = exp_b;
+	}
+    }
+  else
+    {
+      /* Bring the operand with the smaller exponent up to the larger
+	 one, shifting its fraction with LSHIFT once per step.  At most
+	 one of the two loops runs.
+	 ??? This does shifts one bit at a time.  Optimize.  */
+      while (exp_b < exp_a)
+	{
+	  LSHIFT (frac_b);
+	  exp_b++;
+	}
+      while (exp_a < exp_b)
+	{
+	  LSHIFT (frac_a);
+	  exp_a++;
+	}
+    }
+
+  if (a->sign == b->sign)
+    {
+      /* Same sign: add the fractions and keep the common sign.  */
+      tmp->sign = a->sign;
+      tmp->normal_exp = exp_a;
+      tmp->fraction.ll = frac_a + frac_b;
+    }
+  else
+    {
+      /* Opposite signs: subtract the negative operand's fraction from
+	 the positive one's; the sign of that difference is the sign of
+	 the result.  */
+      signed_sum = a->sign ? -frac_a + frac_b : frac_a - frac_b;
+      tmp->normal_exp = exp_a;
+      if (signed_sum < 0)
+	{
+	  tmp->sign = 1;
+	  tmp->fraction.ll = -signed_sum;
+	}
+      else
+	{
+	  tmp->sign = 0;
+	  tmp->fraction.ll = signed_sum;
+	}
+
+      /* Renormalize after cancellation; a zero fraction is left alone.  */
+      while (tmp->fraction.ll && tmp->fraction.ll < IMPLICIT_1)
+	{
+	  tmp->normal_exp--;
+	  tmp->fraction.ll <<= 1;
+	}
+    }
+  tmp->class = CLASS_NUMBER;
+
+  /* Now the fraction is added, shift down by one place (via LSHIFT) if
+     it has reached IMPLICIT_2.  */
+  if (tmp->fraction.ll >= IMPLICIT_2)
+    {
+      tmp->normal_exp++;
+      LSHIFT (tmp->fraction.ll);
+    }
+
+  return tmp;
+}
+
+FLO_type
+add (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Floating-point addition: unpack both operands, add them with
+     _fpadd_parts and pack the result.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+  fp_number_type scratch;	/* Result storage handed to _fpadd_parts.  */
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* The pointer returned may designate either operand, SCRATCH or a
+     NaN; pack whatever it points at.  */
+  return pack_d (_fpadd_parts (&lhs, &rhs, &scratch));
+}
+
+FLO_type
+sub (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Floating-point subtraction, done as an addition after inverting the
+     sign of the unpacked ARG_B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+  fp_number_type scratch;	/* Result storage handed to _fpadd_parts.  */
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  rhs.sign ^= 1;
+
+  /* The pointer returned may designate either operand, SCRATCH or a
+     NaN; pack whatever it points at.  */
+  return pack_d (_fpadd_parts (&lhs, &rhs, &scratch));
+}
+#endif /* L_addsub_sf || L_addsub_df */
+
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
+static inline __attribute__ ((__always_inline__)) fp_number_type *
+_fpmul_parts ( fp_number_type *  a,
+	       fp_number_type *  b,
+	       fp_number_type * tmp)
+{
+  fractype low = 0;	/* Low word of the twice-as-wide product.  */
+  fractype high = 0;	/* High word of the twice-as-wide product.  */
+  /* Multiply A by B.  Special cases return A, B or nan (); else TMP.  */
+  if (isnan (a))
+    {
+      a->sign = a->sign != b->sign;	/* Set iff the operand signs differ.  */
+      return a;
+    }
+  if (isnan (b))
+    {
+      b->sign = a->sign != b->sign;
+      return b;
+    }
+  if (isinf (a))
+    {
+      if (iszero (b))
+	return nan ();		/* inf * 0 gives a NaN.  */
+      a->sign = a->sign != b->sign;
+      return a;
+    }
+  if (isinf (b))
+    {
+      if (iszero (a))
+	{
+	  return nan ();	/* 0 * inf gives a NaN.  */
+	}
+      b->sign = a->sign != b->sign;
+      return b;
+    }
+  if (iszero (a))
+    {
+      a->sign = a->sign != b->sign;
+      return a;
+    }
+  if (iszero (b))
+    {
+      b->sign = a->sign != b->sign;
+      return b;
+    }
+
+  /* Calculate the mantissa by multiplying both numbers to get a
+     twice-as-wide number.  */
+  {
+#if defined(NO_DI_MODE) || defined(TFLOAT)
+    {
+      fractype x = a->fraction.ll;
+      fractype ylow = b->fraction.ll;
+      fractype yhigh = 0;
+      int bit;
+
+      /* ??? This does multiplies one bit at a time.  Optimize.  */
+      for (bit = 0; bit < FRAC_NBITS; bit++)
+	{
+	  int carry;
+
+	  if (x & 1)
+	    {
+	      carry = (low += ylow) < ylow;	/* Wrapped sum => carry.  */
+	      high += yhigh + carry;
+	    }
+	  yhigh <<= 1;
+	  if (ylow & FRACHIGH)
+	    {
+	      yhigh |= 1;
+	    }
+	  ylow <<= 1;
+	  x >>= 1;
+	}
+    }
+#elif defined(FLOAT) 
+    /* Multiplying two USIs to get a UDI, we're safe.  */
+    {
+      UDItype answer = (UDItype)a->fraction.ll * (UDItype)b->fraction.ll;
+      
+      high = answer >> BITS_PER_SI;
+      low = answer;
+    }
+#else
+    /* fractype is DImode, but we need the result to be twice as wide.
+       Assuming a widening multiply from DImode to TImode is not
+       available, build one by hand.  */
+    {
+      USItype nl = a->fraction.ll;
+      USItype nh = a->fraction.ll >> BITS_PER_SI;
+      USItype ml = b->fraction.ll;
+      USItype mh = b->fraction.ll >> BITS_PER_SI;
+      UDItype pp_ll = (UDItype) ml * nl;
+      UDItype pp_hl = (UDItype) mh * nl;
+      UDItype pp_lh = (UDItype) ml * nh;
+      UDItype pp_hh = (UDItype) mh * nh;
+      UDItype res2 = 0;
+      UDItype res0 = 0;
+      UDItype ps_hh__ = pp_hl + pp_lh;
+      if (ps_hh__ < pp_hl)
+	res2 += (UDItype)1 << BITS_PER_SI;
+      pp_hl = (UDItype)(USItype)ps_hh__ << BITS_PER_SI;
+      res0 = pp_ll + pp_hl;
+      if (res0 < pp_ll)
+	res2++;
+      res2 += (ps_hh__ >> BITS_PER_SI) + pp_hh;
+      high = res2;
+      low = res0;
+    }
+#endif
+  }
+  /* Set the product's exponent and sign, then normalize HIGH:LOW.  */
+  tmp->normal_exp = a->normal_exp + b->normal_exp
+    + FRAC_NBITS - (FRACBITS + NGARDS);
+  tmp->sign = a->sign != b->sign;
+  while (high >= IMPLICIT_2)
+    {
+      tmp->normal_exp++;
+      if (high & 1)
+	{
+	  low >>= 1;
+	  low |= FRACHIGH;
+	}
+      high >>= 1;
+    }
+  while (high < IMPLICIT_1)
+    {
+      tmp->normal_exp--;
+
+      high <<= 1;
+      if (low & FRACHIGH)
+	high |= 1;
+      low <<= 1;
+    }
+  /* Rounding is tricky; the scheme inside "#if 0" below is disabled.  */
+#if 0
+  if (low & FRACHIGH2)
+    {
+      if (((high & GARDMASK) != GARDMSB)
+	  && (((high + 1) & GARDMASK) == GARDMSB))
+	{
+	  /* don't round, it gets done again later.  */
+	}
+      else
+	{
+	  high++;
+	}
+    }
+#endif
+  if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB)
+    {
+      if (high & (1 << NGARDS))
+	{
+	  /* half way, so round to even */
+	  high += GARDROUND + 1;
+	}
+      else if (low)
+	{
+	  /* but we really weren't half way */
+	  high += GARDROUND + 1;
+	}
+    }
+  tmp->fraction.ll = high;
+  tmp->class = CLASS_NUMBER;
+  return tmp;
+}
+
+FLO_type
+multiply (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Floating-point multiplication: unpack both operands, multiply them
+     with _fpmul_parts and pack the result.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+  fp_number_type scratch;	/* Result storage handed to _fpmul_parts.  */
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* The pointer returned may designate either operand, SCRATCH or a
+     NaN; pack whatever it points at.  */
+  return pack_d (_fpmul_parts (&lhs, &rhs, &scratch));
+}
+#endif /* L_mul_sf || L_mul_df */
+
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
+/* Divide the unpacked number A by B, overwriting A with the quotient.
+   Returns A, or B when only B is a NaN, or the result of nan () for
+   inf / inf and 0 / 0.  */
+static inline __attribute__ ((__always_inline__)) fp_number_type *
+_fpdiv_parts (fp_number_type * a,
+	      fp_number_type * b)
+{
+  fractype probe;		/* Quotient bit currently being decided.  */
+  fractype num;			/* Running, shifted remainder.  */
+  fractype den;			/* Fraction of the divisor.  */
+  fractype quot;		/* Quotient bits collected so far.  */
+
+  /* A NaN operand is handed straight back, A taking precedence.  */
+  if (isnan (a))
+    return a;
+  if (isnan (b))
+    return b;
+
+  /* The result sign is stored in A right away, so that every later
+     `return a' already carries it.  */
+  a->sign = a->sign ^ b->sign;
+
+  if (isinf (a) || iszero (a))
+    {
+      /* inf / inf and 0 / 0 give a NaN; otherwise A is the answer.  */
+      if (a->class == b->class)
+	return nan ();
+      return a;
+    }
+
+  if (isinf (b))
+    {
+      /* Finite / inf: clear A's fraction and exponent.  */
+      a->normal_exp = 0;
+      a->fraction.ll = 0;
+      return a;
+    }
+
+  if (iszero (b))
+    {
+      /* Nonzero finite / 0: A becomes an infinity.  */
+      a->class = CLASS_INFINITY;
+      return a;
+    }
+
+  /* quotient =
+     (numerator / denominator) * 2^(numerator exponent - denominator exponent)
+   */
+  a->normal_exp -= b->normal_exp;
+  num = a->fraction.ll;
+  den = b->fraction.ll;
+
+  if (num < den)
+    {
+      /* The fraction would be less than 1.0: pre-scale the numerator and
+	 compensate in the exponent.  */
+      a->normal_exp--;
+      num *= 2;
+    }
+
+  /* Shift-and-subtract division producing one quotient bit per pass,
+     from the IMPLICIT_1 position downwards.
+     ??? Does divide one bit at a time.  Optimize.  */
+  quot = 0;
+  for (probe = IMPLICIT_1; probe; probe >>= 1)
+    {
+      if (num >= den)
+	{
+	  num -= den;
+	  quot |= probe;
+	}
+      num *= 2;
+    }
+
+  if (!ROUND_TOWARDS_ZERO && (quot & GARDMASK) == GARDMSB)
+    {
+      /* The guard bits show exactly the half-way pattern.  Round up when
+	 the lsb of the result is set (round to even), or when a nonzero
+	 remainder says we really weren't half way, more bits exist.  */
+      if ((quot & (1 << NGARDS)) != 0
+	  || num != 0)
+	quot += GARDROUND + 1;
+    }
+
+  /* The quotient, guard bits included, replaces A's fraction.  */
+  a->fraction.ll = quot;
+  return a;
+}
+
+FLO_type
+divide (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Floating-point division: unpack both operands, divide them with
+     _fpdiv_parts and pack the result.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* _fpdiv_parts works in place: the pointer returned designates LHS
+     (now the quotient), RHS or a NaN.  */
+  return pack_d (_fpdiv_parts (&lhs, &rhs));
+}
+#endif /* L_div_sf || L_div_df */
+
+#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df) \
+    || defined(L_fpcmp_parts_tf)
+/* according to the demo, fpcmp returns a comparison with 0... thus
+   a<b -> -1
+   a==b -> 0
+   a>b -> +1
+ */
+
+int
+__fpcmp_parts (fp_number_type * a, fp_number_type * b)
+{
+  /* Compare the unpacked numbers A and B: negative for A < B, zero for
+     A == B, positive for A > B.  Unordered operands (either one a NaN)
+     also give 1, so callers that care must test for NaNs themselves.  */
+
+  int mag_gt;			/* Answer when |A| > |B| under A's sign.  */
+  int mag_lt;			/* Answer when |A| < |B| under A's sign.  */
+
+  /* Either NaN -> unordered.  There is no dedicated return value for
+     that; 1 is used.  */
+  if (isnan (a) || isnan (b))
+    return 1;			/* how to indicate unordered compare? */
+
+  if (isinf (a))
+    {
+      if (isinf (b))
+	{
+	  /* Two infinities are ordered by sign alone:
+
+	     b    \a| +inf(0)| -inf(1)
+	     ______\+--------+--------
+	     +inf(0)| a==b(0)| a<b(-1)
+	     -------+--------+--------
+	     -inf(1)| a>b(1) | a==b(0)
+	     -------+--------+--------
+
+	     which is just the difference of the sign bits.  */
+	  return b->sign - a->sign;
+	}
+
+      /* Only A is infinite: its sign decides.  */
+      return a->sign ? -1 : 1;
+    }
+  if (isinf (b))
+    {
+      /* Only B is infinite: its sign decides, the other way round.  */
+      return b->sign ? 1 : -1;
+    }
+
+  if (iszero (a))
+    {
+      /* Zeros compare equal whatever their signs.  */
+      if (iszero (b))
+	return 0;
+      /* Zero against a nonzero finite B: B's sign decides.  */
+      return b->sign ? 1 : -1;
+    }
+  if (iszero (b))
+    {
+      /* Nonzero finite A against zero: A's sign decides.  */
+      return a->sign ? -1 : 1;
+    }
+
+  /* Now both are "normal".  For a negative A a larger magnitude means a
+     smaller value, which is why the answers for the magnitude tests
+     below depend on A's sign.  */
+  mag_gt = a->sign ? -1 : 1;
+  mag_lt = -mag_gt;
+
+  /* Opposite signs: the negative operand is the smaller one.  */
+  if (a->sign != b->sign)
+    return mag_gt;
+
+  /* Same sign: the exponents decide first...  */
+  if (a->normal_exp != b->normal_exp)
+    return a->normal_exp > b->normal_exp ? mag_gt : mag_lt;
+
+  /* ...then, with equal exponents, the fractions.  */
+  if (a->fraction.ll != b->fraction.ll)
+    return a->fraction.ll > b->fraction.ll ? mag_gt : mag_lt;
+
+  /* After all that, they're equal.  */
+  return 0;
+}
+#endif
+
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf)
+/* Three-way comparison of ARG_A and ARG_B: the value of __fpcmp_parts on
+   the unpacked operands (which is 1 if either operand is a NaN).  */
+CMPtype
+compare (FLO_type arg_a, FLO_type arg_b)
+{
+  fp_number_type a, b;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
+
+  return __fpcmp_parts (&a, &b);
+}
+#endif /* L_compare_sf || L_compare_df || L_compare_tf */
+
+#ifndef US_SOFTWARE_GOFAST
+
+/* These should be optimized for their specific tasks someday.  */
+
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
+CMPtype
+_eq_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Equality test: the result is 0 exactly when ARG_A == ARG_B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* Unordered operands must not look equal, so a NaN on either side
+     yields the nonzero value 1 (false, truth == 0).  */
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;
+}
+#endif /* L_eq_sf || L_eq_df */
+
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
+CMPtype
+_ne_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Inequality test: the result is nonzero exactly when ARG_A != ARG_B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* Unordered operands count as different, so a NaN on either side
+     yields the nonzero value 1 (true, truth != 0).  */
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;
+}
+#endif /* L_ne_sf || L_ne_df */
+
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
+CMPtype
+_gt_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Greater-than test: the result is > 0 exactly when ARG_A > ARG_B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* An unordered comparison must fail, so a NaN on either side yields
+     -1 (false, truth > 0).  */
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return -1;
+}
+#endif /* L_gt_sf || L_gt_df */
+
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
+CMPtype
+_ge_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Greater-or-equal test: the result is >= 0 exactly when A >= B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* A NaN on either side must fail: -1 (false, truth >= 0).  */
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return -1;
+}
+#endif /* L_ge_sf || L_ge_df */
+
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
+CMPtype
+_lt_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Less-than test: the result is < 0 exactly when ARG_A < ARG_B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* An unordered comparison must fail, so a NaN on either side yields
+     1 (false, truth < 0).  */
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;
+}
+#endif /* L_lt_sf || L_lt_df */
+
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
+CMPtype
+_le_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Less-or-equal test: the result is <= 0 exactly when ARG_A <= ARG_B.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  /* An unordered comparison must fail, so a NaN on either side yields
+     1 (false, truth <= 0).  */
+  if (!isnan (&lhs) && !isnan (&rhs))
+    return __fpcmp_parts (&lhs, &rhs);
+  return 1;
+}
+#endif /* L_le_sf || L_le_df */
+
+#endif /* ! US_SOFTWARE_GOFAST */
+
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
+CMPtype
+_unord_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  /* Nonzero exactly when ARG_A and ARG_B are unordered, that is when at
+     least one of them is a NaN.  */
+  FLO_union_type raw_a, raw_b;
+  fp_number_type lhs, rhs;
+
+  raw_a.value = arg_a;
+  unpack_d (&raw_a, &lhs);
+  raw_b.value = arg_b;
+  unpack_d (&raw_b, &rhs);
+
+  return (isnan (&lhs) || isnan (&rhs));
+}
+#endif /* L_unord_sf || L_unord_df */
+
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
+FLO_type
+si_to_float (SItype arg_a)
+{
+  /* Convert the signed integer ARG_A to floating point by building an
+     unpacked number from it and packing that.  */
+  fp_number_type num;
+
+  num.sign = arg_a < 0;
+
+  if (arg_a == 0)
+    {
+      /* Zero only needs its class (and the sign set above).  */
+      num.class = CLASS_ZERO;
+      return pack_d (&num);
+    }
+
+  /* Special case for minint, since there is no +ve integer
+     representation for it: return the converted constant directly.  */
+  if (arg_a == (- MAX_SI_INT - 1))
+    return (FLO_type)(- MAX_SI_INT - 1);
+
+  /* Start from the magnitude, taken as a fraction with exponent
+     FRACBITS + NGARDS...  */
+  num.class = CLASS_NUMBER;
+  num.fraction.ll = num.sign ? -arg_a : arg_a;
+
+  /* ...then shift it up to the normalized position, lowering the
+     exponent by one per step.  */
+  for (num.normal_exp = FRACBITS + NGARDS;
+       num.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS));
+       num.normal_exp--)
+    num.fraction.ll <<= 1;
+
+  return pack_d (&num);
+}
+#endif /* L_si_to_sf || L_si_to_df */
+
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
+FLO_type
+usi_to_float (USItype arg_a)
+{
+  /* Convert the unsigned integer ARG_A to floating point.  */
+  fp_number_type in;
+
+  in.sign = 0;
+  if (!arg_a)
+    in.class = CLASS_ZERO;
+  else
+    {
+      in.class = CLASS_NUMBER;
+      in.normal_exp = FRACBITS + NGARDS;
+      in.fraction.ll = arg_a;
+      /* Too wide: shift right only until the leading bit is in place,
+	 folding each bit shifted out into the lsb to keep rounding exact.  */
+      while (in.fraction.ll >= ((fractype)1 << (FRACBITS + NGARDS + 1)))
+	{
+	  in.fraction.ll = (in.fraction.ll >> 1) | (in.fraction.ll & 1);
+	  in.normal_exp += 1;
+	}
+      while (in.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS)))
+	{
+	  in.fraction.ll <<= 1;
+	  in.normal_exp -= 1;
+	}
+    }
+  return pack_d (&in);
+}
+#endif
+
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
+SItype
+float_to_si (FLO_type arg_a)
+{
+  /* Convert ARG_A to a signed integer, truncating towards zero and
+     saturating at the SItype limits.  Zeros and NaNs convert to 0.  */
+  FLO_union_type raw;
+  fp_number_type num;
+  SItype magnitude;
+
+  raw.value = arg_a;
+  unpack_d (&raw, &num);
+
+  if (iszero (&num) || isnan (&num))
+    return 0;
+
+  /* Infinite or too large: get reasonable MAX_SI_INT...  */
+  if (isinf (&num) || num.normal_exp > BITS_PER_SI - 2)
+    return num.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT;
+
+  /* It is a number, but a small one.  */
+  if (num.normal_exp < 0)
+    return 0;
+  magnitude = num.fraction.ll >> ((FRACBITS + NGARDS) - num.normal_exp);
+  return num.sign ? (-magnitude) : (magnitude);
+}
+#endif /* L_sf_to_si || L_df_to_si */
+
+#if defined(L_sf_to_usi) || defined(L_df_to_usi) || defined(L_tf_to_usi)
+#if defined US_SOFTWARE_GOFAST || defined(L_tf_to_usi)
+/* While libgcc2.c defines its own __fixunssfsi and __fixunsdfsi routines,
+   we also define them for GOFAST because the ones in libgcc2.c have the
+   wrong names and I'd rather define these here and keep GOFAST CYG-LOC's
+   out of libgcc2.c.  We can't define these here if not GOFAST because then
+   there'd be duplicate copies.  */
+
+USItype
+float_to_usi (FLO_type arg_a)
+{
+  /* Convert ARG_A to an unsigned integer.  Zeros, NaNs, anything
+     negative and numbers with a negative exponent give 0; infinity and
+     values too large for USItype give MAX_USI_INT.  */
+  FLO_union_type raw;
+  fp_number_type num;
+  int up;			/* Left shift that scales the fraction.  */
+
+  raw.value = arg_a;
+  unpack_d (&raw, &num);
+
+  if (iszero (&num) || isnan (&num) || num.sign)
+    return 0;
+
+  /* Get reasonable MAX_USI_INT...  */
+  if (isinf (&num) || num.normal_exp > BITS_PER_SI - 1)
+    return MAX_USI_INT;
+
+  /* It is a number, but a small one.  */
+  if (num.normal_exp < 0)
+    return 0;
+
+  up = num.normal_exp - (FRACBITS + NGARDS);
+  if (up > 0)
+    return num.fraction.ll << up;
+  return num.fraction.ll >> -up;
+}
+#endif /* US_SOFTWARE_GOFAST */
+#endif /* L_sf_to_usi || L_df_to_usi */
+
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
+FLO_type
+negate (FLO_type arg_a)
+{
+  /* Unpack ARG_A, apply flip_sign to it and pack the result.  */
+  FLO_union_type raw;
+  fp_number_type num;
+
+  raw.value = arg_a;
+  unpack_d (&raw, &num);
+  flip_sign (&num);
+  return pack_d (&num);
+}
+#endif /* L_negate_sf || L_negate_df */
+
+#ifdef FLOAT
+
+#if defined(L_make_sf)
+SFtype
+__make_fp (fp_class_type class, unsigned int sign,
+	   int exp, USItype frac)
+{
+  /* Build an unpacked number from the given class, sign, exponent and
+     fraction, and pack it into an SFtype.  */
+  fp_number_type parts;
+
+  parts.fraction.ll = frac;
+  parts.normal_exp = exp;
+  parts.sign = sign;
+  parts.class = class;
+  return pack_d (&parts);
+}
+#endif /* L_make_sf */
+
+#ifndef FLOAT_ONLY
+
+/* This enables one to build an fp library that supports float but not double.
+   Otherwise, we would get an undefined reference to __make_dp.
+   This is needed for some 8-bit ports that can't handle well values that
+   are 8-bytes in size, so we just don't support double for them at all.  */
+
+#if defined(L_sf_to_df)
+DFtype
+sf_to_df (SFtype arg_a)
+{
+  /* Widen ARG_A by re-packing its unpacked fields as a DFtype.  */
+  FLO_union_type raw;
+  fp_number_type parts;
+  UDItype wide;			/* Fraction moved up by F_D_BITOFF bits.  */
+  raw.value = arg_a;
+  unpack_d (&raw, &parts);
+  wide = ((UDItype) parts.fraction.ll) << F_D_BITOFF;
+  return __make_dp (parts.class, parts.sign, parts.normal_exp, wide);
+}
+#endif /* L_sf_to_df */
+
+#if defined(L_sf_to_tf) && defined(TMODES)
+TFtype
+sf_to_tf (SFtype arg_a)
+{
+  /* Widen ARG_A by re-packing its unpacked fields as a TFtype.  */
+  FLO_union_type raw;
+  fp_number_type parts;
+  UTItype wide;			/* Fraction moved up by F_T_BITOFF bits.  */
+  raw.value = arg_a;
+  unpack_d (&raw, &parts);
+  wide = ((UTItype) parts.fraction.ll) << F_T_BITOFF;
+  return __make_tp (parts.class, parts.sign, parts.normal_exp, wide);
+}
+#endif /* L_sf_to_tf */
+
+#endif /* ! FLOAT_ONLY */
+#endif /* FLOAT */
+
+#ifndef FLOAT
+
+extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype);
+
+#if defined(L_make_df)
+DFtype
+__make_dp (fp_class_type class, unsigned int sign, int exp, UDItype frac)
+{
+  /* Pack the given class, sign, exponent and fraction into a DFtype.  */
+  fp_number_type parts;
+  parts.fraction.ll = frac;
+  parts.normal_exp = exp;
+  parts.sign = sign;
+  parts.class = class;
+  return pack_d (&parts);
+}
+#endif /* L_make_df */
+
+#if defined(L_df_to_sf)
+SFtype
+df_to_sf (DFtype arg_a)
+{
+  /* Narrow ARG_A to an SFtype by way of __make_fp.  */
+  FLO_union_type raw;
+  fp_number_type parts;
+  USItype narrow;
+  USItype dropped = ((USItype) 1 << F_D_BITOFF) - 1;
+
+  raw.value = arg_a;
+  unpack_d (&raw, &parts);
+
+  /* Keep the top bits of the fraction; if any of the F_D_BITOFF bits
+     dropped was nonzero, record that in the lowest guard bit.  */
+  narrow = parts.fraction.ll >> F_D_BITOFF;
+  if (parts.fraction.ll & dropped)
+    narrow |= 1;
+  return __make_fp (parts.class, parts.sign, parts.normal_exp, narrow);
+}
+#endif /* L_df_to_sf */
+
+#if defined(L_df_to_tf) && defined(TMODES) \
+    && !defined(FLOAT) && !defined(TFLOAT)
+TFtype
+df_to_tf (DFtype arg_a)
+{
+  /* Widen ARG_A by re-packing its unpacked fields as a TFtype.  */
+  FLO_union_type raw;
+  fp_number_type parts;
+  UTItype wide;			/* Fraction moved up by D_T_BITOFF bits.  */
+  raw.value = arg_a;
+  unpack_d (&raw, &parts);
+  wide = ((UTItype) parts.fraction.ll) << D_T_BITOFF;
+  return __make_tp (parts.class, parts.sign, parts.normal_exp, wide);
+}
+#endif /* L_df_to_tf */
+
+#ifdef TFLOAT
+#if defined(L_make_tf)
+TFtype
+__make_tp (fp_class_type class, unsigned int sign,
+	   int exp, UTItype frac)
+{
+  /* Build an unpacked number from the given class, sign, exponent and
+     fraction, and pack it into a TFtype.  */
+  fp_number_type parts;
+
+  parts.fraction.ll = frac;
+  parts.normal_exp = exp;
+  parts.sign = sign;
+  parts.class = class;
+  return pack_d (&parts);
+}
+#endif /* L_make_tf */
+
+#if defined(L_tf_to_df)
+DFtype
+tf_to_df (TFtype arg_a)
+{
+  /* Narrow ARG_A to a DFtype by way of __make_dp.  */
+  FLO_union_type raw;
+  fp_number_type parts;
+  UDItype narrow;
+  UTItype dropped = ((UTItype) 1 << D_T_BITOFF) - 1;
+
+  raw.value = arg_a;
+  unpack_d (&raw, &parts);
+
+  /* Keep the top bits of the fraction; if any of the D_T_BITOFF bits
+     dropped was nonzero, record that in the lowest guard bit.  */
+  narrow = parts.fraction.ll >> D_T_BITOFF;
+  if (parts.fraction.ll & dropped)
+    narrow |= 1;
+  return __make_dp (parts.class, parts.sign, parts.normal_exp, narrow);
+}
+#endif /* L_tf_to_df */
+
+#if defined(L_tf_to_sf)
+SFtype
+tf_to_sf (TFtype arg_a)
+{
+  /* Narrow ARG_A to an SFtype by way of __make_fp.  */
+  FLO_union_type raw;
+  fp_number_type parts;
+  USItype narrow;
+  UTItype dropped = ((UTItype) 1 << F_T_BITOFF) - 1;
+
+  raw.value = arg_a;
+  unpack_d (&raw, &parts);
+
+  /* Keep the top bits of the fraction; if any of the F_T_BITOFF bits
+     dropped was nonzero, record that in the lowest guard bit.  */
+  narrow = parts.fraction.ll >> F_T_BITOFF;
+  if (parts.fraction.ll & dropped)
+    narrow |= 1;
+  return __make_fp (parts.class, parts.sign, parts.normal_exp, narrow);
+}
+#endif /* L_tf_to_sf */
+#endif /* TFLOAT */
+
+#endif /* ! FLOAT */
+#endif /* !EXTENDED_FLOAT_STUBS */
--- ./gcc/gcc/libgcc2.c
+++ ./gcc/gcc/libgcc2.c
@@ -0,0 +1,1669 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+   2000, 2001, 2002, 2003  Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
+
+
+/* We include auto-host.h here to get HAVE_GAS_HIDDEN.  This is
+   supposedly valid even though this is a "target" file.  */
+#include "auto-host.h"
+
+/* It is incorrect to include config.h here, because this file is being
+   compiled for the target, and hence definitions concerning only the host
+   do not apply.  */
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* Don't use `fancy_abort' here even if config.h says to use it.  */
+#ifdef abort
+#undef abort
+#endif
+
+#ifdef HAVE_GAS_HIDDEN
+#define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
+#else
+#define ATTRIBUTE_HIDDEN
+#endif
+
+#include "libgcc2.h"
+\f
+#ifdef DECLARE_LIBRARY_RENAMES
+  DECLARE_LIBRARY_RENAMES
+#endif
+
+#if defined (L_negdi2)
+DWtype
+__negdi2 (DWtype u)
+{
+  const DWunion uu = {.ll = u};
+  const DWunion w = { {.low = -uu.s.low,
+		       .high = -uu.s.high - ((UWtype) -uu.s.low > 0) } };
+  /* Each word is negated; the high word borrows 1 when -low is nonzero.  */
+  return w.ll;
+}
+#endif
+
+#ifdef L_addvsi3
+/* Return A + B, aborting if the single-word addition overflowed.  */
+Wtype
+__addvsi3 (Wtype a, Wtype b)
+{
+  const Wtype sum = a + b;
+  /* B >= 0 must not lower the sum below A; B < 0 must not raise it.  */
+  if ((b >= 0 && sum < a) || (b < 0 && sum > a))
+    abort ();
+  return sum;
+}
+#endif
+\f
+#ifdef L_addvdi3
+/* Return A + B, aborting if the double-word addition overflowed.  */
+DWtype
+__addvdi3 (DWtype a, DWtype b)
+{
+  const DWtype sum = a + b;
+  /* B >= 0 must not lower the sum below A; B < 0 must not raise it.  */
+  if ((b >= 0 && sum < a) || (b < 0 && sum > a))
+    abort ();
+  return sum;
+}
+#endif
+\f
+#ifdef L_subvsi3
+/* Return A - B, aborting if the single-word subtraction overflowed.  */
+Wtype
+__subvsi3 (Wtype a, Wtype b)
+{
+  const Wtype w = a - b;	/* one word, matching the return type */
+  /* B >= 0 must not raise the difference above A; B < 0 must not lower it.  */
+  if (b >= 0 ? w > a : w < a)
+    abort ();
+  return w;
+}
+#endif
+\f
+#ifdef L_subvdi3
+/* Return A - B, aborting if the double-word subtraction overflowed.  */
+DWtype
+__subvdi3 (DWtype a, DWtype b)
+{
+  const DWtype diff = a - b;
+  /* B >= 0 must not raise the difference above A; B < 0 must not lower it.  */
+  if ((b >= 0 && diff > a) || (b < 0 && diff < a))
+    abort ();
+  return diff;
+}
+#endif
+\f
+#ifdef L_mulvsi3
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+/* Return A * B, aborting if the product does not fit in a Wtype.  */
+Wtype
+__mulvsi3 (Wtype a, Wtype b)
+{
+  const DWtype w = (DWtype) a * (DWtype) b;
+
+  /* The product fits iff its high word is the sign extension of its low
+     word.  A range check keyed on operand signs wrongly rejects 0 * -1.  */
+  if ((Wtype) (w >> WORD_SIZE) != (Wtype) w >> (WORD_SIZE - 1))
+    abort ();
+  return w;
+}
+#endif
+\f
+#ifdef L_negvsi2
+/* Return -A, aborting if the single-word negation overflowed.  */
+Wtype
+__negvsi2 (Wtype a)
+{
+  const Wtype negated = -a;
+  /* Abort when the result has the sign that a correct negation cannot.  */
+  if ((a >= 0 && negated > 0) || (a < 0 && negated < 0))
+    abort ();
+  return negated;
+}
+#endif
+\f
+#ifdef L_negvdi2
+/* Return -A, aborting if the double-word negation overflowed.  */
+DWtype
+__negvdi2 (DWtype a)
+{
+  const DWtype negated = -a;
+  /* Abort when the result has the sign that a correct negation cannot.  */
+  if ((a >= 0 && negated > 0) || (a < 0 && negated < 0))
+    abort ();
+  return negated;
+}
+#endif
+\f
+#ifdef L_absvsi2
+Wtype
+__absvsi2 (Wtype a)
+{
+  Wtype w = a;
+  /* Only a negative A needs negating.  */
+  if (a < 0)
+#ifdef L_negvsi2
+    w = __negvsi2 (a);
+#else
+    w = -a;
+  /* A result that is still negative means the negation overflowed.  */
+  if (w < 0)
+    abort ();
+#endif
+
+   return w;
+}
+#endif
+\f
+#ifdef L_absvdi2
+DWtype
+__absvdi2 (DWtype a)
+{
+  DWtype w = a;
+  /* Only a negative A needs negating.  */
+  if (a < 0)
+#ifdef L_negvdi2
+    w = __negvdi2 (a);
+#else
+    w = -a;
+  /* A result that is still negative means the negation overflowed.  */
+  if (w < 0)
+    abort ();
+#endif
+
+  return w;
+}
+#endif
+\f
+#ifdef L_mulvdi3
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+DWtype
+__mulvdi3 (DWtype u, DWtype v)
+{
+  /* The unchecked multiplication needs 3 Wtype x Wtype multiplications,
+     but the checked multiplication needs only two.  */
+  const DWunion uu = {.ll = u};
+  const DWunion vv = {.ll = v};
+  /* A value fits in one Wtype when its high word sign-extends its low word.  */
+  if (__builtin_expect (uu.s.high == uu.s.low >> (WORD_SIZE - 1), 1))
+    {
+      /* u fits in a single Wtype.  */
+      if (__builtin_expect (vv.s.high == vv.s.low >> (WORD_SIZE - 1), 1))
+	{
+	  /* v fits in a single Wtype as well.  */
+	  /* A single multiplication.  No overflow risk.  */
+	  return (DWtype) uu.s.low * (DWtype) vv.s.low;
+	}
+      else
+	{
+	  /* Two multiplications.  */
+	  DWunion w0 = {.ll = (UDWtype) (UWtype) uu.s.low
+			* (UDWtype) (UWtype) vv.s.low};
+	  DWunion w1 = {.ll = (UDWtype) (UWtype) uu.s.low
+			* (UDWtype) (UWtype) vv.s.high};
+	  /* The words were multiplied as unsigned; adjust W1 for negative ones.  */
+	  if (vv.s.high < 0)
+	    w1.s.high -= uu.s.low;
+	  if (uu.s.low < 0)
+	    w1.ll -= vv.ll;
+	  w1.ll += (UWtype) w0.s.high;
+	  if (__builtin_expect (w1.s.high == w1.s.low >> (WORD_SIZE - 1), 1))
+	    {
+	      w0.s.high = w1.s.low;
+	      return w0.ll;
+	    }
+	}
+    }
+  else
+    {
+      if (__builtin_expect (vv.s.high == vv.s.low >> (WORD_SIZE - 1), 1))
+	{
+	  /* v fits into a single Wtype.  */
+	  /* Two multiplications.  */
+	  DWunion w0 = {.ll = (UDWtype) (UWtype) uu.s.low
+			* (UDWtype) (UWtype) vv.s.low};
+	  DWunion w1 = {.ll = (UDWtype) (UWtype) uu.s.high
+			* (UDWtype) (UWtype) vv.s.low};
+	  /* The words were multiplied as unsigned; adjust W1 for negative ones.  */
+	  if (uu.s.high < 0)
+	    w1.s.high -= vv.s.low;
+	  if (vv.s.low < 0)
+	    w1.ll -= uu.ll;
+	  w1.ll += (UWtype) w0.s.high;
+	  if (__builtin_expect (w1.s.high == w1.s.low >> (WORD_SIZE - 1), 1))
+	    {
+	      w0.s.high = w1.s.low;
+	      return w0.ll;
+	    }
+	}
+      else
+	{
+	  /* A few sign checks and a single multiplication.  */
+	  if (uu.s.high >= 0)
+	    {
+	      if (vv.s.high >= 0)
+		{
+		  if (uu.s.high == 0 && vv.s.high == 0)
+		    {
+		      const DWtype w = (UDWtype) (UWtype) uu.s.low
+			* (UDWtype) (UWtype) vv.s.low;
+		      if (__builtin_expect (w >= 0, 1))
+			return w;
+		    }
+		}
+	      else
+		{
+		  if (uu.s.high == 0 && vv.s.high == (Wtype) -1)
+		    {
+		      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
+				    * (UDWtype) (UWtype) vv.s.low};
+
+		      ww.s.high -= uu.s.low;
+		      if (__builtin_expect (ww.s.high < 0, 1))
+			return ww.ll;
+		    }
+		}
+	    }
+	  else
+	    {
+	      if (vv.s.high >= 0)
+		{
+		  if (uu.s.high == (Wtype) -1 && vv.s.high == 0)
+		    {
+		      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
+				    * (UDWtype) (UWtype) vv.s.low};
+
+		      ww.s.high -= vv.s.low;
+		      if (__builtin_expect (ww.s.high < 0, 1))
+			return ww.ll;
+		    }
+		}
+	      else
+		{
+		  if (uu.s.high == (Wtype) -1 && vv.s.high == (Wtype) - 1)
+		    {
+		      DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
+				    * (UDWtype) (UWtype) vv.s.low};
+
+		      ww.s.high -= uu.s.low;
+		      ww.s.high -= vv.s.low;
+		      if (__builtin_expect (ww.s.high >= 0, 1))
+			return ww.ll;
+		    }
+		}
+	    }
+	}
+    }
+
+  /* Overflow: every path that did not return above ends up here.  */
+  abort ();
+}
+#endif
+\f
+
+/* Unless shift functions are defined with full ANSI prototypes,
+   parameter b will be promoted to int if word_type is smaller than an int.  */
+#ifdef L_lshrdi3
+DWtype
+__lshrdi3 (DWtype u, word_type b)
+{
+  if (b == 0)
+    return u;
+  /* BM is the word size in bits minus the shift count B.  */
+  const DWunion uu = {.ll = u};
+  const word_type bm = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+  DWunion w;
+  /* BM <= 0: B is at least a whole word, so only the high word contributes.  */
+  if (bm <= 0)
+    {
+      w.s.high = 0;
+      w.s.low = (UWtype) uu.s.high >> -bm;
+    }
+  else
+    {
+      const UWtype carries = (UWtype) uu.s.high << bm;
+      /* CARRIES holds the high-word bits that move into the low word.  */
+      w.s.high = (UWtype) uu.s.high >> b;
+      w.s.low = ((UWtype) uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}
+#endif
+
+#ifdef L_ashldi3
+DWtype
+__ashldi3 (DWtype u, word_type b)
+{
+  if (b == 0)
+    return u;
+  /* BM is the word size in bits minus the shift count B.  */
+  const DWunion uu = {.ll = u};
+  const word_type bm = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+  DWunion w;
+  /* BM <= 0: B is at least a whole word, so only the low word contributes.  */
+  if (bm <= 0)
+    {
+      w.s.low = 0;
+      w.s.high = (UWtype) uu.s.low << -bm;
+    }
+  else
+    {
+      const UWtype carries = (UWtype) uu.s.low >> bm;
+      /* CARRIES holds the low-word bits that move into the high word.  */
+      w.s.low = (UWtype) uu.s.low << b;
+      w.s.high = ((UWtype) uu.s.high << b) | carries;
+    }
+
+  return w.ll;
+}
+#endif
+
+#ifdef L_ashrdi3
+DWtype
+__ashrdi3 (DWtype u, word_type b)
+{
+  if (b == 0)
+    return u;
+  /* BM is the word size in bits minus the shift count B.  */
+  const DWunion uu = {.ll = u};
+  const word_type bm = (sizeof (Wtype) * BITS_PER_UNIT) - b;
+  DWunion w;
+  /* BM <= 0: B is at least a whole word, so only the high word contributes.  */
+  if (bm <= 0)
+    {
+      /* w.s.high = 1..1 or 0..0 */
+      w.s.high = uu.s.high >> (sizeof (Wtype) * BITS_PER_UNIT - 1);
+      w.s.low = uu.s.high >> -bm;
+    }
+  else
+    {
+      const UWtype carries = (UWtype) uu.s.high << bm;
+      /* The high word is shifted as signed, the low word as unsigned.  */
+      w.s.high = uu.s.high >> b;
+      w.s.low = ((UWtype) uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}
+#endif
+\f
+#ifdef L_ffssi2
+#undef int
+extern int __ffsSI2 (UWtype u);
+int
+__ffsSI2 (UWtype u)
+{
+  UWtype count;
+  /* Zero has no set bit; report 0.  */
+  if (u == 0)
+    return 0;
+  /* Otherwise return the trailing-zero count plus one.  */
+  count_trailing_zeros (count, u);
+  return count + 1;
+}
+#endif
+\f
+#ifdef L_ffsdi2
+#undef int
+extern int __ffsDI2 (DWtype u);
+/* Return one plus the index of the lowest set bit of U, or 0 if U is 0.  */
+int
+__ffsDI2 (DWtype u)
+{
+  const DWunion parts = {.ll = u};
+  /* Use the low word unless it is empty; the high word then starts one
+     full word further up.  */
+  UWtype chosen = parts.s.low != 0 ? parts.s.low : parts.s.high;
+  UWtype offset = parts.s.low != 0 ? 0 : BITS_PER_UNIT * sizeof (Wtype);
+  UWtype zeros;
+
+  if (chosen == 0)
+    return 0;
+  count_trailing_zeros (zeros, chosen);
+  return zeros + offset + 1;
+}
+#endif
+\f
+#ifdef L_muldi3
+DWtype
+__muldi3 (DWtype u, DWtype v)
+{
+  const DWunion uu = {.ll = u};
+  const DWunion vv = {.ll = v};
+  DWunion w = {.ll = __umulsidi3 (uu.s.low, vv.s.low)};
+  /* Add both cross products into the high word of the low-word product.  */
+  w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high
+	       + (UWtype) uu.s.high * (UWtype) vv.s.low);
+
+  return w.ll;
+}
+#endif
+\f
+#if (defined (L_udivdi3) || defined (L_divdi3) || \
+     defined (L_umoddi3) || defined (L_moddi3))
+#if defined (sdiv_qrnnd)
+#define L_udiv_w_sdiv
+#endif
+#endif
+
+#ifdef L_udiv_w_sdiv
+#if defined (sdiv_qrnnd)
+#if (defined (L_udivdi3) || defined (L_divdi3) || \
+     defined (L_umoddi3) || defined (L_moddi3))
+static inline __attribute__ ((__always_inline__))
+#endif
+UWtype
+__udiv_w_sdiv (UWtype *rp, UWtype a1, UWtype a0, UWtype d)
+{
+  UWtype q, r;
+  UWtype c0, c1, b1;
+  /* The quotient Q is returned and the remainder R is stored in *RP.  */
+  if ((Wtype) d >= 0)
+    {
+      if (a1 < d - a1 - (a0 >> (W_TYPE_SIZE - 1)))
+	{
+	  /* dividend, divisor, and quotient are nonnegative */
+	  sdiv_qrnnd (q, r, a1, a0, d);
+	}
+      else
+	{
+	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (W_TYPE_SIZE - 1));
+	  /* Divide (c1*2^32 + c0) by d */
+	  sdiv_qrnnd (q, r, c1, c0, d);
+	  /* Add 2^31 to quotient */
+	  q += (UWtype) 1 << (W_TYPE_SIZE - 1);
+	}
+    }
+  else
+    {
+      b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
+      c1 = a1 >> 1;			/* A/2 */
+      c0 = (a1 << (W_TYPE_SIZE - 1)) + (a0 >> 1);
+
+      if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
+	{
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else if (c1 < b1)			/* So 2^31 <= (A/2)/b1 < 2^32 */
+	{
+	  c1 = (b1 - 1) - c1;
+	  c0 = ~c0;			/* logical NOT */
+
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  q = ~q;			/* (A/2)/b1 */
+	  r = (b1 - 1) - r;
+
+	  r = 2*r + (a0 & 1);		/* A/(2*b1) */
+
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else				/* Implies c1 = b1 */
+	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
+	  if (a0 >= -d)
+	    {
+	      q = -1;
+	      r = a0 + d;
+	    }
+	  else
+	    {
+	      q = -2;
+	      r = a0 + 2*d;
+	    }
+	}
+    }
+
+  *rp = r;
+  return q;
+}
+#else
+/* If sdiv_qrnnd doesn't exist, define dummy __udiv_w_sdiv.  */
+UWtype
+__udiv_w_sdiv (UWtype *rp __attribute__ ((__unused__)),
+	       UWtype a1 __attribute__ ((__unused__)),
+	       UWtype a0 __attribute__ ((__unused__)),
+	       UWtype d __attribute__ ((__unused__)))
+{
+  return 0;
+}
+#endif /* sdiv_qrnnd */
+#endif /* L_udiv_w_sdiv */
+\f
+#if (defined (L_udivdi3) || defined (L_divdi3) || \
+     defined (L_umoddi3) || defined (L_moddi3))
+#define L_udivmoddi4
+#endif
+
+#ifdef L_clz
+const UQItype __clz_tab[] =	/* entry I = number of significant bits in I */
+{
+  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
+#endif /* L_clz */
+\f
+#ifdef L_clzsi2
+#undef int
+extern int __clzSI2 (UWtype x);
+int
+__clzSI2 (UWtype x)
+{
+  Wtype ret;
+  /* RET receives the result of the count_leading_zeros macro for X.  */
+  count_leading_zeros (ret, x);
+
+  return ret;
+}
+#endif
+\f
+#ifdef L_clzdi2
+#undef int
+extern int __clzDI2 (UDWtype x);
+/* Count leading zero bits of X with count_leading_zeros on one word.  */
+int
+__clzDI2 (UDWtype x)
+{
+  const DWunion parts = {.ll = x};
+
+  /* Examine the high word unless it is zero; the low word then sits
+     W_TYPE_SIZE bits further down.  */
+  UWtype chosen = parts.s.high ? parts.s.high : parts.s.low;
+  const Wtype skipped = parts.s.high ? 0 : W_TYPE_SIZE;
+  Wtype zeros;
+
+  count_leading_zeros (zeros, chosen);
+  return zeros + skipped;
+}
+#endif
+\f
+#ifdef L_ctzsi2
+#undef int
+extern int __ctzSI2 (UWtype x);
+int
+__ctzSI2 (UWtype x)
+{
+  Wtype ret;
+  /* RET receives the result of the count_trailing_zeros macro for X.  */
+  count_trailing_zeros (ret, x);
+
+  return ret;
+}
+#endif
+\f
+#ifdef L_ctzdi2
+#undef int
+extern int __ctzDI2 (UDWtype x);
+/* Count trailing zero bits of X with count_trailing_zeros on one word.  */
+int
+__ctzDI2 (UDWtype x)
+{
+  const DWunion parts = {.ll = x};
+
+  /* Examine the low word unless it is zero; the high word then starts
+     W_TYPE_SIZE bits further up.  */
+  UWtype chosen = parts.s.low ? parts.s.low : parts.s.high;
+  const Wtype skipped = parts.s.low ? 0 : W_TYPE_SIZE;
+  Wtype zeros;
+
+  count_trailing_zeros (zeros, chosen);
+  return zeros + skipped;
+}
+#endif
+
+#if (defined (L_popcountsi2) || defined (L_popcountdi2)	\
+     || defined (L_popcount_tab))
+extern const UQItype __popcount_tab[] ATTRIBUTE_HIDDEN;
+#endif
+
+#ifdef L_popcount_tab
+const UQItype __popcount_tab[] =	/* entry I = number of set bits in I */
+{
+    0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+    1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+    1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+    1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+    3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8,
+};
+#endif /* L_popcount_tab */
+\f
+#ifdef L_popcountsi2
+#undef int
+extern int __popcountSI2 (UWtype x);
+/* Count the set bits of X one byte at a time through __popcount_tab.  */
+int
+__popcountSI2 (UWtype x)
+{
+  UWtype total = 0, done;
+
+  for (done = 0; done < W_TYPE_SIZE; done += 8, x >>= 8)
+    total += __popcount_tab[x & 0xff];
+  return total;
+}
+#endif
+\f
+#ifdef L_popcountdi2
+#undef int
+extern int __popcountDI2 (UDWtype x);
+/* Count the set bits of X one byte at a time through __popcount_tab.  */
+int
+__popcountDI2 (UDWtype x)
+{
+  UWtype total = 0, done;
+
+  for (done = 0; done < 2*W_TYPE_SIZE; done += 8, x >>= 8)
+    total += __popcount_tab[x & 0xff];
+  return total;
+}
+#endif
+\f
+#ifdef L_paritysi2
+#undef int
+extern int __paritySI2 (UWtype x);
+int
+__paritySI2 (UWtype x)
+{
+#if W_TYPE_SIZE > 64
+# error "fill out the table"
+#endif
+#if W_TYPE_SIZE > 32
+  x ^= x >> 32;
+#endif
+#if W_TYPE_SIZE > 16
+  x ^= x >> 16;
+#endif
+  x ^= x >> 8;		/* keep XOR-folding the upper half into the lower */
+  x ^= x >> 4;
+  x &= 0xf;		/* the low nibble now carries the parity of X */
+  return (0x6996 >> x) & 1;	/* bit K of 0x6996 is the parity of K */
+}
+#endif
+\f
+#ifdef L_paritydi2
+#undef int
+extern int __parityDI2 (UDWtype x);
+int
+__parityDI2 (UDWtype x)
+{
+  const DWunion uu = {.ll = x};
+  UWtype nx = uu.s.low ^ uu.s.high;
+  /* NX folds both words into one; the rest is the single-word fold.  */
+#if W_TYPE_SIZE > 64
+# error "fill out the table"
+#endif
+#if W_TYPE_SIZE > 32
+  nx ^= nx >> 32;
+#endif
+#if W_TYPE_SIZE > 16
+  nx ^= nx >> 16;
+#endif
+  nx ^= nx >> 8;
+  nx ^= nx >> 4;
+  nx &= 0xf;		/* the low nibble now carries the parity of X */
+  return (0x6996 >> nx) & 1;	/* bit K of 0x6996 is the parity of K */
+}
+#endif
+
+#ifdef L_udivmoddi4
+/* Return N / D; when RP is nonzero the remainder is stored in *RP.  */
+#if (defined (L_udivdi3) || defined (L_divdi3) || \
+     defined (L_umoddi3) || defined (L_moddi3))
+static inline __attribute__ ((__always_inline__))
+#endif
+UDWtype
+__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+  const DWunion nn = {.ll = n};
+  const DWunion dd = {.ll = d};
+  DWunion rr;
+  UWtype d0, d1, n0, n1, n2;
+  UWtype q0, q1;
+  UWtype b, bm;
+  /* Split the divisor and the dividend into their low and high words.  */
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  udiv_qrnnd (q1, n1, 0, n1, d0);
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0.  */
+	}
+
+      if (rp != 0)
+	{
+	  rr.s.low = n0;
+	  rr.s.high = 0;
+	  *rp = rr.ll;
+	}
+    }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm != 0)
+	    {
+	      /* Normalize, i.e. make the most significant bit of the
+		 denominator set.  */
+
+	      d0 = d0 << bm;
+	      n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+	      n0 = n0 << bm;
+	    }
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0 >> bm.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 leading quotient digit q1 = 1).
+
+		 This special case is necessary, not an optimization.
+		 (Shifts counts of W_TYPE_SIZE are undefined.)  */
+
+	      n1 -= d0;
+	      q1 = 1;
+	    }
+	  else
+	    {
+	      /* Normalize.  */
+
+	      b = W_TYPE_SIZE - bm;
+
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q1, n1, n2, n1, d0);
+	    }
+
+	  /* n1 != d0...  */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0 >> bm.  */
+	}
+
+      if (rp != 0)
+	{
+	  rr.s.low = n0 >> bm;
+	  rr.s.high = 0;
+	  *rp = rr.ll;
+	}
+    }
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+  else
+    {
+      if (d1 > n1)
+	{
+	  /* 00 = nn / DD */
+
+	  q0 = 0;
+	  q1 = 0;
+
+	  /* Remainder in n1n0.  */
+	  if (rp != 0)
+	    {
+	      rr.s.low = n0;
+	      rr.s.high = n1;
+	      *rp = rr.ll;
+	    }
+	}
+      else
+	{
+	  /* 0q = NN / dd */
+
+	  count_leading_zeros (bm, d1);
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 quotient digit q0 = 0 or 1).
+
+		 This special case is necessary, not an optimization.  */
+
+	      /* The condition on the next line takes advantage of that
+		 n1 >= d1 (true due to program flow).  */
+	      if (n1 > d1 || n0 >= d0)
+		{
+		  q0 = 1;
+		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+		}
+	      else
+		q0 = 0;
+
+	      q1 = 0;
+
+	      if (rp != 0)
+		{
+		  rr.s.low = n0;
+		  rr.s.high = n1;
+		  *rp = rr.ll;
+		}
+	    }
+	  else
+	    {
+	      UWtype m1, m0;
+	      /* Normalize.  */
+
+	      b = W_TYPE_SIZE - bm;
+
+	      d1 = (d1 << bm) | (d0 >> b);
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q0, n1, n2, n1, d1);
+	      umul_ppmm (m1, m0, q0, d0);
+
+	      if (m1 > n1 || (m1 == n1 && m0 > n0))
+		{
+		  q0--;
+		  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+		}
+
+	      q1 = 0;
+
+	      /* Remainder in (n1n0 - m1m0) >> bm.  */
+	      if (rp != 0)
+		{
+		  sub_ddmmss (n1, n0, n1, n0, m1, m0);
+		  rr.s.low = (n1 << b) | (n0 >> bm);
+		  rr.s.high = n1 >> bm;
+		  *rp = rr.ll;
+		}
+	    }
+	}
+    }
+  /* Assemble the quotient from its low word Q0 and high word Q1.  */
+  const DWunion ww = {{.low = q0, .high = q1}};
+  return ww.ll;
+}
+#endif /* L_udivmoddi4 */
+
+#ifdef L_divdi3
+/* Signed double-word division: divide the magnitudes with __udivmoddi4,
+   then negate the quotient when exactly one operand was negative.  */
+DWtype
+__divdi3 (DWtype u, DWtype v)
+{
+  DWunion num = {.ll = u};
+  DWunion den = {.ll = v};
+  const word_type num_neg = num.s.high < 0;
+  const word_type den_neg = den.s.high < 0;
+  DWtype quot;
+
+  if (num_neg)
+    num.ll = -num.ll;
+  if (den_neg)
+    den.ll = -den.ll;
+
+  quot = __udivmoddi4 (num.ll, den.ll, (UDWtype *) 0);
+  if (num_neg != den_neg)
+    quot = -quot;
+  return quot;
+}
+#endif
+
+#ifdef L_moddi3
+/* Signed double-word remainder; the result is negated when U is negative.  */
+DWtype
+__moddi3 (DWtype u, DWtype v)
+{
+  word_type c = 0;
+  DWunion uu = {.ll = u};
+  DWunion vv = {.ll = v};
+  DWtype w;
+
+  if (uu.s.high < 0)
+    c = ~c,
+    uu.ll = -uu.ll;
+  if (vv.s.high < 0)
+    vv.ll = -vv.ll;
+  /* __udivmoddi4 stores through a UDWtype *; cast to match its prototype.  */
+  (void) __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) &w);
+  if (c)
+    w = -w;
+  return w;
+}
+#endif
+
+#ifdef L_umoddi3
+/* Unsigned double-word remainder of U divided by V.  */
+UDWtype
+__umoddi3 (UDWtype u, UDWtype v)
+{
+  UDWtype rem;
+
+  /* Only the remainder is wanted; the returned quotient is discarded.  */
+  (void) __udivmoddi4 (u, v, &rem);
+  return rem;
+}
+#endif
+
+#ifdef L_udivdi3
+UDWtype
+__udivdi3 (UDWtype n, UDWtype d)
+{
+  return __udivmoddi4 (n, d, (UDWtype *) 0);	/* null RP: no remainder stored */
+}
+#endif
+\f
+#ifdef L_cmpdi2
+/* Three-way signed compare: 0 if A < B, 1 if A == B, 2 if A > B.  */
+word_type
+__cmpdi2 (DWtype a, DWtype b)
+{
+  const DWunion lhs = {.ll = a};
+  const DWunion rhs = {.ll = b};
+  const UWtype lhs_low = (UWtype) lhs.s.low;
+  const UWtype rhs_low = (UWtype) rhs.s.low;
+  /* The signed high words decide unless they are equal.  */
+  if (lhs.s.high != rhs.s.high)
+    return lhs.s.high < rhs.s.high ? 0 : 2;
+  /* Otherwise the low words decide, compared as unsigned.  */
+  if (lhs_low != rhs_low)
+    return lhs_low < rhs_low ? 0 : 2;
+  return 1;
+}
+#endif
+
+#ifdef L_ucmpdi2
+/* Three-way unsigned compare: 0 if A < B, 1 if A == B, 2 if A > B.  */
+word_type
+__ucmpdi2 (DWtype a, DWtype b)
+{
+  const DWunion lhs = {.ll = a};
+  const DWunion rhs = {.ll = b};
+  const UWtype lhs_high = (UWtype) lhs.s.high, rhs_high = (UWtype) rhs.s.high;
+  const UWtype lhs_low = (UWtype) lhs.s.low, rhs_low = (UWtype) rhs.s.low;
+
+  /* High words decide unless equal; then the low words do.  */
+  if (lhs_high != rhs_high)
+    return lhs_high < rhs_high ? 0 : 2;
+  if (lhs_low != rhs_low)
+    return lhs_low < rhs_low ? 0 : 2;
+  return 1;
+}
+#endif
+\f
+#if defined(L_fixunstfdi) && (LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128)
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert A to a double word, one word at a time; negative A gives 0.  */
+DWtype
+__fixunstfDI (TFtype a)
+{
+  if (a < 0)
+    return 0;
+
+  /* Compute high word of result, as a flonum.  */
+  const TFtype b = (a / HIGH_WORD_COEFF);
+  /* Convert that to fixed (but not to DWtype!),
+     and shift it into the high word.  */
+  UDWtype v = (UWtype) b;
+  v <<= WORD_SIZE;
+  /* Remove high part from the TFtype, leaving the low part as flonum.  */
+  a -= (TFtype)v;
+  /* Convert that to fixed (but not to DWtype!) and add it in.
+     Sometimes A comes out negative.  This is significant, since
+     A has more bits than a long int does.  */
+  if (a < 0)
+    v -= (UWtype) (- a);
+  else
+    v += (UWtype) a;
+  return v;
+}
+#endif
+
+#if defined(L_fixtfdi) && (LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128)
+/* Convert A to a signed double word by way of __fixunstfDI.  */
+DWtype
+__fixtfdi (TFtype a)
+{
+  /* A negative A is converted as its magnitude and the result negated.  */
+  return (a < 0) ? - __fixunstfDI (-a) : __fixunstfDI (a);
+}
+#endif
+
+#if defined(L_fixunsxfdi) && (LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 96)
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert A to a double word, one word at a time; negative A gives 0.  */
+DWtype
+__fixunsxfDI (XFtype a)
+{
+  if (a < 0)
+    return 0;
+
+  /* Compute high word of result, as a flonum.  */
+  const XFtype b = (a / HIGH_WORD_COEFF);
+  /* Convert that to fixed (but not to DWtype!),
+     and shift it into the high word.  */
+  UDWtype v = (UWtype) b;
+  v <<= WORD_SIZE;
+  /* Remove high part from the XFtype, leaving the low part as flonum.  */
+  a -= (XFtype)v;
+  /* Convert that to fixed (but not to DWtype!) and add it in.
+     Sometimes A comes out negative.  This is significant, since
+     A has more bits than a long int does.  */
+  if (a < 0)
+    v -= (UWtype) (- a);
+  else
+    v += (UWtype) a;
+  return v;
+}
+#endif
+
+#if defined(L_fixxfdi) && (LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 96)
+/* Convert A to a signed double word by way of __fixunsxfDI.  */
+DWtype
+__fixxfdi (XFtype a)
+{
+  /* A negative A is converted as its magnitude and the result negated.  */
+  return (a < 0) ? - __fixunsxfDI (-a) : __fixunsxfDI (a);
+}
+#endif
+
+#ifdef L_fixunsdfdi
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert A to a double word by computing its high and low words.  */
+DWtype
+__fixunsdfDI (DFtype a)
+{
+  /* Get high part of result.  The division here will just move the radix
+     point and will not cause any rounding.  Then the conversion to integral
+     type chops result as desired.  */
+  const UWtype hi = a / HIGH_WORD_COEFF;
+
+  /* Get low part of result.  Convert `hi' to floating type and scale it back,
+     then subtract this from the number being converted.  This leaves the low
+     part.  Convert that to integral type.  */
+  const UWtype lo = (a - ((DFtype) hi) * HIGH_WORD_COEFF);
+
+  /* Assemble result from the two parts.  */
+  return ((UDWtype) hi << WORD_SIZE) | lo;
+}
+#endif
+
+#ifdef L_fixdfdi
+/* Convert A to a signed double word by way of __fixunsdfDI.  */
+DWtype
+__fixdfdi (DFtype a)
+{
+  /* A negative A is converted as its magnitude and the result negated.  */
+  return (a < 0) ? - __fixunsdfDI (-a) : __fixunsdfDI (a);
+}
+#endif
+
+#ifdef L_fixunssfdi
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert ORIGINAL_A to a double word by way of a DFtype copy.  */
+DWtype
+__fixunssfDI (SFtype original_a)
+{
+  /* Convert the SFtype to a DFtype, because that is surely not going
+     to lose any bits.  Some day someone else can write a faster version
+     that avoids converting to DFtype, and verify it really works right.  */
+  const DFtype a = original_a;
+
+  /* Get high part of result.  The division here will just move the radix
+     point and will not cause any rounding.  Then the conversion to integral
+     type chops result as desired.  */
+  const UWtype hi = a / HIGH_WORD_COEFF;
+
+  /* Get low part of result.  Convert `hi' to floating type and scale it back,
+     then subtract this from the number being converted.  This leaves the low
+     part.  Convert that to integral type.  */
+  const UWtype lo = (a - ((DFtype) hi) * HIGH_WORD_COEFF);
+
+  /* Assemble result from the two parts.  */
+  return ((UDWtype) hi << WORD_SIZE) | lo;
+}
+#endif
+
+#ifdef L_fixsfdi
+/* Convert A to a signed double word by way of __fixunssfDI.  */
+DWtype
+__fixsfdi (SFtype a)
+{
+  /* A negative A is converted as its magnitude and the result negated.  */
+  return (a < 0) ? - __fixunssfDI (-a) : __fixunssfDI (a);
+}
+#endif
+
+#if defined(L_floatdixf) && (LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 96)
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_HALFWORD_COEFF (((UDWtype) 1) << (WORD_SIZE / 2))
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert U: scale its signed high word, then add its unsigned low word.  */
+XFtype
+__floatdixf (DWtype u)
+{
+  XFtype d = (Wtype) (u >> WORD_SIZE);
+  d *= HIGH_HALFWORD_COEFF;
+  d *= HIGH_HALFWORD_COEFF;	/* two half-word steps scale by 2^WORD_SIZE */
+  d += (UWtype) (u & (HIGH_WORD_COEFF - 1));
+
+  return d;
+}
+#endif
+
+#if defined(L_floatditf) && (LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128)
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_HALFWORD_COEFF (((UDWtype) 1) << (WORD_SIZE / 2))
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert U: scale its signed high word, then add its unsigned low word.  */
+TFtype
+__floatditf (DWtype u)
+{
+  TFtype d = (Wtype) (u >> WORD_SIZE);
+  d *= HIGH_HALFWORD_COEFF;
+  d *= HIGH_HALFWORD_COEFF;	/* two half-word steps scale by 2^WORD_SIZE */
+  d += (UWtype) (u & (HIGH_WORD_COEFF - 1));
+
+  return d;
+}
+#endif
+
+#ifdef L_floatdidf
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_HALFWORD_COEFF (((UDWtype) 1) << (WORD_SIZE / 2))
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Convert U: scale its signed high word, then add its unsigned low word.  */
+DFtype
+__floatdidf (DWtype u)
+{
+  DFtype d = (Wtype) (u >> WORD_SIZE);
+  d *= HIGH_HALFWORD_COEFF;
+  d *= HIGH_HALFWORD_COEFF;	/* two half-word steps scale by 2^WORD_SIZE */
+  d += (UWtype) (u & (HIGH_WORD_COEFF - 1));
+
+  return d;
+}
+#endif
+
+#ifdef L_floatdisf
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_HALFWORD_COEFF (((UDWtype) 1) << (WORD_SIZE / 2))
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+/* Sizes in bits: the double word, and the DFmode and SFmode mantissas.  */
+#define DI_SIZE (sizeof (DWtype) * BITS_PER_UNIT)
+#define DF_SIZE DBL_MANT_DIG
+#define SF_SIZE FLT_MANT_DIG
+/* Convert U to SFtype through a DFtype intermediate.  */
+SFtype
+__floatdisf (DWtype u)
+{
+  /* Protect against double-rounding error.
+     Represent any low-order bits, that might be truncated in DFmode,
+     by a bit that won't be lost.  The bit can go in anywhere below the
+     rounding position of the SFmode.  A fixed mask and bit position
+     handles all usual configurations.  It doesn't handle the case
+     of 128-bit DImode, however.  */
+  if (DF_SIZE < DI_SIZE
+      && DF_SIZE > (DI_SIZE - DF_SIZE + SF_SIZE))
+    {
+#define REP_BIT ((UDWtype) 1 << (DI_SIZE - DF_SIZE))
+      if (! (- ((DWtype) 1 << DF_SIZE) < u
+	     && u < ((DWtype) 1 << DF_SIZE)))
+	{
+	  if ((UDWtype) u & (REP_BIT - 1))
+	    {
+	      u &= ~ (REP_BIT - 1);
+	      u |= REP_BIT;
+	    }
+	}
+    }
+  /* Do the calculation in DFmode
+     so that we don't lose any of the precision of the high word
+     while multiplying it.  */
+  DFtype f = (Wtype) (u >> WORD_SIZE);
+  f *= HIGH_HALFWORD_COEFF;
+  f *= HIGH_HALFWORD_COEFF;
+  f += (UWtype) (u & (HIGH_WORD_COEFF - 1));
+
+  return (SFtype) f;
+}
+#endif
+
+#if defined(L_fixunsxfsi) && LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 96
+/* Reenable the normal types, in case limits.h needs them.  */
+#undef char
+#undef short
+#undef int
+#undef long
+#undef unsigned
+#undef float
+#undef double
+#undef MIN
+#undef MAX
+#include <limits.h>
+/* Values >= -Wtype_MIN are biased by Wtype_MIN, converted, then unbiased.  */
+UWtype
+__fixunsxfSI (XFtype a)
+{
+  if (a >= - (DFtype) Wtype_MIN)
+    return (Wtype) (a + Wtype_MIN) - Wtype_MIN;
+  return (Wtype) a;
+}
+#endif
+
+#ifdef L_fixunsdfsi
+/* Reenable the normal types, in case limits.h needs them.  */
+#undef char
+#undef short
+#undef int
+#undef long
+#undef unsigned
+#undef float
+#undef double
+#undef MIN
+#undef MAX
+#include <limits.h>
+/* Values >= -Wtype_MIN are biased by Wtype_MIN, converted, then unbiased.  */
+UWtype
+__fixunsdfSI (DFtype a)
+{
+  if (a >= - (DFtype) Wtype_MIN)
+    return (Wtype) (a + Wtype_MIN) - Wtype_MIN;
+  return (Wtype) a;
+}
+#endif
+
+#ifdef L_fixunssfsi
+/* Reenable the normal types, in case limits.h needs them.  */
+#undef char
+#undef short
+#undef int
+#undef long
+#undef unsigned
+#undef float
+#undef double
+#undef MIN
+#undef MAX
+#include <limits.h>
+/* Values >= -Wtype_MIN are biased by Wtype_MIN, converted, then unbiased.  */
+UWtype
+__fixunssfSI (SFtype a)
+{
+  if (a >= - (SFtype) Wtype_MIN)
+    return (Wtype) (a + Wtype_MIN) - Wtype_MIN;
+  return (Wtype) a;
+}
+#endif
+\f
+/* From here on down, the routines use normal data types.  */
+
+#define SItype bogus_type
+#define USItype bogus_type
+#define DItype bogus_type
+#define UDItype bogus_type
+#define SFtype bogus_type
+#define DFtype bogus_type
+#undef Wtype
+#undef UWtype
+#undef HWtype
+#undef UHWtype
+#undef DWtype
+#undef UDWtype
+
+#undef char
+#undef short
+#undef int
+#undef long
+#undef unsigned
+#undef float
+#undef double
+\f
+#ifdef L__gcc_bcmp
+
+/* Like bcmp except the sign is meaningful.
+   Result is negative if S1 is less than S2,
+   positive if S1 is greater, 0 if S1 and S2 are equal.  */
+
+int
+__gcc_bcmp (const unsigned char *s1, const unsigned char *s2, size_t size)
+{
+  size_t i;
+
+  /* Scan forward; the first differing byte pair decides the result.  */
+  for (i = 0; i < size; i++)
+    if (s1[i] != s2[i])
+      return s1[i] - s2[i];
+  /* All SIZE bytes matched.  */
+  return 0;
+}
+
+#endif
+\f
+/* __eprintf used to be used by GCC's private version of <assert.h>.
+   We no longer provide that header, but this routine remains in libgcc.a
+   for binary backward compatibility.  Note that it is not included in
+   the shared version of libgcc.  */
+#ifdef L_eprintf
+#ifndef inhibit_libc
+
+#undef NULL /* Avoid errors if stdio.h and our stddef.h mismatch.  */
+#include <stdio.h>
+/* Print EXPRESSION, LINE and FILENAME to stderr using format STRING, then abort.  */
+void
+__eprintf (const char *string, const char *expression,
+	   unsigned int line, const char *filename)
+{
+  fprintf (stderr, string, expression, line, filename);
+  fflush (stderr);
+  abort ();
+}
+
+#endif /* ! inhibit_libc */
+#endif /* L_eprintf */
+
+\f
+#ifdef L_clear_cache
+/* Clear part of an instruction cache.  BEG and END are passed to
+   CLEAR_INSN_CACHE when it is defined; otherwise the body is empty.  */
+void
+__clear_cache (char *beg __attribute__((__unused__)),
+	       char *end __attribute__((__unused__)))
+{
+#ifdef CLEAR_INSN_CACHE
+  CLEAR_INSN_CACHE (beg, end);
+#endif /* CLEAR_INSN_CACHE */
+}
+
+#endif /* L_clear_cache */
+\f
+#ifdef L_trampoline
+
+/* Jump to a trampoline, loading the static chain address.  */
+
+#if defined(WINNT) && ! defined(__CYGWIN__) && ! defined (_UWIN)
+/* Compiled only under the enclosing WINNT test: returns 8192 if _ALPHA_ is defined, else 4096.  */
+long
+getpagesize (void)
+{
+#ifdef _ALPHA_
+  return 8192;
+#else
+  return 4096;
+#endif
+}
+
+#ifdef __i386__
+extern int VirtualProtect (char *, int, int, int *) __attribute__((stdcall));
+#endif
+
+/* Minimal mprotect stand-in: map PROT onto a VirtualProtect protection
+   constant and apply it to LEN bytes at ADDR.  Returns 0 on success, -1 if
+   VirtualProtect fails or PROT is a value this table does not cover.  */
+int
+mprotect (char *addr, int len, int prot)
+{
+  int np, op;
+
+  switch (prot)
+    {
+    case 7: np = 0x40; break;	/* PAGE_EXECUTE_READWRITE */
+    case 5: np = 0x20; break;	/* PAGE_EXECUTE_READ */
+    case 4: np = 0x10; break;	/* PAGE_EXECUTE */
+    case 3: np = 0x04; break;	/* PAGE_READWRITE */
+    case 1: np = 0x02; break;	/* PAGE_READONLY */
+    case 0: np = 0x01; break;	/* PAGE_NOACCESS */
+    /* Unsupported value (e.g. 2 or 6): fail instead of using NP unset.  */
+    default: return -1;
+    }
+
+  /* VirtualProtect signals success with a nonzero result.  */
+  return VirtualProtect (addr, len, np, &op) ? 0 : -1;
+}
+
+#endif /* WINNT && ! __CYGWIN__ && ! _UWIN */
+
+#ifdef TRANSFER_FROM_TRAMPOLINE
+TRANSFER_FROM_TRAMPOLINE
+#endif
+#endif /* L_trampoline */
+\f
+#ifndef __CYGWIN__
+#ifdef L__main
+
+#include "gbl-ctors.h"
+/* Some systems use __main in a way incompatible with its use in gcc, in these
+   cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
+   give the same symbol without quotes for an alternative entry point.  You
+   must define both, or neither.  */
+#ifndef NAME__MAIN
+#define NAME__MAIN "__main"
+#define SYMBOL__MAIN __main
+#endif
+
+#ifdef INIT_SECTION_ASM_OP
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION
+#endif
+
+#if !defined (HAS_INIT_SECTION) || !defined (OBJECT_FORMAT_ELF)
+
+/* Some ELF crosses use crtstuff.c to provide __CTOR_LIST__, but use this
+   code to run constructors.  In that case, we need to handle EH here, too.  */
+
+#ifdef EH_FRAME_SECTION_NAME
+#include "unwind-dw2-fde.h"
+extern unsigned char __EH_FRAME_BEGIN__[];
+#endif
+
+/* Run all the global destructors on exit from the program.  */
+/* P is static and advanced before each call, so a nested call resumes at the next entry.  */
+void
+__do_global_dtors (void)
+{
+#ifdef DO_GLOBAL_DTORS_BODY
+  DO_GLOBAL_DTORS_BODY;
+#else
+  static func_ptr *p = __DTOR_LIST__ + 1;	/* Start at element 1 of the list.  */
+  while (*p)	/* A null entry ends the walk.  */
+    {
+      p++;
+      (*(p-1)) ();
+    }
+#endif
+#if defined (EH_FRAME_SECTION_NAME) && !defined (HAS_INIT_SECTION)
+  {
+    static int completed = 0;	/* Guards against deregistering twice.  */
+    if (! completed)
+      {
+	completed = 1;
+	__deregister_frame_info (__EH_FRAME_BEGIN__);
+      }
+  }
+#endif
+}
+#endif
+
+#ifndef HAS_INIT_SECTION
+/* Run all the global constructors on entry to the program.  */
+/* Registers EH frame info when configured, runs the ctor body, then queues the dtors with atexit.  */
+void
+__do_global_ctors (void)
+{
+#ifdef EH_FRAME_SECTION_NAME
+  {
+    static struct object object;
+    __register_frame_info (__EH_FRAME_BEGIN__, &object);
+  }
+#endif
+  DO_GLOBAL_CTORS_BODY;
+  atexit (__do_global_dtors);
+}
+#endif /* no HAS_INIT_SECTION */
+
+#if !defined (HAS_INIT_SECTION) || defined (INVOKE__main)
+/* Subroutine called automatically by `main'.
+   Compiling a global function named `main'
+   produces an automatic call to this function at the beginning.
+
+   For many systems, this routine calls __do_global_ctors.
+   For systems which support a .init section we use the .init section
+   to run __do_global_ctors, so we need not do anything here.  */
+
+extern void SYMBOL__MAIN (void);
+void
+SYMBOL__MAIN (void)
+{
+  /* `main' may be entered recursively; this latch runs the initializers once.  */
+  static int initialized;
+  if (initialized)
+    return;
+  /* Set the latch before constructing, so a nested call returns at once.  */
+  initialized = 1;
+  __do_global_ctors ();
+}
+#endif /* no HAS_INIT_SECTION or INVOKE__main */
+
+#endif /* L__main */
+#endif /* __CYGWIN__ */
+\f
+#ifdef L_ctors
+
+#include "gbl-ctors.h"
+
+/* Provide default definitions for the lists of constructors and
+   destructors, so that we don't get linker errors.  These symbols are
+   intentionally bss symbols, so that gld and/or collect will provide
+   the right values.  */
+
+/* We declare the lists here with two elements each,
+   so that they are valid empty lists if no other definition is loaded.
+
+   If we are using the old "set" extensions to have the gnu linker
+   collect ctors and dtors, then __CTOR_LIST__ and __DTOR_LIST__
+   must be in the bss/common section.
+
+   Long term no port should use those extensions.  But many still do.  */
+#if !defined(INIT_SECTION_ASM_OP) && !defined(CTOR_LISTS_DEFINED_EXTERNALLY)
+#if defined (TARGET_ASM_CONSTRUCTOR) || defined (USE_COLLECT2)
+func_ptr __CTOR_LIST__[2] = {0, 0};
+func_ptr __DTOR_LIST__[2] = {0, 0};
+#else
+func_ptr __CTOR_LIST__[2];
+func_ptr __DTOR_LIST__[2];
+#endif
+#endif /* no INIT_SECTION_ASM_OP and not CTOR_LISTS_DEFINED_EXTERNALLY */
+#endif /* L_ctors */
+
--- ./gcc/gcc/libgcc2.h
+++ ./gcc/gcc/libgcc2.h
@@ -0,0 +1,310 @@
+/* Header file for libgcc2.c.  */
+/* Copyright (C) 2000, 2001
+   Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.  */
+
+
+#ifndef GCC_LIBGCC2_H
+#define GCC_LIBGCC2_H
+
+extern int __gcc_bcmp (const unsigned char *, const unsigned char *, size_t);
+extern void __clear_cache (char *, char *);
+extern void __eprintf (const char *, const char *, unsigned int, const char *)
+  __attribute__ ((__noreturn__));
+
+struct exception_descriptor;
+extern short int __get_eh_table_language (struct exception_descriptor *);
+extern short int __get_eh_table_version (struct exception_descriptor *);
+
+/* Permit the tm.h file to select the endianness to use just for this
+   file.  This is used when the endianness is determined when the
+   compiler is run.  */
+
+#ifndef LIBGCC2_WORDS_BIG_ENDIAN
+#define LIBGCC2_WORDS_BIG_ENDIAN WORDS_BIG_ENDIAN
+#endif
+
+#ifndef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE LONG_DOUBLE_TYPE_SIZE
+#endif
+
+#ifndef MIN_UNITS_PER_WORD
+#define MIN_UNITS_PER_WORD UNITS_PER_WORD
+#endif
+
+/* In the first part of this file, we are interfacing to calls generated
+   by the compiler itself.  These calls pass values into these routines
+   which have very specific modes (rather than very specific types), and
+   these compiler-generated calls also expect any return values to have
+   very specific modes (rather than very specific types).  Thus, we need
+   to avoid using regular C language type names in this part of the file
+   because the sizes for those types can be configured to be anything.
+   Instead we use the following special type names.  */
+
+typedef		 int QItype	__attribute__ ((mode (QI)));
+typedef unsigned int UQItype	__attribute__ ((mode (QI)));
+typedef		 int HItype	__attribute__ ((mode (HI)));
+typedef unsigned int UHItype	__attribute__ ((mode (HI)));
+#if MIN_UNITS_PER_WORD > 1
+/* These typedefs are usually forbidden on dsp's with UNITS_PER_WORD 1.  */
+typedef 	 int SItype	__attribute__ ((mode (SI)));
+typedef unsigned int USItype	__attribute__ ((mode (SI)));
+#if LONG_LONG_TYPE_SIZE > 32
+/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 2.  */
+typedef		 int DItype	__attribute__ ((mode (DI)));
+typedef unsigned int UDItype	__attribute__ ((mode (DI)));
+#if MIN_UNITS_PER_WORD > 4
+/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 4.  */
+typedef		 int TItype	__attribute__ ((mode (TI)));
+typedef unsigned int UTItype	__attribute__ ((mode (TI)));
+#endif
+#endif
+#endif
+
+#if BITS_PER_UNIT == 8
+
+typedef 	float SFtype	__attribute__ ((mode (SF)));
+typedef		float DFtype	__attribute__ ((mode (DF)));
+
+#if LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 96
+typedef		float XFtype	__attribute__ ((mode (XF)));
+#endif
+#if LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128
+typedef		float TFtype	__attribute__ ((mode (TF)));
+#endif
+
+#else /* BITS_PER_UNIT != 8 */
+
+/* On dsp's there are usually qf/hf/tqf modes used instead of the above.
+   For now we don't support them in libgcc2.c.  */
+
+#undef L_fixdfdi
+#undef L_fixsfdi
+#undef L_fixtfdi
+#undef L_fixunsdfdi
+#undef L_fixunsdfsi
+#undef L_fixunssfdi
+#undef L_fixunssfsi
+#undef L_fixunstfdi
+#undef L_fixunsxfdi
+#undef L_fixunsxfsi
+#undef L_fixxfdi
+#undef L_floatdidf
+#undef L_floatdisf
+#undef L_floatditf
+#undef L_floatdixf
+
+#endif /* BITS_PER_UNIT != 8 */
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+/* Make sure that we don't accidentally use any normal C language built-in
+   type names in the first part of this file.  Instead we want to use *only*
+   the type names defined above.  The following macro definitions ensure
+   that if we *do* accidentally use some normal C language built-in type name,
+   we will get a syntax error.  */
+
+#define char bogus_type
+#define short bogus_type
+#define int bogus_type
+#define long bogus_type
+#define unsigned bogus_type
+#define float bogus_type
+#define double bogus_type
+
+#if MIN_UNITS_PER_WORD > 4
+#define W_TYPE_SIZE (8 * BITS_PER_UNIT)
+#define Wtype	DItype
+#define UWtype	UDItype
+#define HWtype	DItype
+#define UHWtype	UDItype
+#define DWtype	TItype
+#define UDWtype	UTItype
+#define __NW(a,b)	__ ## a ## di ## b
+#define __NDW(a,b)	__ ## a ## ti ## b
+#elif MIN_UNITS_PER_WORD > 2 \
+      || (MIN_UNITS_PER_WORD > 1 && LONG_LONG_TYPE_SIZE > 32)
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+#define Wtype	SItype
+#define UWtype	USItype
+#define HWtype	SItype
+#define UHWtype	USItype
+#define DWtype	DItype
+#define UDWtype	UDItype
+#define __NW(a,b)	__ ## a ## si ## b
+#define __NDW(a,b)	__ ## a ## di ## b
+#elif MIN_UNITS_PER_WORD > 1
+#define W_TYPE_SIZE (2 * BITS_PER_UNIT)
+#define Wtype	HItype
+#define UWtype	UHItype
+#define HWtype	HItype
+#define UHWtype	UHItype
+#define DWtype	SItype
+#define UDWtype	USItype
+#define __NW(a,b)	__ ## a ## hi ## b
+#define __NDW(a,b)	__ ## a ## si ## b
+#else
+#define W_TYPE_SIZE BITS_PER_UNIT
+#define Wtype	QItype
+#define UWtype  UQItype
+#define HWtype	QItype
+#define UHWtype	UQItype
+#define DWtype	HItype
+#define UDWtype	UHItype
+#define __NW(a,b)	__ ## a ## qi ## b
+#define __NDW(a,b)	__ ## a ## hi ## b
+#endif
+
+#define Wtype_MAX ((Wtype)(((UWtype)1 << (W_TYPE_SIZE - 1)) - 1))
+#define Wtype_MIN (- Wtype_MAX - 1)
+
+#define __muldi3	__NDW(mul,3)
+#define __divdi3	__NDW(div,3)
+#define __udivdi3	__NDW(udiv,3)
+#define __moddi3	__NDW(mod,3)
+#define __umoddi3	__NDW(umod,3)
+#define __negdi2	__NDW(neg,2)
+#define __lshrdi3	__NDW(lshr,3)
+#define __ashldi3	__NDW(ashl,3)
+#define __ashrdi3	__NDW(ashr,3)
+#define __cmpdi2	__NDW(cmp,2)
+#define __ucmpdi2	__NDW(ucmp,2)
+#define __udivmoddi4	__NDW(udivmod,4)
+#define __fixunstfDI	__NDW(fixunstf,)
+#define __fixtfdi	__NDW(fixtf,)
+#define __fixunsxfDI	__NDW(fixunsxf,)
+#define __fixxfdi	__NDW(fixxf,)
+#define __fixunsdfDI	__NDW(fixunsdf,)
+#define __fixdfdi	__NDW(fixdf,)
+#define __fixunssfDI	__NDW(fixunssf,)
+#define __fixsfdi	__NDW(fixsf,)
+#define __floatdixf	__NDW(float,xf)
+#define __floatditf	__NDW(float,tf)
+#define __floatdidf	__NDW(float,df)
+#define __floatdisf	__NDW(float,sf)
+#define __fixunsxfSI	__NW(fixunsxf,)
+#define __fixunstfSI	__NW(fixunstf,)
+#define __fixunsdfSI	__NW(fixunsdf,)
+#define __fixunssfSI	__NW(fixunssf,)
+
+#define __ffsSI2	__NW(ffs,2)
+#define __clzSI2	__NW(clz,2)
+#define __ctzSI2	__NW(ctz,2)
+#define __popcountSI2	__NW(popcount,2)
+#define __paritySI2	__NW(parity,2)
+#define __ffsDI2	__NDW(ffs,2)
+#define __clzDI2	__NDW(clz,2)
+#define __ctzDI2	__NDW(ctz,2)
+#define __popcountDI2	__NDW(popcount,2)
+#define __parityDI2	__NDW(parity,2)
+
+extern DWtype __muldi3 (DWtype, DWtype);
+extern DWtype __divdi3 (DWtype, DWtype);
+extern UDWtype __udivdi3 (UDWtype, UDWtype);
+extern UDWtype __umoddi3 (UDWtype, UDWtype);
+extern DWtype __moddi3 (DWtype, DWtype);
+
+/* __udivmoddi4 is static inline when building other libgcc2 portions.  */
+#if (!defined (L_udivdi3) && !defined (L_divdi3) && \
+     !defined (L_umoddi3) && !defined (L_moddi3))
+extern UDWtype __udivmoddi4 (UDWtype, UDWtype, UDWtype *);
+#endif
+
+/* __negdi2 is static inline when building other libgcc2 portions.  */
+#if !defined(L_divdi3) && !defined(L_moddi3)
+extern DWtype __negdi2 (DWtype);
+#endif
+
+extern DWtype __lshrdi3 (DWtype, word_type);
+extern DWtype __ashldi3 (DWtype, word_type);
+extern DWtype __ashrdi3 (DWtype, word_type);
+
+/* __udiv_w_sdiv is static inline when building other libgcc2 portions.  */
+#if (!defined(L_udivdi3) && !defined(L_divdi3) && \
+     !defined(L_umoddi3) && !defined(L_moddi3))
+extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);
+#endif
+
+extern word_type __cmpdi2 (DWtype, DWtype);
+extern word_type __ucmpdi2 (DWtype, DWtype);
+
+extern Wtype __absvsi2 (Wtype);
+extern DWtype __absvdi2 (DWtype);
+extern Wtype __addvsi3 (Wtype, Wtype);
+extern DWtype __addvdi3 (DWtype, DWtype);
+extern Wtype __subvsi3 (Wtype, Wtype);
+extern DWtype __subvdi3 (DWtype, DWtype);
+extern Wtype __mulvsi3 (Wtype, Wtype);
+extern DWtype __mulvdi3 (DWtype, DWtype);
+extern Wtype __negvsi2 (Wtype);
+extern DWtype __negvdi2 (DWtype);
+
+#if BITS_PER_UNIT == 8
+extern DWtype __fixdfdi (DFtype);
+extern DWtype __fixsfdi (SFtype);
+extern DFtype __floatdidf (DWtype);
+extern SFtype __floatdisf (DWtype);
+extern UWtype __fixunsdfSI (DFtype);
+extern UWtype __fixunssfSI (SFtype);
+extern DWtype __fixunsdfDI (DFtype);
+extern DWtype __fixunssfDI (SFtype);
+
+#if LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 96
+extern DWtype __fixxfdi (XFtype);
+extern DWtype __fixunsxfDI (XFtype);
+extern XFtype __floatdixf (DWtype);
+extern UWtype __fixunsxfSI (XFtype);
+#endif
+
+#if LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128
+extern DWtype __fixunstfDI (TFtype);
+extern DWtype __fixtfdi (TFtype);
+extern TFtype __floatditf (DWtype);
+#endif
+#endif /* BITS_PER_UNIT == 8 */
+
+/* DWstructs are pairs of Wtype values in the order determined by
+   LIBGCC2_WORDS_BIG_ENDIAN.  */
+
+#if LIBGCC2_WORDS_BIG_ENDIAN
+  struct DWstruct {Wtype high, low;};
+#else
+  struct DWstruct {Wtype low, high;};
+#endif
+
+/* We need this union to unpack/pack DImode values, since we don't have
+   any arithmetic yet.  Incoming DImode parameters are stored into the
+   `ll' field, and the unpacked result is read from the struct `s'.  */
+
+typedef union
+{
+  struct DWstruct s;
+  DWtype ll;
+} DWunion;
+
+#include "longlong.h"
+
+#endif /* ! GCC_LIBGCC2_H */
--- ./gcc/gcc/longlong.h
+++ ./gcc/gcc/longlong.h
@@ -0,0 +1,1360 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+   Free Software Foundation, Inc.
+
+   This definition file is free software; you can redistribute it
+   and/or modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2, or (at your option) any later version.
+
+   This definition file is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* You have to define the following before including this file:
+
+   UWtype -- An unsigned type, default type for operations (typically a "word")
+   UHWtype -- An unsigned type, at least half the size of UWtype.
+   UDWtype -- An unsigned type, at least twice as large as UWtype.
+   W_TYPE_SIZE -- size in bits of UWtype
+
+   UQItype -- Unsigned 8 bit type.
+   SItype, USItype -- Signed and unsigned 32 bit types.
+   DItype, UDItype -- Signed and unsigned 64 bit types.
+
+   On a 32 bit machine UWtype should typically be USItype;
+   on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#ifndef W_TYPE_SIZE
+#define W_TYPE_SIZE	32
+#define UWtype		USItype
+#define UHWtype		USItype
+#define UDWtype		UDItype
+#endif
+
+/* Define auxiliary asm macros.
+
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+   word product in HIGH_PROD and LOW_PROD.
+
+   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+   UDWtype product.  This is just a variant of umul_ppmm.
+
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a UDWtype, composed by the UWtype integers
+   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+   than DENOMINATOR for correct operation.  If, in addition, the most
+   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+   UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+   is rounded towards 0.
+
+   5) count_leading_zeros(count, x) counts the number of zero-bits from the
+   msb to the first nonzero bit in the UWtype X.  This is the number of
+   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+   from the least significant end.
+
+   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
+   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+   (i.e. carry out) is not stored anywhere, and is lost.
+
+   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
+   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.  */
+
+/* The CPUs come in alphabetical order below.
+
+   Please add support for more CPUs here, or improve the current support
+   for the CPUs below!
+   (E.g. WE32100, IBM360.)  */
+
+#if defined (__GNUC__) && !defined (NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+   understood by gcc1.  Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (ph) = __builtin_alpha_umulh (__m0, __m1);				\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UDItype __r;							\
+    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
+    (r) = __r;								\
+  } while (0)
+extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#ifdef __alpha_cix__
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clzl (X))
+#define count_trailing_zeros(COUNT,X)	((COUNT) = __builtin_ctzl (X))
+#define COUNT_LEADING_ZEROS_0 64
+#else
+extern const UQItype __clz_tab[] ATTRIBUTE_HIDDEN;
+#define count_leading_zeros(COUNT,X) \
+  do {									\
+    UDItype __xr = (X), __t, __a;					\
+    __t = __builtin_alpha_cmpbge (0, __xr);				\
+    __a = __clz_tab[__t ^ 0xff] - 1;					\
+    __t = __builtin_alpha_extbl (__xr, __a);				\
+    (COUNT) = 64 - (__clz_tab[__t] + __a*8);				\
+  } while (0)
+#define count_trailing_zeros(COUNT,X) \
+  do {									\
+    UDItype __xr = (X), __t, __a;					\
+    __t = __builtin_alpha_cmpbge (0, __xr);				\
+    __t = ~__t & -~__t;							\
+    __a = ((__t & 0xCC) != 0) * 2;					\
+    __a += ((__t & 0xF0) != 0) * 4;					\
+    __a += ((__t & 0xAA) != 0);						\
+    __t = __builtin_alpha_extbl (__xr, __a);				\
+    __a <<= 3;								\
+    __t &= -__t;							\
+    __a += ((__t & 0xCC) != 0) * 2;					\
+    __a += ((__t & 0xF0) != 0) * 4;					\
+    __a += ((__t & 0xAA) != 0);						\
+    (COUNT) = __a;							\
+  } while (0)
+#endif /* __alpha_cix__ */
+#endif /* __alpha */
+
+#if defined (__arc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.f	%1, %4, %5\n\tadc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rIJ" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rIJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.f	%1, %4, %5\n\tsbc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rIJ" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rIJ" ((USItype) (bl)))
+/* Call libgcc routine.  */
+#define umul_ppmm(w1, w0, u, v) \
+do {									\
+  DWunion __w;								\
+  __w.ll = __umulsidi3 (u, v);						\
+  w1 = __w.s.high;							\
+  w0 = __w.s.low;							\
+} while (0)
+#define __umulsidi3 __umulsidi3
+UDItype __umulsidi3 (USItype, USItype);
+#endif
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds	%1, %4, %5\n\tadc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs	%1, %4, %5\n\tsbc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)))
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2;					\
+  __asm__ ("%@ Inlined umul_ppmm\n"					\
+	   "	mov	%2, %5, lsr #16\n"				\
+	   "	mov	%0, %6, lsr #16\n"				\
+	   "	bic	%3, %5, %2, lsl #16\n"				\
+	   "	bic	%4, %6, %0, lsl #16\n"				\
+	   "	mul	%1, %3, %4\n"					\
+	   "	mul	%4, %2, %4\n"					\
+	   "	mul	%3, %0, %3\n"					\
+	   "	mul	%0, %2, %0\n"					\
+	   "	adds	%3, %4, %3\n"					\
+	   "	addcs	%0, %0, #65536\n"				\
+	   "	adds	%1, %1, %3, lsl #16\n"				\
+	   "	adc	%0, %0, %3, lsr #16"				\
+	   : "=&r" ((USItype) (xh)),					\
+	     "=r" ((USItype) (xl)),					\
+	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
+	   : "r" ((USItype) (a)),					\
+	     "r" ((USItype) (b)));}
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rM" ((USItype) (ah)),					\
+	     "rM" ((USItype) (bh)),					\
+	     "%rM" ((USItype) (al)),					\
+	     "rM" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rM" ((USItype) (ah)),					\
+	     "rM" ((USItype) (bh)),					\
+	     "rM" ((USItype) (al)),					\
+	     "rM" ((USItype) (bl)))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    union								\
+      {									\
+	UDItype __f;							\
+	struct {USItype __w1, __w0;} __w1w0;				\
+      } __t;								\
+    __asm__ ("xmpyu %1,%2,%0"						\
+	     : "=x" (__t.__f)						\
+	     : "x" ((USItype) (u)),					\
+	       "x" ((USItype) (v)));					\
+    (w1) = __t.__w1w0.__w1;						\
+    (w0) = __t.__w1w0.__w0;						\
+     } while (0)
+#define UMUL_TIME 8
+#else
+#define UMUL_TIME 30
+#endif
+#define UDIV_TIME 40
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __tmp;							\
+    __asm__ (								\
+       "ldi		1,%0\n"						\
+"	extru,=		%1,15,16,%%r0		; Bits 31..16 zero?\n"	\
+"	extru,tr	%1,15,16,%1		; No.  Shift down, skip add.\n"\
+"	ldo		16(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,23,8,%%r0		; Bits 15..8 zero?\n"	\
+"	extru,tr	%1,23,8,%1		; No.  Shift down, skip add.\n"\
+"	ldo		8(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,27,4,%%r0		; Bits 7..4 zero?\n"	\
+"	extru,tr	%1,27,4,%1		; No.  Shift down, skip add.\n"\
+"	ldo		4(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,29,2,%%r0		; Bits 3..2 zero?\n"	\
+"	extru,tr	%1,29,2,%1		; No.  Shift down, skip add.\n"\
+"	ldo		2(%0),%0		; Yes.  Perform add.\n"	\
+"	extru		%1,30,1,%1		; Extract bit 1.\n"	\
+"	sub		%0,%1,%0		; Subtract it.\n"	\
+	: "=r" (count), "=r" (__tmp) : "1" (x));			\
+  } while (0)
+#endif
+
+#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#define smul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("lr %N0,%1\n\tmr %0,%2"					\
+	     : "=&r" (__x.__ll)						\
+	     : "r" (m0), "r" (m1));					\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("dr %0,%2"							\
+	     : "=r" (__x.__ll)						\
+	     : "0" (__x.__ll), "r" (d));				\
+    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
+  } while (0)
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl %5,%1\n\tadcl %3,%0"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl %5,%1\n\tsbbl %3,%0"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mull %3"							\
+	   : "=a" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "rm" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("divl %4"							\
+	   : "=a" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "rm" ((USItype) (dv)))
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("bsrl %1,%0"						\
+	     : "=r" (__cbtmp) : "rm" ((USItype) (x)));			\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+#define count_trailing_zeros(count, x) \
+  __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#define UMUL_TIME 40
+#define UDIV_TIME 40
+#endif /* 80x86 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __asm__ ("emul	%2,%1,%0"					\
+	   : "=d" (__xx.__ll)						\
+	   : "%dI" ((USItype) (u)),					\
+	     "dI" ((USItype) (v)));					\
+  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("emul	%2,%1,%0"					\
+	     : "=d" (__w)						\
+	     : "%dI" ((USItype) (u)),					\
+	       "dI" ((USItype) (v)));					\
+    __w; })
+#endif /* __i960__ */
+
+#if defined (__M32R__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  /* The cmp clears the condition bit.  */ \
+  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl))					\
+	   : "cbit")
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  /* The cmp clears the condition bit.  */ \
+  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl))					\
+	   : "cbit")
+#endif /* __M32R__ */
+
+#if defined (__mc68000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
+	   : "=d" ((USItype) (sh)),					\
+	     "=&d" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "d" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
+	   : "=d" ((USItype) (sh)),					\
+	     "=&d" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "d" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+
+/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
+#if defined (__mc68020__) || defined(mc68020) \
+	|| defined(__mc68030__) || defined(mc68030) \
+	|| defined(__mc68040__) || defined(mc68040) \
+	|| defined(__mcpu32__) || defined(mcpu32)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulu%.l %3,%1:%0"						\
+	   : "=d" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "dmi" ((USItype) (v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divu%.l %4,%1:%0"						\
+	   : "=d" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "dmi" ((USItype) (d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divs%.l %4,%1:%0"						\
+	   : "=d" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "dmi" ((USItype) (d)))
+
+#else /* not mc68020 */
+#if defined(__mcoldfire__)
+#define umul_ppmm(xh, xl, a, b) /* (xh,xl) = a * b from four 16x16 products; uses d0-d4 */ \
+  __asm__ ("| Inlined umul_ppmm\n"					\
+	   "	move%.l	%2,%/d0\n"					\
+	   "	move%.l	%3,%/d1\n"					\
+	   "	move%.l	%/d0,%/d2\n"					\
+	   "	swap	%/d0\n"		/* d0.w = high half of a */	\
+	   "	move%.l	%/d1,%/d3\n"					\
+	   "	swap	%/d1\n"		/* d1.w = high half of b */	\
+	   "	move%.w	%/d2,%/d4\n"					\
+	   "	mulu	%/d3,%/d4\n"	/* d4 = lo(a) * lo(b) */	\
+	   "	mulu	%/d1,%/d2\n"	/* d2 = lo(a) * hi(b) */	\
+	   "	mulu	%/d0,%/d3\n"	/* d3 = hi(a) * lo(b) */	\
+	   "	mulu	%/d0,%/d1\n"	/* d1 = hi(a) * hi(b) */	\
+	   "	move%.l	%/d4,%/d0\n"					\
+	   "	clr%.w	%/d0\n"						\
+	   "	swap	%/d0\n"		/* d0 = upper half of lo*lo */	\
+	   "	add%.l	%/d0,%/d2\n"					\
+	   "	add%.l	%/d3,%/d2\n"	/* sum of the cross terms */	\
+	   "	jcc	1f\n"		/* skip unless that sum carried */ \
+	   "	add%.l	%#65536,%/d1\n"	/* carry lands in bit 16 of hi*hi */ \
+	   "1:	swap	%/d2\n"						\
+	   "	moveq	%#0,%/d0\n"					\
+	   "	move%.w	%/d2,%/d0\n"					\
+	   "	move%.w	%/d4,%/d2\n"					\
+	   "	move%.l	%/d2,%1\n"	/* store xl */			\
+	   "	add%.l	%/d1,%/d0\n"					\
+	   "	move%.l	%/d0,%0"	/* store xh */			\
+	   : "=g" ((USItype) (xh)),					\
+	     "=g" ((USItype) (xl))					\
+	   : "g" ((USItype) (a)),					\
+	     "g" ((USItype) (b))					\
+	   : "d0", "d1", "d2", "d3", "d4")
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#else /* not ColdFire */
+/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
+#define umul_ppmm(xh, xl, a, b) /* (xh,xl) = a * b from four 16x16 products; uses d0-d4 */ \
+  __asm__ ("| Inlined umul_ppmm\n"					\
+	   "	move%.l	%2,%/d0\n"					\
+	   "	move%.l	%3,%/d1\n"					\
+	   "	move%.l	%/d0,%/d2\n"					\
+	   "	swap	%/d0\n"		/* d0.w = high half of a */	\
+	   "	move%.l	%/d1,%/d3\n"					\
+	   "	swap	%/d1\n"		/* d1.w = high half of b */	\
+	   "	move%.w	%/d2,%/d4\n"					\
+	   "	mulu	%/d3,%/d4\n"	/* d4 = lo(a) * lo(b) */	\
+	   "	mulu	%/d1,%/d2\n"	/* d2 = lo(a) * hi(b) */	\
+	   "	mulu	%/d0,%/d3\n"	/* d3 = hi(a) * lo(b) */	\
+	   "	mulu	%/d0,%/d1\n"	/* d1 = hi(a) * hi(b) */	\
+	   "	move%.l	%/d4,%/d0\n"					\
+	   "	eor%.w	%/d0,%/d0\n"	/* zero the low word of d0 */	\
+	   "	swap	%/d0\n"		/* d0 = upper half of lo*lo */	\
+	   "	add%.l	%/d0,%/d2\n"					\
+	   "	add%.l	%/d3,%/d2\n"	/* sum of the cross terms */	\
+	   "	jcc	1f\n"		/* skip unless that sum carried */ \
+	   "	add%.l	%#65536,%/d1\n"	/* carry lands in bit 16 of hi*hi */ \
+	   "1:	swap	%/d2\n"						\
+	   "	moveq	%#0,%/d0\n"					\
+	   "	move%.w	%/d2,%/d0\n"					\
+	   "	move%.w	%/d4,%/d2\n"					\
+	   "	move%.l	%/d2,%1\n"	/* store xl */			\
+	   "	add%.l	%/d1,%/d0\n"					\
+	   "	move%.l	%/d0,%0"	/* store xh */			\
+	   : "=g" ((USItype) (xh)),					\
+	     "=g" ((USItype) (xl))					\
+	   : "g" ((USItype) (a)),					\
+	     "g" ((USItype) (b))					\
+	   : "d0", "d1", "d2", "d3", "d4")
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not ColdFire */
+#endif /* not mc68020 */
+
+/* The '020, '030, '040 and '060 have bitfield insns.
+   cpu32 disguises as a 68020, but lacks them.  */
+#if ( defined (__mc68020__) || defined(mc68020) \
+		|| defined(__mc68030__) || defined(mc68030) \
+		|| defined(__mc68040__) || defined(mc68040) \
+		|| defined(__mc68060__) || defined(mc68060) ) \
+	&& !defined(__mcpu32__)
+#define count_leading_zeros(count, x) /* count = leading zero bits of x, via bfffo */ \
+  __asm__ ("bfffo %1{%b2:%b2},%0"					\
+	   : "=d" ((USItype) (count))					\
+	   : "od" ((USItype) (x)), "n" (0))	/* constant 0 fills both the bit-field offset and width slots */
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl) */ \
+  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" /* low add, then high add */ \
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))	/* early-clobber: written before ah/bh are read */ \
+	   : "%rJ" ((USItype) (ah)),					\
+	     "rJ" ((USItype) (bh)),					\
+	     "%rJ" ((USItype) (al)),					\
+	     "rJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl) */ \
+  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" /* low subtract, then high subtract */ \
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))	/* early-clobber: written before ah/bh are read */ \
+	   : "rJ" ((USItype) (ah)),					\
+	     "rJ" ((USItype) (bh)),					\
+	     "rJ" ((USItype) (al)),					\
+	     "rJ" ((USItype) (bl)))
+#define count_leading_zeros(count, x) /* count = (ff1 result for x) ^ 31 */ \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("ff1 %0,%1"						\
+	     : "=r" (__cbtmp)						\
+	     : "r" ((USItype) (x)));					\
+    (count) = __cbtmp ^ 31;	/* for a result in 0..31 this equals 31 - result */ \
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__mc88110__)
+#define umul_ppmm(wh, wl, u, v) /* (wh,wl) = u * v via the 88110 mulu.d */ \
+  do {									\
+    union {UDItype __ll;	/* double-word product as written by the asm */ \
+	   struct {USItype __h, __l;} __i;	/* same storage, high word first */ \
+	  } __xx;							\
+    __asm__ ("mulu.d	%0,%1,%2"					\
+	     : "=r" (__xx.__ll)						\
+	     : "r" ((USItype) (u)),					\
+	       "r" ((USItype) (v)));					\
+    (wh) = __xx.__i.__h;						\
+    (wl) = __xx.__i.__l;						\
+  } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) /* (n1,n0) / d: quotient q, remainder r (88110 divu.d) */ \
+  ({union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;	/* high word first */	\
+	  } __xx;							\
+  USItype __q;								\
+  __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
+  __asm__ ("divu.d %0,%1,%2"						\
+	   : "=r" (__q)							\
+	   : "r" (__xx.__ll),						\
+	     "r" ((USItype) (d)));					\
+  (r) = (n0) - __q * (d); (q) = __q; })	/* remainder rebuilt in C; n0 and d are evaluated again here */
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __mc88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) /* (w1,w0) = u * v, unsigned */ \
+  __asm__ ("multu %2,%3"	/* no output operand in the template: results come back through the l/h constraints */ \
+	   : "=l" ((USItype) (w0)),	/* low word (LO register) */	\
+	     "=h" ((USItype) (w1))	/* high word (HI register) */	\
+	   : "d" ((USItype) (u)),					\
+	     "d" ((USItype) (v)))
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) /* (w1,w0) = u * v via meid */ \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;	/* low word first */	\
+	  } __xx;							\
+  __asm__ ("meid %2,%0"							\
+	   : "=g" (__xx.__ll)						\
+	   : "%0" ((USItype) (u)),	/* u enters in the output operand */ \
+	     "g" ((USItype) (v)));					\
+  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) /* yields the double-word product u * v as a UDItype value */ \
+  ({UDItype __w;							\
+    __asm__ ("meid %2,%0"						\
+	     : "=g" (__w)						\
+	     : "%0" ((USItype) (u)),	/* u enters in the output operand */ \
+	       "g" ((USItype) (v)));					\
+    __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) /* (n1,n0) / d via deid: quotient q, remainder r */ \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;	/* low word first */	\
+	  } __xx;							\
+  __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
+  __asm__ ("deid %2,%0"							\
+	   : "=g" (__xx.__ll)						\
+	   : "0" (__xx.__ll),	/* dividend is replaced in place */	\
+	     "g" ((USItype) (d)));					\
+  (r) = __xx.__i.__l; (q) = __xx.__i.__h; })	/* remainder read from the low word, quotient from the high word */
+#define count_trailing_zeros(count,x) /* count = ffsd result for x, with count preset to 0 */ \
+  do {									\
+    __asm__ ("ffsd     %2,%0"						\
+            : "=r" ((USItype) (count))					\
+            : "0" ((USItype) 0),	/* output register is preloaded with 0 */ \
+              "r" ((USItype) (x)));					\
+  } while (0)
+#endif /* __ns32000__ */
+
+/* FIXME: We should test _IBMR2 here when we add assembly support for the
+   system vendor compilers.
+   FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
+   enough, since that hits ARM and m68k too.  */
+#if (defined (_ARCH_PPC)	/* AIX */				\
+     || defined (_ARCH_PWR)	/* AIX */				\
+     || defined (_ARCH_COM)	/* AIX */				\
+     || defined (__powerpc__)	/* gcc */				\
+     || defined (__POWERPC__)	/* BEOS */				\
+     || defined (__ppc__)	/* Darwin */				\
+     || defined (PPC)		/* GNU/Linux, SysV */			\
+     ) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl) */ \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)	/* sh = ah + carry */	\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)	/* sh = ah + carry - 1 */ \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else				/* general case: both high words in registers */ \
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl) */ \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)	/* high minuend is constant 0 */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)	/* high minuend is all ones */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)	/* high subtrahend is constant 0 */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)	/* high subtrahend is all ones */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else				/* general case: both high words in registers */ \
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+#define count_leading_zeros(count, x) /* count = leading zero bits of the 32-bit x */ \
+  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32	/* value this file records for count_leading_zeros of 0 */
+#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
+  || defined (__ppc__) || defined (PPC)
+#define umul_ppmm(ph, pl, m0, m1) /* (ph,pl) = m0 * m1, unsigned 32x32->64 */ \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);	/* each argument evaluated exactly once */ \
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;			/* low word: ordinary C multiply */ \
+  } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) /* (ph,pl) = m0 * m1, signed 32x32->64 */ \
+  do {									\
+    SItype __m0 = (m0), __m1 = (m1);	/* each argument evaluated exactly once */ \
+    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;			/* low word: ordinary C multiply */ \
+  } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#elif defined (_ARCH_PWR)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) /* POWER mul: xh is operand %0, xl comes back through the "q" constraint */ \
+  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) /* POWER div: nl enters, and r leaves, through the "q"-constrained operand */ \
+  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#define UDIV_TIME 100
+#endif
+#endif /* 32-bit POWER architecture variants.  */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+   vendor compilers.  */
+#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl), 64-bit words */ \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)	/* sh = ah + carry */	\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	/* sh = ah + carry - 1 */ \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else				/* general case: both high words in registers */ \
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl), 64-bit words */ \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)	/* high minuend is constant 0 */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)	/* high minuend is all ones */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)	/* high subtrahend is constant 0 */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)	/* high subtrahend is all ones */ \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else				/* general case: both high words in registers */ \
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+#define count_leading_zeros(count, x) /* count = leading zero bits of the 64-bit x */ \
+  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64	/* value this file records for count_leading_zeros of 0 */
+#define umul_ppmm(ph, pl, m0, m1) /* (ph,pl) = m0 * m1, unsigned 64x64->128 */ \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);	/* each argument evaluated exactly once */ \
+    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;			/* low word: ordinary C multiply */ \
+  } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) /* (ph,pl) = m0 * m1, signed 64x64->128 */ \
+  do {									\
+    DItype __m0 = (m0), __m1 = (m1);	/* each argument evaluated exactly once */ \
+    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
+    (pl) = __m0 * __m1;			/* low word: ordinary C multiply */ \
+  } while (0)
+#define SMUL_TIME 14  /* ??? */
+#define UDIV_TIME 120 /* ??? */
+#endif /* 64-bit PowerPC.  */
+
+#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl) */ \
+  __asm__ ("a %1,%5\n\tae %0,%3"	/* low add, then high add */	\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),	/* ah starts out in sh's register */ \
+	     "r" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),	/* al starts out in sl's register */ \
+	     "r" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl) */ \
+  __asm__ ("s %1,%5\n\tse %0,%3"	/* low subtract, then high subtract */ \
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),	/* ah starts out in sh's register */ \
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),	/* al starts out in sl's register */ \
+	     "r" ((USItype) (bl)))
+#define umul_ppmm(ph, pl, m0, m1) /* (ph,pl) = m0 * m1: sixteen "m" steps, then a C fix-up of ph */ \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ (								\
+       "s	r2,r2\n"	/* r2 = 0 */				\
+"	mts	r10,%2\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	cas	%0,r2,r0\n"						\
+"	mfs	r10,%1"							\
+	     : "=r" ((USItype) (ph)),					\
+	       "=r" ((USItype) (pl))					\
+	     : "%r" (__m0),						\
+		"r" (__m1)						\
+	     : "r2");							\
+    (ph) += ((((SItype) __m0 >> 31) & __m1)	/* add m1 when m0's top bit is set */ \
+	     + (((SItype) __m1 >> 31) & __m0));	/* add m0 when m1's top bit is set */ \
+  } while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) /* clz on the upper half if it is nonzero, else on x with 16 added */ \
+  do {									\
+    if ((x) >= 0x10000)	/* some bit at or above bit 16 is set */	\
+      __asm__ ("clz	%0,%1"						\
+	       : "=r" ((USItype) (count))				\
+	       : "r" ((USItype) (x) >> 16));				\
+    else								\
+      {									\
+	__asm__ ("clz	%0,%1"						\
+		 : "=r" ((USItype) (count))				\
+		 : "r" ((USItype) (x)));					\
+	(count) += 16;	/* account for the all-zero upper half */	\
+      }									\
+  } while (0)
+#endif
+
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) /* (w1,w0) = u * v, unsigned */ \
+  __asm__ (								\
+       "dmulu.l	%2,%3\n\tsts	macl,%1\n\tsts	mach,%0" /* multiply, then copy macl -> w0 and mach -> w1 */ \
+	   : "=r" ((USItype)(w1)),					\
+	     "=r" ((USItype)(w0))					\
+	   : "r" ((USItype)(u)),					\
+	     "r" ((USItype)(v))						\
+	   : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+#if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
+#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v)) /* args parenthesised so each cast covers the whole argument */
+#define count_leading_zeros(count, x) /* count = (nsb of zero-extended x) - 31 */ \
+  do									\
+    {									\
+      UDItype x_ = (USItype)(x);	/* widen the 32-bit value to 64 bits */ \
+      SItype c_;							\
+									\
+      __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));			\
+      (count) = c_ - 31;						\
+    }									\
+  while (0)
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+
+#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
+    && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl) */ \
+  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" /* low add sets carry; addx folds it into the high add */ \
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))	/* early-clobber: written before ah/bh are read */ \
+	   : "%rJ" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%rJ" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl))					\
+	   __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl) */ \
+  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" /* low subtract sets borrow; subx applies it to the high words */ \
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))	/* early-clobber: written before ah/bh are read */ \
+	   : "rJ" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "rJ" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl))					\
+	   __CLOBBER_CC)
+#if defined (__sparc_v8__)
+#define umul_ppmm(w1, w0, u, v) /* (w1,w0) = u * v, unsigned (v8 umul) */ \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0"	/* low word from umul, high word read from %y */ \
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "r" ((USItype) (u)),					\
+	     "r" ((USItype) (v)))
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) /* %y = n1; q = udiv (n0, d); r = n0 - q * d */ \
+  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__n1)),					\
+	     "r" ((USItype) (__n0)),					\
+	     "r" ((USItype) (__d)))
+#else
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+   instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) /* (w1,w0) = u * v, unsigned (sparclite umul) */ \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0"	/* low word from umul, high word read from %y */ \
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "r" ((USItype) (u)),					\
+	     "r" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, d) /* (n1,n0) / d via 32 divscc steps: quotient q, remainder r */ \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
+"	tst	%%g0\n"							\
+"	divscc	%3,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%0\n"	/* final step writes the quotient */	\
+"	rd	%%y,%1\n"	/* remainder is read back from %y */	\
+"	bl,a 1f\n"							\
+"	add	%1,%4,%1\n"	/* annulled delay slot: adds d to r only when the branch is taken */ \
+"1:	! End of inline udiv_qrnnd"					\
+	   : "=r" ((USItype) (q)),					\
+	     "=r" ((USItype) (r))					\
+	   : "r" ((USItype) (n1)),					\
+	     "r" ((USItype) (n0)),					\
+	     "rI" ((USItype) (d))					\
+	   : "g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) /* count = result of sparclite "scan x,1" */ \
+  do {                                                                  \
+  __asm__ ("scan %1,1,%0"                                               \
+           : "=r" ((USItype) (count))                                   \
+           : "r" ((USItype) (x)));					\
+  } while (0)
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+   undefined.  */
+#else
+/* SPARC without integer multiplication and divide instructions.
+   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
+#define umul_ppmm(w1, w0, u, v) /* (w1,w0) = u * v via mulscc steps; scratch g1 and o5 */ \
+  __asm__ ("! Inlined umul_ppmm\n"					\
+"	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n"\
+"	sra	%3,31,%%o5	! Don't move this insn\n"		\
+"	and	%2,%%o5,%%o5	! Don't move this insn\n"		\
+"	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,0,%%g1\n"	/* closing step with a zero operand */	\
+"	add	%%g1,%%o5,%0\n"	/* o5 = u where v's top bit is set: high-word fix-up */ \
+"	rd	%%y,%1"		/* low word is left in %y */		\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "%rI" ((USItype) (u)),					\
+	     "r" ((USItype) (v))						\
+	   : "g1", "o5" __AND_CLOBBER_CC)
+#define UMUL_TIME 39		/* 39 instructions */
+/* It's quite necessary to add this much assembler for the sparc.
+   The default udiv_qrnnd (in C) is more than 10 times slower!  */
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) /* 32-step shift/subtract divide; %0 = q (starts as n0), %1 = r (starts as n1), %2 = d */ \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	mov	32,%%g1\n"	/* g1 = iteration counter */		\
+"	subcc	%1,%2,%%g0\n"						\
+"1:	bcs	5f\n"							\
+"	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
+"	sub	%1,%2,%1	! this kills msb of n\n"		\
+"	addx	%1,%1,%1	! so this can't give carry\n"		\
+"	subcc	%%g1,1,%%g1\n"						\
+"2:	bne	1b\n"							\
+"	 subcc	%1,%2,%%g0\n"						\
+"	bcs	3f\n"							\
+"	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
+"	b	3f\n"							\
+"	 sub	%1,%2,%1	! this kills msb of n\n"		\
+"4:	sub	%1,%2,%1\n"						\
+"5:	addxcc	%1,%1,%1\n"						\
+"	bcc	2b\n"							\
+"	 subcc	%%g1,1,%%g1\n"						\
+"! Got carry from n.  Subtract next step to cancel this carry.\n"	\
+"	bne	4b\n"							\
+"	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n"	\
+"	sub	%1,%2,%1\n"						\
+"3:	xnor	%0,0,%0\n"	/* complement the bits accumulated in %0 */ \
+"	! End of inline udiv_qrnnd"					\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__d)),					\
+	     "1" ((USItype) (__n1)),					\
+	     "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
+#define UDIV_TIME (3+7*32)	/* 7 instructions/iteration. 32 iterations.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* sparc32 */
+
+#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
+    && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl), 64-bit words */ \
+  __asm__ ("addcc %r4,%5,%1\n\t"					\
+   	   "add %r2,%3,%0\n\t"						\
+   	   "bcs,a,pn %%xcc, 1f\n\t"	/* branch on the 64-bit carry */ \
+   	   "add %0, 1, %0\n"	/* annulled delay slot: runs only if the branch is taken */ \
+	   "1:"								\
+	   : "=r" ((UDItype)(sh)),				      	\
+	     "=&r" ((UDItype)(sl))				      	\
+	   : "%rJ" ((UDItype)(ah)),				     	\
+	     "rI" ((UDItype)(bh)),				      	\
+	     "%rJ" ((UDItype)(al)),				     	\
+	     "rI" ((UDItype)(bl))				       	\
+	   __CLOBBER_CC)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl), 64-bit words */ \
+  __asm__ ("subcc %r4,%5,%1\n\t"					\
+   	   "sub %r2,%3,%0\n\t"						\
+   	   "bcs,a,pn %%xcc, 1f\n\t"	/* branch on the 64-bit borrow */ \
+   	   "sub %0, 1, %0\n\t"	/* annulled delay slot: runs only if the branch is taken */ \
+	   "1:"								\
+	   : "=r" ((UDItype)(sh)),				      	\
+	     "=&r" ((UDItype)(sl))				      	\
+	   : "rJ" ((UDItype)(ah)),				     	\
+	     "rI" ((UDItype)(bh)),				      	\
+	     "rJ" ((UDItype)(al)),				     	\
+	     "rI" ((UDItype)(bl))				       	\
+	   __CLOBBER_CC)
+
+#define umul_ppmm(wh, wl, u, v) /* (wh,wl) = u * v, 64x64->128 from 32-bit halves and mulx */ \
+  do {									\
+	  UDItype tmp1, tmp2, tmp3, tmp4;	/* scratch registers %2..%5 */ \
+	  __asm__ __volatile__ (					\
+		   "srl %7,0,%3\n\t"	/* %3 = low 32 bits of v */	\
+		   "mulx %3,%6,%1\n\t"					\
+		   "srlx %6,32,%2\n\t"	/* %2 = high 32 bits of u */	\
+		   "mulx %2,%3,%4\n\t"					\
+		   "sllx %4,32,%5\n\t"					\
+		   "srl %6,0,%3\n\t"	/* %3 = low 32 bits of u */	\
+		   "sub %1,%5,%5\n\t"					\
+		   "srlx %5,32,%5\n\t"					\
+		   "addcc %4,%5,%4\n\t"					\
+		   "srlx %7,32,%5\n\t"	/* %5 = high 32 bits of v */	\
+		   "mulx %3,%5,%3\n\t"					\
+		   "mulx %2,%5,%5\n\t"					\
+		   "sethi %%hi(0x80000000),%2\n\t"			\
+		   "addcc %4,%3,%4\n\t"					\
+		   "srlx %4,32,%4\n\t"					\
+		   "add %2,%2,%2\n\t"					\
+		   "movcc %%xcc,%%g0,%2\n\t"				\
+		   "addcc %5,%4,%5\n\t"					\
+		   "sllx %3,32,%3\n\t"					\
+		   "add %1,%3,%1\n\t"	/* final low word */		\
+		   "add %5,%2,%0"	/* final high word */		\
+	   : "=r" ((UDItype)(wh)),					\
+	     "=&r" ((UDItype)(wl)),					\
+	     "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)	\
+	   : "r" ((UDItype)(u)),					\
+	     "r" ((UDItype)(v))						\
+	   __CLOBBER_CC);						\
+  } while (0)
+#define UMUL_TIME 96
+#define UDIV_TIME 230
+#endif /* sparc64 */
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl) */ \
+  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"	/* low add, then add-with-carry on the high words */ \
+	   : "=g" ((USItype) (sh)),					\
+	     "=&g" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),	/* ah starts out in sh's location */ \
+	     "g" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),	/* al starts out in sl's location */ \
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl) */ \
+  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"	/* low subtract, then subtract-with-carry on the high words */ \
+	   : "=g" ((USItype) (sh)),					\
+	     "=&g" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),	/* ah starts out in sh's location */ \
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),	/* al starts out in sl's location */ \
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(xh, xl, m0, m1) /* (xh,xl) = m0 * m1: emul, then a C fix-up of xh */ \
+  do {									\
+    union {								\
+	UDItype __ll;							\
+	struct {USItype __l, __h;} __i;	/* low word first */		\
+      } __xx;								\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("emul %1,%2,$0,%0"						\
+	     : "=r" (__xx.__ll)						\
+	     : "g" (__m0),						\
+	       "g" (__m1));						\
+    (xh) = __xx.__i.__h;						\
+    (xl) = __xx.__i.__l;						\
+    (xh) += ((((SItype) __m0 >> 31) & __m1)	/* add m1 when m0's top bit is set */ \
+	     + (((SItype) __m1 >> 31) & __m0));	/* add m0 when m1's top bit is set */ \
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) /* signed (n1,n0) / d via ediv: quotient q, remainder r */ \
+  do {									\
+    union {DItype __ll;							\
+	   struct {SItype __l, __h;} __i;	/* low word first */	\
+	  } __xx;							\
+    __xx.__i.__h = n1; __xx.__i.__l = n0;	/* assemble the double-word dividend */ \
+    __asm__ ("ediv %3,%2,%0,%1"						\
+	     : "=g" (q), "=g" (r)					\
+	     : "g" (__xx.__ll), "g" (d));				\
+  } while (0)
+#endif /* __vax__ */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl), 16-bit words */ \
+  __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"	/* low add, then add-with-carry */ \
+	   : "=r" ((unsigned int)(sh)),					\
+	     "=&r" ((unsigned int)(sl))					\
+	   : "%0" ((unsigned int)(ah)),	/* ah starts out in sh's register */ \
+	     "r" ((unsigned int)(bh)),					\
+	     "%1" ((unsigned int)(al)),	/* al starts out in sl's register */ \
+	     "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl), 16-bit words */ \
+  __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"	/* low subtract, then subtract-with-carry */ \
+	   : "=r" ((unsigned int)(sh)),					\
+	     "=&r" ((unsigned int)(sl))					\
+	   : "0" ((unsigned int)(ah)),	/* ah starts out in sh's register */ \
+	     "r" ((unsigned int)(bh)),					\
+	     "1" ((unsigned int)(al)),	/* al starts out in sl's register */ \
+	     "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) /* (xh,xl) = m0 * m1: mult, then a C fix-up of xh */ \
+  do {									\
+    union {long int __ll;						\
+	   struct {unsigned int __h, __l;} __i;	/* high word first */	\
+	  } __xx;							\
+    unsigned int __m0 = (m0), __m1 = (m1);				\
+    __asm__ ("mult	%S0,%H3"					\
+	     : "=r" (__xx.__i.__h),					\
+	       "=r" (__xx.__i.__l)					\
+	     : "%1" (__m0),	/* m0 enters in the low result word */	\
+	       "rQR" (__m1));						\
+    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
+    (xh) += ((((signed int) __m0 >> 15) & __m1)	/* add m1 when m0's top bit is set */ \
+	     + (((signed int) __m1 >> 15) & __m0));	/* add m0 when m1's top bit is set */ \
+  } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    /* Low-word sum first; it wrapped exactly when it is below AL.  */	\
+    UWtype __x = (al) + (bl);						\
+    (sh) = (ah) + (bh) + (__x < (al));					\
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    /* Low-word difference first; it borrowed exactly when it exceeds AL.  */ \
+    UWtype __x = (al) - (bl);						\
+    (sh) = (ah) - (bh) - (__x > (al));					\
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+/* No umul_ppmm yet, but smul_ppmm exists: take the signed product and add
+   each operand into the high word when the other one has its top bit set.  */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __xm0 = (u), __xm1 = (v);					\
+    UWtype __w1;							\
+    smul_ppmm (__w1, w0, __xm0, __xm1);					\
+    (w1) = (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)			\
+	   + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0) + __w1;		\
+  } while (0)
+#endif
+
+/* Last resort when no umul_ppmm exists yet: portable C version built
+   from the four half-word partial products.  */
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    /* Split both operands into half words.  */				\
+    UHWtype __ul = __ll_lowpart (u), __uh = __ll_highpart (u);		\
+    UHWtype __vl = __ll_lowpart (v), __vh = __ll_highpart (v);		\
+									\
+    /* The four partial products, each computed at full word width.  */	\
+    UWtype __x0 = (UWtype) __ul * __vl;					\
+    UWtype __x1 = (UWtype) __ul * __vh;					\
+    UWtype __x2 = (UWtype) __uh * __vl;					\
+    UWtype __x3 = (UWtype) __uh * __vh;					\
+									\
+    /* Fold the middle terms together, tracking the one possible carry.  */ \
+    __x1 += __ll_highpart (__x0);	/* cannot overflow */		\
+    __x1 += __x2;			/* may wrap around */		\
+    if (__x1 < __x2)			/* it wrapped: the carry goes */ \
+      __x3 += __ll_B;			/* into the high product */	\
+									\
+    /* Assemble the two result words.  */				\
+    (w1) = __x3 + __ll_highpart (__x1);					\
+    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
+  } while (0)
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) /* yields u * v as the double-word member of a DWunion */ \
+  ({DWunion __w;							\
+    umul_ppmm (__w.s.high, __w.s.low, u, v);	/* fill the high and low halves */ \
+    __w.ll; })
+#endif
+
+/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) /* (n1,n0) / d in C: two half-word quotient digits */ \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0;					\
+    UWtype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;		/* upper quotient digit: estimate from d's high half */ \
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)		/* estimate too large: step it down */	\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;		/* lower quotient digit, same scheme */	\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))	/* no carry out of that addition */	\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;	/* join the two digits */ \
+    (r) = __r0;								\
+  } while (0)
+
+/* If the processor has no udiv_qrnnd but does have sdiv_qrnnd, go through
+   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {									\
+    UWtype __r;		/* word-sized, like every other fallback here */ \
+    (q) = __udiv_w_sdiv (&__r, nh, nl, d);				\
+    (r) = __r;								\
+  } while (0)
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1	/* set to 1 whenever the C fallback is selected */
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+extern const UQItype __clz_tab[] ATTRIBUTE_HIDDEN;
+#define count_leading_zeros(count, x) \
+  do {									\
+    UWtype __xr = (x);							\
+    UWtype __a;		/* shift bringing the top nonzero chunk down */	\
+									\
+    if (W_TYPE_SIZE > 32)						\
+      {									\
+	/* Wide word: walk down from the top byte to the first nonzero one.  */ \
+	__a = W_TYPE_SIZE - 8;						\
+	while (__a > 0 && ((__xr >> __a) & 0xff) == 0)			\
+	  __a -= 8;							\
+      }									\
+    /* Otherwise pick the quarter of the word holding the top set bit.  */ \
+    else if (__xr < ((UWtype)1<<2*__BITS4))				\
+      __a = (__xr < ((UWtype)1<<__BITS4)) ? 0 : __BITS4;		\
+    else								\
+      __a = (__xr < ((UWtype)1<<3*__BITS4)) ? 2*__BITS4 : 3*__BITS4;	\
+									\
+    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);		\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Generic count_trailing_zeros: isolate the lowest set bit and let
+   count_leading_zeros (asm, or the C version above) locate it.  */
+#define count_trailing_zeros(count, x) \
+  do {									\
+    UWtype __ctz_c;							\
+    UWtype __ctz_x = (x);						\
+    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
+    (count) = (W_TYPE_SIZE - 1) - __ctz_c;				\
+  } while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0	/* default when nothing earlier defined it */
+#endif