author     Joel Sherrill <joel.sherrill@OARcorp.com>  1997-04-16 17:33:04 +0000
committer  Joel Sherrill <joel.sherrill@OARcorp.com>  1997-04-16 17:33:04 +0000
commit     f9b93da8b47ff7ea4d6573b75b6077f6efb8dbc6 (patch)
tree       46e2747b2b8f04d36d530daad59481f4f79e3c00 /c/src/lib/libcpu
parent     Added --disable-tcpip option. (diff)
download   rtems-f9b93da8b47ff7ea4d6573b75b6077f6efb8dbc6.tar.bz2
Added the MC68040 Floating Point Support Package. This was ported
to RTEMS by Eric Norum. It is freely distributable and was acquired from the Motorola WWW site. More info is in the FPSP README.
Diffstat (limited to 'c/src/lib/libcpu')
-rw-r--r--  c/src/lib/libcpu/m68k/Makefile.in  13
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/Makefile.in  13
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/Makefile.in  63
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/README  40
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/bindec.s  920
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/binstr.s  140
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/bugfix.s  496
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/decbin.s  506
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/do_func.s  559
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/fpsp.defs  348
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/gen_except.s  468
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/get_op.s  676
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/kernel_ex.s  494
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/res_func.s  2040
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/round.s  649
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/rtems_fpsp.c  77
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/rtems_skel.s  394
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/sacos.s  115
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/sasin.s  104
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/satan.s  478
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/satanh.s  104
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/scale.s  371
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/scosh.s  132
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/setox.s  865
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/sgetem.s  141
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/sint.s  247
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/slog2.s  188
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/slogn.s  592
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/smovecr.s  162
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/srem_mod.s  422
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/ssin.s  746
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/ssinh.s  135
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/stan.s  455
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/stanh.s  185
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/sto_res.s  98
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/stwotox.s  427
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/tbldo.s  554
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/util.s  748
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_bsun.s  47
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_fline.s  104
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_operr.s  356
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_ovfl.s  186
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_snan.s  277
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_store.s  256
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_unfl.s  269
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_unimp.s  77
-rw-r--r--  c/src/lib/libcpu/m68k/m68040/fpsp/x_unsupp.s  83
47 files changed, 16820 insertions, 0 deletions
diff --git a/c/src/lib/libcpu/m68k/Makefile.in b/c/src/lib/libcpu/m68k/Makefile.in
new file mode 100644
index 0000000000..9687252dbb
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/Makefile.in
@@ -0,0 +1,13 @@
+#
+# $Id$
+#
+
+@SET_MAKE@
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH=@srcdir@
+
+include $(RTEMS_CUSTOM)
+include $(PROJECT_ROOT)/make/directory.cfg
+
+SUB_DIRS=$(wildcard $(RTEMS_CPU_MODEL))
diff --git a/c/src/lib/libcpu/m68k/m68040/Makefile.in b/c/src/lib/libcpu/m68k/m68040/Makefile.in
new file mode 100644
index 0000000000..7680536125
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/Makefile.in
@@ -0,0 +1,13 @@
+#
+# $Id$
+#
+
+@SET_MAKE@
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH=@srcdir@
+
+include $(RTEMS_CUSTOM)
+include $(PROJECT_ROOT)/make/directory.cfg
+
+SUB_DIRS=fpsp
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/Makefile.in b/c/src/lib/libcpu/m68k/m68040/fpsp/Makefile.in
new file mode 100644
index 0000000000..533ec6d415
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/Makefile.in
@@ -0,0 +1,63 @@
+#
+# $Id$
+#
+
+@SET_MAKE@
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH=@srcdir@
+
+PGM=${ARCH}/fpsp.rel
+
+# C source names, if any, go here -- minus the .c
+C_PIECES=rtems_fpsp
+C_FILES=$(C_PIECES:%=%.c)
+C_O_FILES=$(C_PIECES:%=${ARCH}/%.o)
+
+H_FILES=
+
+# Assembly source names, if any, go here -- minus the .s
+S_PIECES= bindec binstr bugfix decbin do_func gen_except get_op kernel_ex \
+ res_func round rtems_skel sacos sasin satan satanh scale scosh setox \
+ sgetem sint slog2 slogn smovecr srem_mod ssin ssinh stan stanh sto_res \
+ stwotox tbldo util x_bsun x_fline x_operr x_ovfl x_snan x_store x_unfl \
+ x_unimp x_unsupp
+S_FILES=$(S_PIECES:%=%.s)
+S_O_FILES=$(S_FILES:%.s=${ARCH}/%.o)
+
+SRCS=$(C_FILES) $(CC_FILES) $(H_FILES) $(S_FILES)
+OBJS=$(C_O_FILES) $(CC_O_FILES) $(S_O_FILES)
+
+include $(RTEMS_CUSTOM)
+include $(PROJECT_ROOT)/make/leaf.cfg
+
+#
+# (OPTIONAL) Add local stuff here using +=
+#
+
+DEFINES +=
+CPPFLAGS +=
+CFLAGS += $(CFLAGS_OS_V)
+
+LD_PATHS +=
+LD_LIBS +=
+LDFLAGS +=
+
+#
+# Add your list of files to delete here. The config files
+# already know how to delete some stuff, so you may want
+# to just run 'make clean' first to see what gets missed.
+# 'make clobber' already includes 'make clean'
+#
+
+CLEAN_ADDITIONS +=
+CLOBBER_ADDITIONS +=
+
+${PGM}: ${SRCS} ${OBJS}
+ $(make-rel)
+
+all: ${ARCH} $(SRCS) $(PGM)
+
+# the .rel file built here will be put into libbsp.a by
+# libbsp/hppa/BSP/wrapup/Makefile
+install: all
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/README b/c/src/lib/libcpu/m68k/m68040/fpsp/README
new file mode 100644
index 0000000000..9917fd4fd8
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/README
@@ -0,0 +1,40 @@
+M68040FPSP -- Motorola 68040 floating point support package
+-----------------------------------------------------------
+
+Modified for RTEMS by Eric Norum (eric@skatter.usask.ca)
+
+To include these routines in your application call
+
+ M68KFPSPInstallExceptionHandlers ();
+
+before performing any floating point operations.
+
+Acknowledgement
+---------------
+
+This code can be obtained from the Motorola Engineer's Toolbox WWW page
+at http://www.mot.com/SPS/HPESD/tools/freeware/040fpsp.html. Here is
+the description from that page:
+
+ The MC68040 contains a subset of the floating-point hardware that is
+ implemented in the MC68881/882 devices and as such provides reduced yet
+ high performance on-chip floating-point support. Those applications that
+ require full compatibility with earlier members of the M68000 family
+ will need to provide emulation support for the un-implemented MC68040
+ floating-point instructions. The M68040FPSP provides complete emulation
+ of the floating-point functionality available in the MC68881/882.
+
+ The M68040FPSP is offered in source code form to allow integration into
+ existing systems to support either a kernel or library version of
+ floating-point support. The M68040FPSP operates in conjunction with the
+ on-chip MC68040 features to provide fast and full emulation. The kernel
+ version allows full emulation via a trap mechanism to allow full binary
+ compatibility and is fully reentrant. The library version is used to
+ eliminate the trap overhead in situations where re-compilation is
+ possible or desired.
+
+From this page one may download the original source code. In line with
+the first sentence of the second paragraph, we have integrated it with
+RTEMS.
+
+
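A minimal usage sketch follows (not part of the commit). It assumes the
installer takes no arguments, as the call shown in the README suggests, and
that initialization happens in a classic RTEMS Init task; the prototype is
assumed, not taken from the sources.

    #include <rtems.h>

    /* Prototype assumed from the README; the routine is called with no
     * arguments before any floating point code runs. */
    extern void M68KFPSPInstallExceptionHandlers( void );

    rtems_task Init( rtems_task_argument ignored )
    {
      volatile double x = 2.0;

      (void) ignored;

      /* Hook the unimplemented-FP-instruction exceptions first. */
      M68KFPSPInstallExceptionHandlers();

      /* Any MC68881/882 operation emulated by the FPSP is now safe. */
      x = x * x;

      rtems_task_delete( RTEMS_SELF );
    }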
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/bindec.s b/c/src/lib/libcpu/m68k/m68040/fpsp/bindec.s
new file mode 100644
index 0000000000..6b01e02fcd
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/bindec.s
@@ -0,0 +1,920 @@
+//
+// bindec.sa 3.4 1/3/91
+//
+// bindec
+//
+// Description:
+// Converts an input in extended precision format
+// to bcd format.
+//
+// Input:
+// a0 points to the input extended precision value
+// value in memory; d0 contains the k-factor sign-extended
+// to 32-bits. The input may be either normalized,
+// unnormalized, or denormalized.
+//
+// Output: result in the FP_SCR1 space on the stack.
+//
+// Saves and Modifies: D2-D7,A2,FP2
+//
+// Algorithm:
+//
+// A1. Set RM and size ext; Set SIGMA = sign of input.
+// The k-factor is saved for use in d7. Clear the
+// BINDEC_FLG for separating normalized/denormalized
+// input. If input is unnormalized or denormalized,
+// normalize it.
+//
+// A2. Set X = abs(input).
+//
+// A3. Compute ILOG.
+// ILOG is the log base 10 of the input value. It is
+// approximated by adding e + 0.f when the original
+// value is viewed as 2^^e * 1.f in extended precision.
+// This value is stored in d6.
+//
+// A4. Clr INEX bit.
+// The operation in A3 above may have set INEX2.
+//
+// A5. Set ICTR = 0;
+// ICTR is a flag used in A13. It must be set before the
+// loop entry A6.
+//
+// A6. Calculate LEN.
+// LEN is the number of digits to be displayed. The
+// k-factor can dictate either the total number of digits,
+// if it is a positive number, or the number of digits
+// after the decimal point which are to be included as
+// significant. See the 68882 manual for examples.
+// If LEN is computed to be greater than 17, set OPERR in
+// USER_FPSR. LEN is stored in d4.
+//
+// A7. Calculate SCALE.
+// SCALE is equal to 10^ISCALE, where ISCALE is the number
+// of decimal places needed to insure LEN integer digits
+// in the output before conversion to bcd. LAMBDA is the
+// sign of ISCALE, used in A9. Fp1 contains
+// 10^^(abs(ISCALE)) using a rounding mode which is a
+// function of the original rounding mode and the signs
+// of ISCALE and X. A table is given in the code.
+//
+// A8. Clr INEX; Force RZ.
+// The operation in A3 above may have set INEX2.
+// RZ mode is forced for the scaling operation to insure
+// only one rounding error. The grs bits are collected in
+// the INEX flag for use in A10.
+//
+// A9. Scale X -> Y.
+// The mantissa is scaled to the desired number of
+// significant digits. The excess digits are collected
+// in INEX2.
+//
+// A10. Or in INEX.
+// If INEX is set, round error occurred. This is
+// compensated for by 'or-ing' in the INEX2 flag to
+// the lsb of Y.
+//
+// A11. Restore original FPCR; set size ext.
+// Perform FINT operation in the user's rounding mode.
+// Keep the size to extended.
+//
+// A12. Calculate YINT = FINT(Y) according to user's rounding
+// mode. The FPSP routine sintd0 is used. The output
+// is in fp0.
+//
+// A13. Check for LEN digits.
+// If the int operation results in more than LEN digits,
+// or less than LEN -1 digits, adjust ILOG and repeat from
+// A6. This test occurs only on the first pass. If the
+// result is exactly 10^LEN, decrement ILOG and divide
+// the mantissa by 10.
+//
+// A14. Convert the mantissa to bcd.
+// The binstr routine is used to convert the LEN digit
+// mantissa to bcd in memory. The input to binstr is
+// to be a fraction; i.e. (mantissa)/10^LEN and adjusted
+// such that the decimal point is to the left of bit 63.
+// The bcd digits are stored in the correct position in
+// the final string area in memory.
+//
+// A15. Convert the exponent to bcd.
+// As in A14 above, the exp is converted to bcd and the
+// digits are stored in the final string.
+// Test the length of the final exponent string. If the
+// length is 4, set operr.
+//
+// A16. Write sign bits to final string.
+//
+// Implementation Notes:
+//
+// The registers are used as follows:
+//
+// d0: scratch; LEN input to binstr
+// d1: scratch
+// d2: upper 32-bits of mantissa for binstr
+// d3: scratch;lower 32-bits of mantissa for binstr
+// d4: LEN
+// d5: LAMBDA/ICTR
+// d6: ILOG
+// d7: k-factor
+// a0: ptr for original operand/final result
+// a1: scratch pointer
+// a2: pointer to FP_X; abs(original value) in ext
+// fp0: scratch
+// fp1: scratch
+// fp2: scratch
+// F_SCR1:
+// F_SCR2:
+// L_SCR1:
+// L_SCR2:
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//BINDEC idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ .include "fpsp.defs"
+
+ |section 8
+
+// Constants in extended precision
+LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
+LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
+
+// Constants in single precision
+FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000
+FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000
+FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000
+F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000
+
+RBDTBL: .byte 0,0,0,0
+ .byte 3,3,2,2
+ .byte 3,2,2,3
+ .byte 2,3,3,2
+
+ |xref binstr
+ |xref sintdo
+ |xref ptenrn,ptenrm,ptenrp
+
+ .global bindec
+ .global sc_mul
+bindec:
+ moveml %d2-%d7/%a2,-(%a7)
+ fmovemx %fp0-%fp2,-(%a7)
+
+// A1. Set RM and size ext. Set SIGMA = sign input;
+// The k-factor is saved for use in d7. Clear BINDEC_FLG for
+// separating normalized/denormalized input. If the input
+// is a denormalized number, set the BINDEC_FLG memory word
+// to signal denorm. If the input is unnormalized, normalize
+// the input and test for denormalized result.
+//
+ fmovel #rm_mode,%FPCR //set RM and ext
+ movel (%a0),L_SCR2(%a6) //save exponent for sign check
+ movel %d0,%d7 //move k-factor to d7
+ clrb BINDEC_FLG(%a6) //clr norm/denorm flag
+ movew STAG(%a6),%d0 //get stag
+ andiw #0xe000,%d0 //isolate stag bits
+ beq A2_str //if zero, input is norm
+//
+// Normalize the denorm
+//
+un_de_norm:
+ movew (%a0),%d0
+ andiw #0x7fff,%d0 //strip sign of normalized exp
+ movel 4(%a0),%d1
+ movel 8(%a0),%d2
+norm_loop:
+ subw #1,%d0
+ lsll #1,%d2
+ roxll #1,%d1
+ tstl %d1
+ bges norm_loop
+//
+// Test if the normalized input is denormalized
+//
+ tstw %d0
+ bgts pos_exp //if greater than zero, it is a norm
+ st BINDEC_FLG(%a6) //set flag for denorm
+pos_exp:
+ andiw #0x7fff,%d0 //strip sign of normalized exp
+ movew %d0,(%a0)
+ movel %d1,4(%a0)
+ movel %d2,8(%a0)
+
+// A2. Set X = abs(input).
+//
+A2_str:
+ movel (%a0),FP_SCR2(%a6) // move input to work space
+ movel 4(%a0),FP_SCR2+4(%a6) // move input to work space
+ movel 8(%a0),FP_SCR2+8(%a6) // move input to work space
+ andil #0x7fffffff,FP_SCR2(%a6) //create abs(X)
+
+// A3. Compute ILOG.
+// ILOG is the log base 10 of the input value. It is approx-
+// imated by adding e + 0.f when the original value is viewed
+// as 2^^e * 1.f in extended precision. This value is stored
+// in d6.
+//
+// Register usage:
+// Input/Output
+// d0: k-factor/exponent
+// d2: x/x
+// d3: x/x
+// d4: x/x
+// d5: x/x
+// d6: x/ILOG
+// d7: k-factor/Unchanged
+// a0: ptr for original operand/final result
+// a1: x/x
+// a2: x/x
+// fp0: x/float(ILOG)
+// fp1: x/x
+// fp2: x/x
+// F_SCR1:x/x
+// F_SCR2:Abs(X)/Abs(X) with $3fff exponent
+// L_SCR1:x/x
+// L_SCR2:first word of X packed/Unchanged
+
+ tstb BINDEC_FLG(%a6) //check for denorm
+ beqs A3_cont //if clr, continue with norm
+ movel #-4933,%d6 //force ILOG = -4933
+ bras A4_str
+A3_cont:
+ movew FP_SCR2(%a6),%d0 //move exp to d0
+ movew #0x3fff,FP_SCR2(%a6) //replace exponent with 0x3fff
+ fmovex FP_SCR2(%a6),%fp0 //now fp0 has 1.f
+ subw #0x3fff,%d0 //strip off bias
+ faddw %d0,%fp0 //add in exp
+ fsubs FONE,%fp0 //subtract off 1.0
+ fbge pos_res //if pos, branch
+ fmulx LOG2UP1,%fp0 //if neg, mul by LOG2UP1
+ fmovel %fp0,%d6 //put ILOG in d6 as a lword
+ bras A4_str //go move out ILOG
+pos_res:
+ fmulx LOG2,%fp0 //if pos, mul by LOG2
+ fmovel %fp0,%d6 //put ILOG in d6 as a lword
+
+
+// A4. Clr INEX bit.
+// The operation in A3 above may have set INEX2.
+
+A4_str:
+ fmovel #0,%FPSR //zero all of fpsr - nothing needed
+
+
+// A5. Set ICTR = 0;
+// ICTR is a flag used in A13. It must be set before the
+// loop entry A6. The lower word of d5 is used for ICTR.
+
+ clrw %d5 //clear ICTR
+
+
+// A6. Calculate LEN.
+// LEN is the number of digits to be displayed. The k-factor
+// can dictate either the total number of digits, if it is
+// a positive number, or the number of digits after the
+// original decimal point which are to be included as
+// significant. See the 68882 manual for examples.
+// If LEN is computed to be greater than 17, set OPERR in
+// USER_FPSR. LEN is stored in d4.
+//
+// Register usage:
+// Input/Output
+// d0: exponent/Unchanged
+// d2: x/x/scratch
+// d3: x/x
+// d4: exc picture/LEN
+// d5: ICTR/Unchanged
+// d6: ILOG/Unchanged
+// d7: k-factor/Unchanged
+// a0: ptr for original operand/final result
+// a1: x/x
+// a2: x/x
+// fp0: float(ILOG)/Unchanged
+// fp1: x/x
+// fp2: x/x
+// F_SCR1:x/x
+// F_SCR2:Abs(X) with $3fff exponent/Unchanged
+// L_SCR1:x/x
+// L_SCR2:first word of X packed/Unchanged
+
+A6_str:
+ tstl %d7 //branch on sign of k
+ bles k_neg //if k <= 0, LEN = ILOG + 1 - k
+ movel %d7,%d4 //if k > 0, LEN = k
+ bras len_ck //skip to LEN check
+k_neg:
+ movel %d6,%d4 //first load ILOG to d4
+ subl %d7,%d4 //subtract off k
+ addql #1,%d4 //add in the 1
+len_ck:
+ tstl %d4 //LEN check: branch on sign of LEN
+ bles LEN_ng //if neg, set LEN = 1
+ cmpl #17,%d4 //test if LEN > 17
+ bles A7_str //if not, forget it
+ movel #17,%d4 //set max LEN = 17
+ tstl %d7 //if negative, never set OPERR
+ bles A7_str //if positive, continue
+ orl #opaop_mask,USER_FPSR(%a6) //set OPERR & AIOP in USER_FPSR
+ bras A7_str //finished here
+LEN_ng:
+ moveql #1,%d4 //min LEN is 1
+
+
+// A7. Calculate SCALE.
+// SCALE is equal to 10^ISCALE, where ISCALE is the number
+// of decimal places needed to insure LEN integer digits
+// in the output before conversion to bcd. LAMBDA is the sign
+// of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
+// the rounding mode as given in the following table (see
+// Coonen, p. 7.23 as ref.; however, the SCALE variable is
+// of opposite sign in bindec.sa from Coonen).
+//
+// Initial USE
+// FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
+// ----------------------------------------------
+// RN 00 0 0 00/0 RN
+// RN 00 0 1 00/0 RN
+// RN 00 1 0 00/0 RN
+// RN 00 1 1 00/0 RN
+// RZ 01 0 0 11/3 RP
+// RZ 01 0 1 11/3 RP
+// RZ 01 1 0 10/2 RM
+// RZ 01 1 1 10/2 RM
+// RM 10 0 0 11/3 RP
+// RM 10 0 1 10/2 RM
+// RM 10 1 0 10/2 RM
+// RM 10 1 1 11/3 RP
+// RP 11 0 0 10/2 RM
+// RP 11 0 1 11/3 RP
+// RP 11 1 0 11/3 RP
+// RP 11 1 1 10/2 RM
+//
+// Register usage:
+// Input/Output
+// d0: exponent/scratch - final is 0
+// d2: x/0 or 24 for A9
+// d3: x/scratch - offset ptr into PTENRM array
+// d4: LEN/Unchanged
+// d5: 0/ICTR:LAMBDA
+// d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
+// d7: k-factor/Unchanged
+// a0: ptr for original operand/final result
+// a1: x/ptr to PTENRM array
+// a2: x/x
+// fp0: float(ILOG)/Unchanged
+// fp1: x/10^ISCALE
+// fp2: x/x
+// F_SCR1:x/x
+// F_SCR2:Abs(X) with $3fff exponent/Unchanged
+// L_SCR1:x/x
+// L_SCR2:first word of X packed/Unchanged
+
+A7_str:
+ tstl %d7 //test sign of k
+ bgts k_pos //if pos and > 0, skip this
+ cmpl %d6,%d7 //test k - ILOG
+ blts k_pos //if ILOG >= k, skip this
+ movel %d7,%d6 //if ((k<0) & (ILOG < k)) ILOG = k
+k_pos:
+ movel %d6,%d0 //calc ILOG + 1 - LEN in d0
+ addql #1,%d0 //add the 1
+ subl %d4,%d0 //sub off LEN
+ swap %d5 //use upper word of d5 for LAMBDA
+ clrw %d5 //set it zero initially
+ clrw %d2 //set up d2 for very small case
+ tstl %d0 //test sign of ISCALE
+ bges iscale //if pos, skip next inst
+ addqw #1,%d5 //if neg, set LAMBDA true
+ cmpl #0xffffecd4,%d0 //test iscale <= -4908
+ bgts no_inf //if false, skip rest
+ addil #24,%d0 //add in 24 to iscale
+ movel #24,%d2 //put 24 in d2 for A9
+no_inf:
+ negl %d0 //and take abs of ISCALE
+iscale:
+ fmoves FONE,%fp1 //init fp1 to 1
+ bfextu USER_FPCR(%a6){#26:#2},%d1 //get initial rmode bits
+ lslw #1,%d1 //put them in bits 2:1
+ addw %d5,%d1 //add in LAMBDA
+ lslw #1,%d1 //put them in bits 3:1
+ tstl L_SCR2(%a6) //test sign of original x
+ bges x_pos //if pos, don't set bit 0
+ addql #1,%d1 //if neg, set bit 0
+x_pos:
+ leal RBDTBL,%a2 //load rbdtbl base
+ moveb (%a2,%d1),%d3 //load d3 with new rmode
+ lsll #4,%d3 //put bits in proper position
+ fmovel %d3,%fpcr //load bits into fpu
+ lsrl #4,%d3 //put bits in proper position
+ tstb %d3 //decode new rmode for pten table
+ bnes not_rn //if zero, it is RN
+ leal PTENRN,%a1 //load a1 with RN table base
+ bras rmode //exit decode
+not_rn:
+ lsrb #1,%d3 //get lsb in carry
+ bccs not_rp //if carry clear, it is RM
+ leal PTENRP,%a1 //load a1 with RP table base
+ bras rmode //exit decode
+not_rp:
+ leal PTENRM,%a1 //load a1 with RM table base
+rmode:
+ clrl %d3 //clr table index
+e_loop:
+ lsrl #1,%d0 //shift next bit into carry
+ bccs e_next //if zero, skip the mul
+ fmulx (%a1,%d3),%fp1 //mul by 10**(d3_bit_no)
+e_next:
+ addl #12,%d3 //inc d3 to next pwrten table entry
+ tstl %d0 //test if ISCALE is zero
+ bnes e_loop //if not, loop
+
+
+// A8. Clr INEX; Force RZ.
+// The operation in A3 above may have set INEX2.
+// RZ mode is forced for the scaling operation to insure
+// only one rounding error. The grs bits are collected in
+// the INEX flag for use in A10.
+//
+// Register usage:
+// Input/Output
+
+ fmovel #0,%FPSR //clr INEX
+ fmovel #rz_mode,%FPCR //set RZ rounding mode
+
+
+// A9. Scale X -> Y.
+// The mantissa is scaled to the desired number of significant
+// digits. The excess digits are collected in INEX2. If mul,
+// Check d2 for excess 10 exponential value. If not zero,
+// the iscale value would have caused the pwrten calculation
+// to overflow. Only a negative iscale can cause this, so
+// multiply by 10^(d2), which is now only allowed to be 24,
+// with a multiply by 10^8 and 10^16, which is exact since
+// 10^24 is exact. If the input was denormalized, we must
+// create a busy stack frame with the mul command and the
+// two operands, and allow the fpu to complete the multiply.
+//
+// Register usage:
+// Input/Output
+// d0: FPCR with RZ mode/Unchanged
+// d2: 0 or 24/unchanged
+// d3: x/x
+// d4: LEN/Unchanged
+// d5: ICTR:LAMBDA
+// d6: ILOG/Unchanged
+// d7: k-factor/Unchanged
+// a0: ptr for original operand/final result
+// a1: ptr to PTENRM array/Unchanged
+// a2: x/x
+// fp0: float(ILOG)/X adjusted for SCALE (Y)
+// fp1: 10^ISCALE/Unchanged
+// fp2: x/x
+// F_SCR1:x/x
+// F_SCR2:Abs(X) with $3fff exponent/Unchanged
+// L_SCR1:x/x
+// L_SCR2:first word of X packed/Unchanged
+
+A9_str:
+ fmovex (%a0),%fp0 //load X from memory
+ fabsx %fp0 //use abs(X)
+ tstw %d5 //LAMBDA is in lower word of d5
+ bnes sc_mul //if neg (LAMBDA = 1), scale by mul
+ fdivx %fp1,%fp0 //calculate X / SCALE -> Y to fp0
+ bras A10_st //branch to A10
+
+sc_mul:
+ tstb BINDEC_FLG(%a6) //check for denorm
+ beqs A9_norm //if norm, continue with mul
+ fmovemx %fp1-%fp1,-(%a7) //load ETEMP with 10^ISCALE
+ movel 8(%a0),-(%a7) //load FPTEMP with input arg
+ movel 4(%a0),-(%a7)
+ movel (%a0),-(%a7)
+ movel #18,%d3 //load count for busy stack
+A9_loop:
+ clrl -(%a7) //clear lword on stack
+ dbf %d3,A9_loop
+ moveb VER_TMP(%a6),(%a7) //write current version number
+ moveb #BUSY_SIZE-4,1(%a7) //write current busy size
+ moveb #0x10,0x44(%a7) //set fcefpte[15] bit
+ movew #0x0023,0x40(%a7) //load cmdreg1b with mul command
+ moveb #0xfe,0x8(%a7) //load all 1s to cu savepc
+ frestore (%a7)+ //restore frame to fpu for completion
+ fmulx 36(%a1),%fp0 //multiply fp0 by 10^8
+ fmulx 48(%a1),%fp0 //multiply fp0 by 10^16
+ bras A10_st
+A9_norm:
+ tstw %d2 //test for small exp case
+ beqs A9_con //if zero, continue as normal
+ fmulx 36(%a1),%fp0 //multiply fp0 by 10^8
+ fmulx 48(%a1),%fp0 //multiply fp0 by 10^16
+A9_con:
+ fmulx %fp1,%fp0 //calculate X * SCALE -> Y to fp0
+
+
+// A10. Or in INEX.
+// If INEX is set, round error occurred. This is compensated
+// for by 'or-ing' in the INEX2 flag to the lsb of Y.
+//
+// Register usage:
+// Input/Output
+// d0: FPCR with RZ mode/FPSR with INEX2 isolated
+// d2: x/x
+// d3: x/x
+// d4: LEN/Unchanged
+// d5: ICTR:LAMBDA
+// d6: ILOG/Unchanged
+// d7: k-factor/Unchanged
+// a0: ptr for original operand/final result
+// a1: ptr to PTENxx array/Unchanged
+// a2: x/ptr to FP_SCR2(a6)
+// fp0: Y/Y with lsb adjusted
+// fp1: 10^ISCALE/Unchanged
+// fp2: x/x
+
+A10_st:
+ fmovel %FPSR,%d0 //get FPSR
+ fmovex %fp0,FP_SCR2(%a6) //move Y to memory
+ leal FP_SCR2(%a6),%a2 //load a2 with ptr to FP_SCR2
+ btstl #9,%d0 //check if INEX2 set
+ beqs A11_st //if clear, skip rest
+ oril #1,8(%a2) //or in 1 to lsb of mantissa
+ fmovex FP_SCR2(%a6),%fp0 //write adjusted Y back to fpu
+
+
+// A11. Restore original FPCR; set size ext.
+// Perform FINT operation in the user's rounding mode. Keep
+// the size to extended. The sintdo entry point in the sint
+// routine expects the FPCR value to be in USER_FPCR for
+// mode and precision. The original FPCR is saved in L_SCR1.
+
+A11_st:
+ movel USER_FPCR(%a6),L_SCR1(%a6) //save it for later
+ andil #0x00000030,USER_FPCR(%a6) //set size to ext,
+// ;block exceptions
+
+
+// A12. Calculate YINT = FINT(Y) according to user's rounding mode.
+// The FPSP routine sintd0 is used. The output is in fp0.
+//
+// Register usage:
+// Input/Output
+// d0: FPSR with AINEX cleared/FPCR with size set to ext
+// d2: x/x/scratch
+// d3: x/x
+// d4: LEN/Unchanged
+// d5: ICTR:LAMBDA/Unchanged
+// d6: ILOG/Unchanged
+// d7: k-factor/Unchanged
+// a0: ptr for original operand/src ptr for sintdo
+// a1: ptr to PTENxx array/Unchanged
+// a2: ptr to FP_SCR2(a6)/Unchanged
+// a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
+// fp0: Y/YINT
+// fp1: 10^ISCALE/Unchanged
+// fp2: x/x
+// F_SCR1:x/x
+// F_SCR2:Y adjusted for inex/Y with original exponent
+// L_SCR1:x/original USER_FPCR
+// L_SCR2:first word of X packed/Unchanged
+
+A12_st:
+ moveml %d0-%d1/%a0-%a1,-(%a7) //save regs used by sintd0
+ movel L_SCR1(%a6),-(%a7)
+ movel L_SCR2(%a6),-(%a7)
+ leal FP_SCR2(%a6),%a0 //a0 is ptr to F_SCR2(a6)
+ fmovex %fp0,(%a0) //move Y to memory at FP_SCR2(a6)
+ tstl L_SCR2(%a6) //test sign of original operand
+ bges do_fint //if pos, use Y
+ orl #0x80000000,(%a0) //if neg, use -Y
+do_fint:
+ movel USER_FPSR(%a6),-(%a7)
+ bsr sintdo //sint routine returns int in fp0
+ moveb (%a7),USER_FPSR(%a6)
+ addl #4,%a7
+ movel (%a7)+,L_SCR2(%a6)
+ movel (%a7)+,L_SCR1(%a6)
+ moveml (%a7)+,%d0-%d1/%a0-%a1 //restore regs used by sint
+ movel L_SCR2(%a6),FP_SCR2(%a6) //restore original exponent
+ movel L_SCR1(%a6),USER_FPCR(%a6) //restore user's FPCR
+
+
+// A13. Check for LEN digits.
+// If the int operation results in more than LEN digits,
+// or less than LEN -1 digits, adjust ILOG and repeat from
+// A6. This test occurs only on the first pass. If the
+// result is exactly 10^LEN, decrement ILOG and divide
+// the mantissa by 10. The calculation of 10^LEN cannot
+// be inexact, since all powers of ten upto 10^27 are exact
+// in extended precision, so the use of a previous power-of-ten
+// table will introduce no error.
+//
+//
+// Register usage:
+// Input/Output
+// d0: FPCR with size set to ext/scratch final = 0
+// d2: x/x
+// d3: x/scratch final = x
+// d4: LEN/LEN adjusted
+// d5: ICTR:LAMBDA/LAMBDA:ICTR
+// d6: ILOG/ILOG adjusted
+// d7: k-factor/Unchanged
+// a0: pointer into memory for packed bcd string formation
+// a1: ptr to PTENxx array/Unchanged
+// a2: ptr to FP_SCR2(a6)/Unchanged
+// fp0: int portion of Y/abs(YINT) adjusted
+// fp1: 10^ISCALE/Unchanged
+// fp2: x/10^LEN
+// F_SCR1:x/x
+// F_SCR2:Y with original exponent/Unchanged
+// L_SCR1:original USER_FPCR/Unchanged
+// L_SCR2:first word of X packed/Unchanged
+
+A13_st:
+ swap %d5 //put ICTR in lower word of d5
+ tstw %d5 //check if ICTR = 0
+ bne not_zr //if non-zero, go to second test
+//
+// Compute 10^(LEN-1)
+//
+ fmoves FONE,%fp2 //init fp2 to 1.0
+ movel %d4,%d0 //put LEN in d0
+ subql #1,%d0 //d0 = LEN -1
+ clrl %d3 //clr table index
+l_loop:
+ lsrl #1,%d0 //shift next bit into carry
+ bccs l_next //if zero, skip the mul
+ fmulx (%a1,%d3),%fp2 //mul by 10**(d3_bit_no)
+l_next:
+ addl #12,%d3 //inc d3 to next pwrten table entry
+ tstl %d0 //test if LEN is zero
+ bnes l_loop //if not, loop
+//
+// 10^LEN-1 is computed for this test and A14. If the input was
+// denormalized, check only the case in which YINT > 10^LEN.
+//
+ tstb BINDEC_FLG(%a6) //check if input was norm
+ beqs A13_con //if norm, continue with checking
+ fabsx %fp0 //take abs of YINT
+ bra test_2
+//
+// Compare abs(YINT) to 10^(LEN-1) and 10^LEN
+//
+A13_con:
+ fabsx %fp0 //take abs of YINT
+ fcmpx %fp2,%fp0 //compare abs(YINT) with 10^(LEN-1)
+ fbge test_2 //if greater, do next test
+ subql #1,%d6 //subtract 1 from ILOG
+ movew #1,%d5 //set ICTR
+ fmovel #rm_mode,%FPCR //set rmode to RM
+ fmuls FTEN,%fp2 //compute 10^LEN
+ bra A6_str //return to A6 and recompute YINT
+test_2:
+ fmuls FTEN,%fp2 //compute 10^LEN
+ fcmpx %fp2,%fp0 //compare abs(YINT) with 10^LEN
+ fblt A14_st //if less, all is ok, go to A14
+ fbgt fix_ex //if greater, fix and redo
+ fdivs FTEN,%fp0 //if equal, divide by 10
+ addql #1,%d6 // and inc ILOG
+ bras A14_st // and continue elsewhere
+fix_ex:
+ addql #1,%d6 //increment ILOG by 1
+ movew #1,%d5 //set ICTR
+ fmovel #rm_mode,%FPCR //set rmode to RM
+ bra A6_str //return to A6 and recompute YINT
+//
+// Since ICTR <> 0, we have already been through one adjustment,
+// and shouldn't have another; this is to check if abs(YINT) = 10^LEN
+// 10^LEN is again computed using whatever table is in a1 since the
+// value calculated cannot be inexact.
+//
+not_zr:
+ fmoves FONE,%fp2 //init fp2 to 1.0
+ movel %d4,%d0 //put LEN in d0
+ clrl %d3 //clr table index
+z_loop:
+ lsrl #1,%d0 //shift next bit into carry
+ bccs z_next //if zero, skip the mul
+ fmulx (%a1,%d3),%fp2 //mul by 10**(d3_bit_no)
+z_next:
+ addl #12,%d3 //inc d3 to next pwrten table entry
+ tstl %d0 //test if LEN is zero
+ bnes z_loop //if not, loop
+ fabsx %fp0 //get abs(YINT)
+ fcmpx %fp2,%fp0 //check if abs(YINT) = 10^LEN
+ fbne A14_st //if not, skip this
+ fdivs FTEN,%fp0 //divide abs(YINT) by 10
+ addql #1,%d6 //and inc ILOG by 1
+ addql #1,%d4 // and inc LEN
+ fmuls FTEN,%fp2 // if LEN++, then get 10^^LEN
+
+
+// A14. Convert the mantissa to bcd.
+// The binstr routine is used to convert the LEN digit
+// mantissa to bcd in memory. The input to binstr is
+// to be a fraction; i.e. (mantissa)/10^LEN and adjusted
+// such that the decimal point is to the left of bit 63.
+// The bcd digits are stored in the correct position in
+// the final string area in memory.
+//
+//
+// Register usage:
+// Input/Output
+// d0: x/LEN call to binstr - final is 0
+// d1: x/0
+// d2: x/ms 32-bits of mant of abs(YINT)
+// d3: x/ls 32-bits of mant of abs(YINT)
+// d4: LEN/Unchanged
+// d5: ICTR:LAMBDA/LAMBDA:ICTR
+// d6: ILOG
+// d7: k-factor/Unchanged
+// a0: pointer into memory for packed bcd string formation
+// /ptr to first mantissa byte in result string
+// a1: ptr to PTENxx array/Unchanged
+// a2: ptr to FP_SCR2(a6)/Unchanged
+// fp0: int portion of Y/abs(YINT) adjusted
+// fp1: 10^ISCALE/Unchanged
+// fp2: 10^LEN/Unchanged
+// F_SCR1:x/Work area for final result
+// F_SCR2:Y with original exponent/Unchanged
+// L_SCR1:original USER_FPCR/Unchanged
+// L_SCR2:first word of X packed/Unchanged
+
+A14_st:
+ fmovel #rz_mode,%FPCR //force rz for conversion
+ fdivx %fp2,%fp0 //divide abs(YINT) by 10^LEN
+ leal FP_SCR1(%a6),%a0
+ fmovex %fp0,(%a0) //move abs(YINT)/10^LEN to memory
+ movel 4(%a0),%d2 //move 2nd word of FP_RES to d2
+ movel 8(%a0),%d3 //move 3rd word of FP_RES to d3
+ clrl 4(%a0) //zero word 2 of FP_RES
+ clrl 8(%a0) //zero word 3 of FP_RES
+ movel (%a0),%d0 //move exponent to d0
+ swap %d0 //put exponent in lower word
+ beqs no_sft //if zero, don't shift
+ subil #0x3ffd,%d0 //sub bias less 2 to make fract
+ tstl %d0 //check if > 1
+ bgts no_sft //if so, don't shift
+ negl %d0 //make exp positive
+m_loop:
+ lsrl #1,%d2 //shift d2:d3 right, add 0s
+ roxrl #1,%d3 //the number of places
+ dbf %d0,m_loop //given in d0
+no_sft:
+ tstl %d2 //check for mantissa of zero
+ bnes no_zr //if not, go on
+ tstl %d3 //continue zero check
+ beqs zer_m //if zero, go directly to binstr
+no_zr:
+ clrl %d1 //put zero in d1 for addx
+ addil #0x00000080,%d3 //inc at bit 7
+ addxl %d1,%d2 //continue inc
+ andil #0xffffff80,%d3 //strip off lsb not used by 882
+zer_m:
+ movel %d4,%d0 //put LEN in d0 for binstr call
+ addql #3,%a0 //a0 points to M16 byte in result
+ bsr binstr //call binstr to convert mant
+
+
+// A15. Convert the exponent to bcd.
+// As in A14 above, the exp is converted to bcd and the
+// digits are stored in the final string.
+//
+// Digits are stored in L_SCR1(a6) on return from BINDEC as:
+//
+// 32 16 15 0
+// -----------------------------------------
+// | 0 | e3 | e2 | e1 | e4 | X | X | X |
+// -----------------------------------------
+//
+// And are moved into their proper places in FP_SCR1. If digit e4
+// is non-zero, OPERR is signaled. In all cases, all 4 digits are
+// written as specified in the 881/882 manual for packed decimal.
+//
+// Register usage:
+// Input/Output
+// d0: x/LEN call to binstr - final is 0
+// d1: x/scratch (0);shift count for final exponent packing
+// d2: x/ms 32-bits of exp fraction/scratch
+// d3: x/ls 32-bits of exp fraction
+// d4: LEN/Unchanged
+// d5: ICTR:LAMBDA/LAMBDA:ICTR
+// d6: ILOG
+// d7: k-factor/Unchanged
+// a0: ptr to result string/ptr to L_SCR1(a6)
+// a1: ptr to PTENxx array/Unchanged
+// a2: ptr to FP_SCR2(a6)/Unchanged
+// fp0: abs(YINT) adjusted/float(ILOG)
+// fp1: 10^ISCALE/Unchanged
+// fp2: 10^LEN/Unchanged
+// F_SCR1:Work area for final result/BCD result
+// F_SCR2:Y with original exponent/ILOG/10^4
+// L_SCR1:original USER_FPCR/Exponent digits on return from binstr
+// L_SCR2:first word of X packed/Unchanged
+
+A15_st:
+ tstb BINDEC_FLG(%a6) //check for denorm
+ beqs not_denorm
+ ftstx %fp0 //test for zero
+ fbeq den_zero //if zero, use k-factor or 4933
+ fmovel %d6,%fp0 //float ILOG
+ fabsx %fp0 //get abs of ILOG
+ bras convrt
+den_zero:
+ tstl %d7 //check sign of the k-factor
+ blts use_ilog //if negative, use ILOG
+ fmoves F4933,%fp0 //force exponent to 4933
+ bras convrt //do it
+use_ilog:
+ fmovel %d6,%fp0 //float ILOG
+ fabsx %fp0 //get abs of ILOG
+ bras convrt
+not_denorm:
+ ftstx %fp0 //test for zero
+ fbne not_zero //if zero, force exponent
+ fmoves FONE,%fp0 //force exponent to 1
+ bras convrt //do it
+not_zero:
+ fmovel %d6,%fp0 //float ILOG
+ fabsx %fp0 //get abs of ILOG
+convrt:
+ fdivx 24(%a1),%fp0 //compute ILOG/10^4
+ fmovex %fp0,FP_SCR2(%a6) //store fp0 in memory
+ movel 4(%a2),%d2 //move word 2 to d2
+ movel 8(%a2),%d3 //move word 3 to d3
+ movew (%a2),%d0 //move exp to d0
+ beqs x_loop_fin //if zero, skip the shift
+ subiw #0x3ffd,%d0 //subtract off bias
+ negw %d0 //make exp positive
+x_loop:
+ lsrl #1,%d2 //shift d2:d3 right
+ roxrl #1,%d3 //the number of places
+ dbf %d0,x_loop //given in d0
+x_loop_fin:
+ clrl %d1 //put zero in d1 for addx
+ addil #0x00000080,%d3 //inc at bit 7
+ addxl %d1,%d2 //continue inc
+ andil #0xffffff80,%d3 //strip off lsb not used by 882
+ movel #4,%d0 //put 4 in d0 for binstr call
+ leal L_SCR1(%a6),%a0 //a0 is ptr to L_SCR1 for exp digits
+ bsr binstr //call binstr to convert exp
+ movel L_SCR1(%a6),%d0 //load L_SCR1 lword to d0
+ movel #12,%d1 //use d1 for shift count
+ lsrl %d1,%d0 //shift d0 right by 12
+ bfins %d0,FP_SCR1(%a6){#4:#12} //put e3:e2:e1 in FP_SCR1
+ lsrl %d1,%d0 //shift d0 right by 12
+ bfins %d0,FP_SCR1(%a6){#16:#4} //put e4 in FP_SCR1
+ tstb %d0 //check if e4 is zero
+ beqs A16_st //if zero, skip rest
+ orl #opaop_mask,USER_FPSR(%a6) //set OPERR & AIOP in USER_FPSR
+
+
+// A16. Write sign bits to final string.
+// Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
+//
+// Register usage:
+// Input/Output
+// d0: x/scratch - final is x
+// d2: x/x
+// d3: x/x
+// d4: LEN/Unchanged
+// d5: ICTR:LAMBDA/LAMBDA:ICTR
+// d6: ILOG/ILOG adjusted
+// d7: k-factor/Unchanged
+// a0: ptr to L_SCR1(a6)/Unchanged
+// a1: ptr to PTENxx array/Unchanged
+// a2: ptr to FP_SCR2(a6)/Unchanged
+// fp0: float(ILOG)/Unchanged
+// fp1: 10^ISCALE/Unchanged
+// fp2: 10^LEN/Unchanged
+// F_SCR1:BCD result with correct signs
+// F_SCR2:ILOG/10^4
+// L_SCR1:Exponent digits on return from binstr
+// L_SCR2:first word of X packed/Unchanged
+
+A16_st:
+ clrl %d0 //clr d0 for collection of signs
+ andib #0x0f,FP_SCR1(%a6) //clear first nibble of FP_SCR1
+ tstl L_SCR2(%a6) //check sign of original mantissa
+ bges mant_p //if pos, don't set SM
+ moveql #2,%d0 //move 2 in to d0 for SM
+mant_p:
+ tstl %d6 //check sign of ILOG
+ bges wr_sgn //if pos, don't set SE
+ addql #1,%d0 //set bit 0 in d0 for SE
+wr_sgn:
+ bfins %d0,FP_SCR1(%a6){#0:#2} //insert SM and SE into FP_SCR1
+
+// Clean up and restore all registers used.
+
+ fmovel #0,%FPSR //clear possible inex2/ainex bits
+ fmovemx (%a7)+,%fp0-%fp2
+ moveml (%a7)+,%d2-%d7/%a2
+ rts
+
+ |end
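The digit bookkeeping in steps A6/A7 of bindec above is compact but easy to
misread, so here is the same logic restated as a small C sketch. It is
illustrative only and not part of the commit; the denormalized-input path
and the clamp of ILOG against a negative k-factor are omitted.

    #include <stdbool.h>

    struct bindec_plan {
      int  len;     /* number of BCD digits to produce (1..17)            */
      int  iscale;  /* Y = X / 10^iscale leaves LEN integer digits        */
      bool operr;   /* set when more than 17 digits were requested        */
    };

    static struct bindec_plan plan_digits( int k, int ilog )
    {
      struct bindec_plan p = { 0, 0, false };

      /* A6: a positive k-factor gives the digit count directly,
       * otherwise LEN = ILOG + 1 - k counts digits after the point. */
      p.len = ( k > 0 ) ? k : ilog + 1 - k;

      if ( p.len < 1 )
        p.len = 1;            /* at least one digit                  */
      if ( p.len > 17 ) {
        p.len = 17;           /* hardware maximum                    */
        if ( k > 0 )
          p.operr = true;     /* OPERR/AIOP would be set in USER_FPSR */
      }

      /* A7: ISCALE = ILOG + 1 - LEN decimal places of scaling. */
      p.iscale = ilog + 1 - p.len;
      return p;
    }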
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/binstr.s b/c/src/lib/libcpu/m68k/m68040/fpsp/binstr.s
new file mode 100644
index 0000000000..5688d505f1
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/binstr.s
@@ -0,0 +1,140 @@
+//
+// binstr.sa 3.3 12/19/90
+//
+//
+// Description: Converts a 64-bit binary integer to bcd.
+//
+// Input: 64-bit binary integer in d2:d3, desired length (LEN) in
+// d0, and a pointer to start in memory for bcd characters
+// in a0. (This pointer must point to byte 4 of the first
+// lword of the packed decimal memory string.)
+//
+// Output: LEN bcd digits representing the 64-bit integer.
+//
+// Algorithm:
+// The 64-bit binary is assumed to have a decimal point before
+// bit 63. The fraction is multiplied by 10 using a mul by 2
+// shift and a mul by 8 shift. The bits shifted out of the
+// msb form a decimal digit. This process is iterated until
+// LEN digits are formed.
+//
+// A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the
+// digit formed will be assumed the least significant. This is
+// to force the first byte formed to have a 0 in the upper 4 bits.
+//
+// A2. Beginning of the loop:
+// Copy the fraction in d2:d3 to d4:d5.
+//
+// A3. Multiply the fraction in d2:d3 by 8 using bit-field
+// extracts and shifts. The three msbs from d2 will go into
+// d1.
+//
+// A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb
+// will be collected by the carry.
+//
+// A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5
+// into d2:d3. D1 will contain the bcd digit formed.
+//
+// A6. Test d7. If zero, the digit formed is the ms digit. If non-
+// zero, it is the ls digit. Put the digit in its place in the
+// upper word of d0. If it is the ls digit, write the word
+// from d0 to memory.
+//
+// A7. Decrement d0 (LEN counter) and repeat the loop until zero.
+//
+// Implementation Notes:
+//
+// The registers are used as follows:
+//
+// d0: LEN counter
+// d1: temp used to form the digit
+// d2: upper 32-bits of fraction for mul by 8
+// d3: lower 32-bits of fraction for mul by 8
+// d4: upper 32-bits of fraction for mul by 2
+// d5: lower 32-bits of fraction for mul by 2
+// d6: temp for bit-field extracts
+// d7: byte digit formation word;digit count {0,1}
+// a0: pointer into memory for packed bcd string formation
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//BINSTR idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ .global binstr
+binstr:
+ moveml %d0-%d7,-(%a7)
+//
+// A1: Init d7
+//
+ moveql #1,%d7 //init d7 for second digit
+ subql #1,%d0 //for dbf d0 would have LEN+1 passes
+//
+// A2. Copy d2:d3 to d4:d5. Start loop.
+//
+loop:
+ movel %d2,%d4 //copy the fraction before muls
+ movel %d3,%d5 //to d4:d5
+//
+// A3. Multiply d2:d3 by 8; extract msbs into d1.
+//
+ bfextu %d2{#0:#3},%d1 //copy 3 msbs of d2 into d1
+ asll #3,%d2 //shift d2 left by 3 places
+ bfextu %d3{#0:#3},%d6 //copy 3 msbs of d3 into d6
+ asll #3,%d3 //shift d3 left by 3 places
+ orl %d6,%d2 //or in msbs from d3 into d2
+//
+// A4. Multiply d4:d5 by 2; add carry out to d1.
+//
+ asll #1,%d5 //mul d5 by 2
+ roxll #1,%d4 //mul d4 by 2
+ swap %d6 //put 0 in d6 lower word
+ addxw %d6,%d1 //add in extend from mul by 2
+//
+// A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
+//
+ addl %d5,%d3 //add lower 32 bits
+ nop //ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
+ addxl %d4,%d2 //add with extend upper 32 bits
+ nop //ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
+ addxw %d6,%d1 //add in extend from add to d1
+ swap %d6 //with d6 = 0; put 0 in upper word
+//
+// A6. Test d7 and branch.
+//
+ tstw %d7 //if zero, store digit & to loop
+ beqs first_d //if non-zero, form byte & write
+sec_d:
+ swap %d7 //bring first digit to word d7b
+ aslw #4,%d7 //first digit in upper 4 bits d7b
+ addw %d1,%d7 //add in ls digit to d7b
+ moveb %d7,(%a0)+ //store d7b byte in memory
+ swap %d7 //put LEN counter in word d7a
+ clrw %d7 //set d7a to signal no digits done
+ dbf %d0,loop //do loop some more!
+ bras end_bstr //finished, so exit
+first_d:
+ swap %d7 //put digit word in d7b
+ movew %d1,%d7 //put new digit in d7b
+ swap %d7 //put LEN counter in word d7a
+ addqw #1,%d7 //set d7a to signal first digit done
+ dbf %d0,loop //do loop some more!
+ swap %d7 //put last digit in string
+ lslw #4,%d7 //move it to upper 4 bits
+ moveb %d7,(%a0)+ //store it in memory string
+//
+// Clean up and return. The bcd digits have been written to the memory string.
+//
+end_bstr:
+ moveml (%a7)+,%d0-%d7
+ rts
+ |end
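The multiply-by-ten trick in binstr (x*10 = x*8 + x*2, with the bits that
fall out of the top of the 64-bit fraction forming the next decimal digit)
is restated below as a small C sketch. It is illustrative only and not part
of the commit: the real routine packs two BCD digits per byte and aligns
the first digit, while this sketch simply emits ASCII digits.

    #include <stdint.h>
    #include <stdio.h>

    /* Treat 'frac' as a fraction with the binary point above bit 63 and
     * peel off 'len' decimal digits, most significant first. */
    static void binstr_sketch( uint64_t frac, int len, char *out )
    {
      for ( int i = 0; i < len; ++i ) {
        uint64_t by8   = frac << 3;                  /* frac * 8 (mod 2^64)  */
        uint64_t by2   = frac << 1;                  /* frac * 2 (mod 2^64)  */
        unsigned digit = (unsigned) ( frac >> 61 )   /* 3 msbs lost by the *8 */
                       + (unsigned) ( frac >> 63 );  /* 1 msb  lost by the *2 */

        frac = by8 + by2;                            /* frac * 10 (mod 2^64) */
        if ( frac < by8 )                            /* carry out of the add */
          ++digit;

        out[i] = (char) ( '0' + digit );
      }
      out[len] = '\0';
    }

    int main( void )
    {
      char buf[18];

      /* 0.5 with the binary point above bit 63: three digits give "500". */
      binstr_sketch( UINT64_C( 0x8000000000000000 ), 3, buf );
      printf( "%s\n", buf );
      return 0;
    }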
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/bugfix.s b/c/src/lib/libcpu/m68k/m68040/fpsp/bugfix.s
new file mode 100644
index 0000000000..c5ad7617cd
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/bugfix.s
@@ -0,0 +1,496 @@
+//
+// bugfix.sa 3.2 1/31/91
+//
+//
+// This file contains workarounds for bugs in the 040
+// relating to the Floating-Point Software Package (FPSP)
+//
+// Fixes for bugs: 1238
+//
+// Bug: 1238
+//
+//
+// /* The following dirty_bit clear should be left in
+// * the handler permanently to improve throughput.
+// * The dirty_bits are located at bits [23:16] in
+// * longword $08 in the busy frame $4x60. Bit 16
+// * corresponds to FP0, bit 17 corresponds to FP1,
+// * and so on.
+// */
+// if (E3_exception_just_serviced) {
+// dirty_bit[cmdreg3b[9:7]] = 0;
+// }
+//
+// if (fsave_format_version != $40) {goto NOFIX}
+//
+// if !(E3_exception_just_serviced) {goto NOFIX}
+// if (cupc == 0000000) {goto NOFIX}
+// if ((cmdreg1b[15:13] != 000) &&
+// (cmdreg1b[15:10] != 010001)) {goto NOFIX}
+// if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) &&
+// (cmdreg1b[12:10] != cmdreg3b[9:7])) ) &&
+// ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) &&
+// (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX}
+//
+// /* Note: for 6d43b or 8d43b, you may want to add the following code
+// * to get better coverage. (If you do not insert this code, the part
+// * won't lock up; it will simply get the wrong answer.)
+// * Do NOT insert this code for 10d43b or later parts.
+// *
+// * if (fpiarcu == integer stack return address) {
+// * cupc = 0000000;
+// * goto NOFIX;
+// * }
+// */
+//
+// if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2}
+// FIX_OPCLASS0:
+// if (((cmdreg1b[12:10] == cmdreg2b[9:7]) ||
+// (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) &&
+// (cmdreg1b[12:10] != cmdreg3b[9:7]) &&
+// (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */
+// /* We execute the following code if there is an
+// xu conflict and NOT an nu conflict */
+//
+// /* first save some values on the fsave frame */
+// stag_temp = STAG[fsave_frame];
+// cmdreg1b_temp = CMDREG1B[fsave_frame];
+// dtag_temp = DTAG[fsave_frame];
+// ete15_temp = ETE15[fsave_frame];
+//
+// CUPC[fsave_frame] = 0000000;
+// FRESTORE
+// FSAVE
+//
+// /* If the xu instruction is exceptional, we punt.
+// * Otherwise, we would have to include OVFL/UNFL handler
+// * code here to get the correct answer.
+// */
+// if (fsave_frame_format == $4060) {goto KILL_PROCESS}
+//
+// fsave_frame = /* build a long frame of all zeros */
+// fsave_frame_format = $4060; /* label it as long frame */
+//
+// /* load it with the temps we saved */
+// STAG[fsave_frame] = stag_temp;
+// CMDREG1B[fsave_frame] = cmdreg1b_temp;
+// DTAG[fsave_frame] = dtag_temp;
+// ETE15[fsave_frame] = ete15_temp;
+//
+// /* Make sure that the cmdreg3b dest reg is not going to
+// * be destroyed by a FMOVEM at the end of all this code.
+// * If it is, you should move the current value of the reg
+// * onto the stack so that the reg will be loaded with that value.
+// */
+//
+// /* All done. Proceed with the code below */
+// }
+//
+// etemp = FP_reg_[cmdreg1b[12:10]];
+// ete15 = ~ete14;
+// cmdreg1b[15:10] = 010010;
+// clear(bug_flag_procIDxxxx);
+// FRESTORE and return;
+//
+//
+// FIX_OPCLASS2:
+// if ((cmdreg1b[9:7] == cmdreg2b[9:7]) &&
+// (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */
+// /* We execute the following code if there is an
+// xu conflict and NOT an nu conflict */
+//
+// /* first save some values on the fsave frame */
+// stag_temp = STAG[fsave_frame];
+// cmdreg1b_temp = CMDREG1B[fsave_frame];
+// dtag_temp = DTAG[fsave_frame];
+// ete15_temp = ETE15[fsave_frame];
+// etemp_temp = ETEMP[fsave_frame];
+//
+// CUPC[fsave_frame] = 0000000;
+// FRESTORE
+// FSAVE
+//
+//
+// /* If the xu instruction is exceptional, we punt.
+// * Otherwise, we would have to include OVFL/UNFL handler
+// * code here to get the correct answer.
+// */
+// if (fsave_frame_format == $4060) {goto KILL_PROCESS}
+//
+// fsave_frame = /* build a long frame of all zeros */
+// fsave_frame_format = $4060; /* label it as long frame */
+//
+// /* load it with the temps we saved */
+// STAG[fsave_frame] = stag_temp;
+// CMDREG1B[fsave_frame] = cmdreg1b_temp;
+// DTAG[fsave_frame] = dtag_temp;
+// ETE15[fsave_frame] = ete15_temp;
+// ETEMP[fsave_frame] = etemp_temp;
+//
+// /* Make sure that the cmdreg3b dest reg is not going to
+// * be destroyed by a FMOVEM at the end of all this code.
+// * If it is, you should move the current value of the reg
+// * onto the stack so that the reg will be loaded with that value.
+// */
+//
+// /* All done. Proceed with the code below */
+// }
+//
+// if (etemp_exponent == min_sgl) etemp_exponent = min_dbl;
+// if (etemp_exponent == max_sgl) etemp_exponent = max_dbl;
+// cmdreg1b[15:10] = 010101;
+// clear(bug_flag_procIDxxxx);
+// FRESTORE and return;
+//
+//
+// NOFIX:
+// clear(bug_flag_procIDxxxx);
+// FRESTORE and return;
+//
+
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//BUGFIX idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref fpsp_fmt_error
+
+ .global b1238_fix
+b1238_fix:
+//
+// This code is entered only on completion of the handling of an
+// nu-generated ovfl, unfl, or inex exception. If the version
+// number of the fsave is not $40, this handler is not necessary.
+// Simply branch to fix_done and exit normally.
+//
+ cmpib #VER_40,4(%a7)
+ bne fix_done
+//
+// Test for cu_savepc equal to zero. If not, this is not a bug
+// #1238 case.
+//
+ moveb CU_SAVEPC(%a6),%d0
+ andib #0xFE,%d0
+ beq fix_done //if zero, this is not bug #1238
+
+//
+// Test the register conflict aspect. If opclass0, check for
+// cu src equal to xu dest or equal to nu dest. If so, go to
+// op0. Else, or if opclass2, check for cu dest equal to
+// xu dest or equal to nu dest. If so, go to tst_opcl. Else,
+// exit, it is not the bug case.
+//
+// Check for opclass 0. If not, go and check for opclass 2 and sgl.
+//
+ movew CMDREG1B(%a6),%d0
+ andiw #0xE000,%d0 //strip all but opclass
+ bne op2sgl //not opclass 0, check op2
+//
+// Check for cu and nu register conflict. If one exists, this takes
+// priority over a cu and xu conflict.
+//
+ bfextu CMDREG1B(%a6){#3:#3},%d0 //get 1st src
+ bfextu CMDREG3B(%a6){#6:#3},%d1 //get 3rd dest
+ cmpb %d0,%d1
+ beqs op0 //if equal, continue bugfix
+//
+// Check for cu dest equal to nu dest. If so, go and fix the
+// bug condition. Otherwise, exit.
+//
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //get 1st dest
+ cmpb %d0,%d1 //cmp 1st dest with 3rd dest
+ beqs op0 //if equal, continue bugfix
+//
+// Check for cu and xu register conflict.
+//
+ bfextu CMDREG2B(%a6){#6:#3},%d1 //get 2nd dest
+ cmpb %d0,%d1 //cmp 1st dest with 2nd dest
+ beqs op0_xu //if equal, continue bugfix
+ bfextu CMDREG1B(%a6){#3:#3},%d0 //get 1st src
+ cmpb %d0,%d1 //cmp 1st src with 2nd dest
+ beq op0_xu
+ bne fix_done //if the reg checks fail, exit
+//
+// We have the opclass 0 situation.
+//
+op0:
+ bfextu CMDREG1B(%a6){#3:#3},%d0 //get source register no
+ movel #7,%d1
+ subl %d0,%d1
+ clrl %d0
+ bsetl %d1,%d0
+ fmovemx %d0,ETEMP(%a6) //load source to ETEMP
+
+ moveb #0x12,%d0
+ bfins %d0,CMDREG1B(%a6){#0:#6} //opclass 2, extended
+//
+// Set ETEMP exponent bit 15 as the opposite of ete14
+//
+ btst #6,ETEMP_EX(%a6) //check etemp exponent bit 14
+ beq setete15
+ bclr #etemp15_bit,STAG(%a6)
+ bra finish
+setete15:
+ bset #etemp15_bit,STAG(%a6)
+ bra finish
+
+//
+// We have the case in which a conflict exists between the cu src or
+// dest and the dest of the xu. We must clear the instruction in
+// the cu and restore the state, allowing the instruction in the
+// xu to complete. Remember, the instruction in the nu
+// was exceptional, and was completed by the appropriate handler.
+// If the result of the xu instruction is not exceptional, we can
+// restore the instruction from the cu to the frame and continue
+// processing the original exception. If the result is also
+// exceptional, we choose to kill the process.
+//
+// Items saved from the stack:
+//
+// $3c stag - L_SCR1
+// $40 cmdreg1b - L_SCR2
+// $44 dtag - L_SCR3
+//
+// The cu savepc is set to zero, and the frame is restored to the
+// fpu.
+//
+op0_xu:
+ movel STAG(%a6),L_SCR1(%a6)
+ movel CMDREG1B(%a6),L_SCR2(%a6)
+ movel DTAG(%a6),L_SCR3(%a6)
+ andil #0xe0000000,L_SCR3(%a6)
+ moveb #0,CU_SAVEPC(%a6)
+ movel (%a7)+,%d1 //save return address from bsr
+ frestore (%a7)+
+ fsave -(%a7)
+//
+// Check if the instruction which just completed was exceptional.
+//
+ cmpw #0x4060,(%a7)
+ beq op0_xb
+//
+// It is necessary to isolate the result of the instruction in the
+// xu if it is to fp0 - fp3 and write that value to the USER_FPn
+// locations on the stack. The correct destination register is in
+// cmdreg2b.
+//
+ bfextu CMDREG2B(%a6){#6:#3},%d0 //get dest register no
+ cmpil #3,%d0
+ bgts op0_xi
+ beqs op0_fp3
+ cmpil #1,%d0
+ blts op0_fp0
+ beqs op0_fp1
+op0_fp2:
+ fmovemx %fp2-%fp2,USER_FP2(%a6)
+ bras op0_xi
+op0_fp1:
+ fmovemx %fp1-%fp1,USER_FP1(%a6)
+ bras op0_xi
+op0_fp0:
+ fmovemx %fp0-%fp0,USER_FP0(%a6)
+ bras op0_xi
+op0_fp3:
+ fmovemx %fp3-%fp3,USER_FP3(%a6)
+//
+// The frame returned is idle. We must build a busy frame to hold
+// the cu state information and setup etemp.
+//
+op0_xi:
+ movel #22,%d0 //clear 23 lwords
+ clrl (%a7)
+op0_loop:
+ clrl -(%a7)
+ dbf %d0,op0_loop
+ movel #0x40600000,-(%a7)
+ movel L_SCR1(%a6),STAG(%a6)
+ movel L_SCR2(%a6),CMDREG1B(%a6)
+ movel L_SCR3(%a6),DTAG(%a6)
+ moveb #0x6,CU_SAVEPC(%a6)
+ movel %d1,-(%a7) //return bsr return address
+ bfextu CMDREG1B(%a6){#3:#3},%d0 //get source register no
+ movel #7,%d1
+ subl %d0,%d1
+ clrl %d0
+ bsetl %d1,%d0
+ fmovemx %d0,ETEMP(%a6) //load source to ETEMP
+
+ moveb #0x12,%d0
+ bfins %d0,CMDREG1B(%a6){#0:#6} //opclass 2, extended
+//
+// Set ETEMP exponent bit 15 as the opposite of ete14
+//
+ btst #6,ETEMP_EX(%a6) //check etemp exponent bit 14
+ beq op0_sete15
+ bclr #etemp15_bit,STAG(%a6)
+ bra finish
+op0_sete15:
+ bset #etemp15_bit,STAG(%a6)
+ bra finish
+
+//
+// The frame returned is busy. It is not possible to reconstruct
+// the code sequence to allow completion. We will jump to
+// fpsp_fmt_error and allow the kernel to kill the process.
+//
+op0_xb:
+ jmp fpsp_fmt_error
+
+//
+// Check for opclass 2 and single size. If not both, exit.
+//
+op2sgl:
+ movew CMDREG1B(%a6),%d0
+ andiw #0xFC00,%d0 //strip all but opclass and size
+ cmpiw #0x4400,%d0 //test for opclass 2 and size=sgl
+ bne fix_done //if not, it is not bug 1238
+//
+// Check for cu dest equal to nu dest or equal to xu dest, with
+// a cu and nu conflict taking priority over a cu and xu conflict. If either,
+// go and fix the bug condition. Otherwise, exit.
+//
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //get 1st dest
+ bfextu CMDREG3B(%a6){#6:#3},%d1 //get 3rd dest
+ cmpb %d0,%d1 //cmp 1st dest with 3rd dest
+ beq op2_com //if equal, continue bugfix
+ bfextu CMDREG2B(%a6){#6:#3},%d1 //get 2nd dest
+ cmpb %d0,%d1 //cmp 1st dest with 2nd dest
+ bne fix_done //if the reg checks fail, exit
+//
+// We have the case in which a conflict exists between the cu src or
+// dest and the dest of the xu. We must clear the instruction in
+// the cu and restore the state, allowing the instruction in the
+// xu to complete. Remember, the instruction in the nu
+// was exceptional, and was completed by the appropriate handler.
+// If the result of the xu instruction is not exceptional, we can
+// restore the instruction from the cu to the frame and continue
+// processing the original exception. If the result is also
+// exceptional, we choose to kill the process.
+//
+// Items saved from the stack:
+//
+// $3c stag - L_SCR1
+// $40 cmdreg1b - L_SCR2
+// $44 dtag - L_SCR3
+// etemp - FP_SCR2
+//
+// The cu savepc is set to zero, and the frame is restored to the
+// fpu.
+//
+op2_xu:
+ movel STAG(%a6),L_SCR1(%a6)
+ movel CMDREG1B(%a6),L_SCR2(%a6)
+ movel DTAG(%a6),L_SCR3(%a6)
+ andil #0xe0000000,L_SCR3(%a6)
+ moveb #0,CU_SAVEPC(%a6)
+ movel ETEMP(%a6),FP_SCR2(%a6)
+ movel ETEMP_HI(%a6),FP_SCR2+4(%a6)
+ movel ETEMP_LO(%a6),FP_SCR2+8(%a6)
+ movel (%a7)+,%d1 //save return address from bsr
+ frestore (%a7)+
+ fsave -(%a7)
+//
+// Check if the instruction which just completed was exceptional.
+//
+ cmpw #0x4060,(%a7)
+ beq op2_xb
+//
+// It is necessary to isolate the result of the instruction in the
+// xu if it is to fp0 - fp3 and write that value to the USER_FPn
+// locations on the stack. The correct destination register is in
+// cmdreg2b.
+//
+ bfextu CMDREG2B(%a6){#6:#3},%d0 //get dest register no
+ cmpil #3,%d0
+ bgts op2_xi
+ beqs op2_fp3
+ cmpil #1,%d0
+ blts op2_fp0
+ beqs op2_fp1
+op2_fp2:
+ fmovemx %fp2-%fp2,USER_FP2(%a6)
+ bras op2_xi
+op2_fp1:
+ fmovemx %fp1-%fp1,USER_FP1(%a6)
+ bras op2_xi
+op2_fp0:
+ fmovemx %fp0-%fp0,USER_FP0(%a6)
+ bras op2_xi
+op2_fp3:
+ fmovemx %fp3-%fp3,USER_FP3(%a6)
+//
+// The frame returned is idle. We must build a busy frame to hold
+// the cu state information and fix up etemp.
+//
+op2_xi:
+ movel #22,%d0 //clear 23 lwords
+ clrl (%a7)
+op2_loop:
+ clrl -(%a7)
+ dbf %d0,op2_loop
+ movel #0x40600000,-(%a7)
+ movel L_SCR1(%a6),STAG(%a6)
+ movel L_SCR2(%a6),CMDREG1B(%a6)
+ movel L_SCR3(%a6),DTAG(%a6)
+ moveb #0x6,CU_SAVEPC(%a6)
+ movel FP_SCR2(%a6),ETEMP(%a6)
+ movel FP_SCR2+4(%a6),ETEMP_HI(%a6)
+ movel FP_SCR2+8(%a6),ETEMP_LO(%a6)
+ movel %d1,-(%a7)
+ bra op2_com
+
+//
+// We have the opclass 2 single source situation.
+//
+op2_com:
+ moveb #0x15,%d0
+ bfins %d0,CMDREG1B(%a6){#0:#6} //opclass 2, double
+
+ cmpw #0x407F,ETEMP_EX(%a6) //single +max
+ bnes case2
+ movew #0x43FF,ETEMP_EX(%a6) //to double +max
+ bra finish
+case2:
+ cmpw #0xC07F,ETEMP_EX(%a6) //single -max
+ bnes case3
+ movew #0xC3FF,ETEMP_EX(%a6) //to double -max
+ bra finish
+case3:
+ cmpw #0x3F80,ETEMP_EX(%a6) //single +min
+ bnes case4
+ movew #0x3C00,ETEMP_EX(%a6) //to double +min
+ bra finish
+case4:
+ cmpw #0xBF80,ETEMP_EX(%a6) //single -min
+ bne fix_done
+ movew #0xBC00,ETEMP_EX(%a6) //to double -min
+ bra finish
+//
+// The frame returned is busy. It is not possible to reconstruct
+// the code sequence to allow completion. fpsp_fmt_error causes
+// an fline illegal instruction to be executed.
+//
+// You should replace the jump to fpsp_fmt_error with a jump
+// to the entry point used to kill a process.
+//
+op2_xb:
+ jmp fpsp_fmt_error
+
+//
+// Enter here if the case is not of the situations affected by
+// bug #1238, or if the fix is completed, and exit.
+//
+finish:
+fix_done:
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/decbin.s b/c/src/lib/libcpu/m68k/m68040/fpsp/decbin.s
new file mode 100644
index 0000000000..c1abffe4b3
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/decbin.s
@@ -0,0 +1,506 @@
+//
+// decbin.sa 3.3 12/19/90
+//
+// Description: Converts normalized packed bcd value pointed to by
+// register A6 to extended-precision value in FP0.
+//
+// Input: Normalized packed bcd value in ETEMP(a6).
+//
+// Output: Exact floating-point representation of the packed bcd value.
+//
+// Saves and Modifies: D2-D5
+//
+// Speed: The program decbin takes ??? cycles to execute.
+//
+// Object Size:
+//
+// External Reference(s): None.
+//
+// Algorithm:
+// Expected is a normal bcd (i.e. non-exceptional; all inf, zero,
+// and NaN operands are dispatched without entering this routine)
+// value in 68881/882 format at location ETEMP(A6).
+//
+// A1. Convert the bcd exponent to binary by successive adds and muls.
+// Set the sign according to SE. Subtract 16 to compensate
+// for the mantissa which is to be interpreted as 17 integer
+// digits, rather than 1 integer and 16 fraction digits.
+// Note: this operation can never overflow.
+//
+// A2. Convert the bcd mantissa to binary by successive
+// adds and muls in FP0. Set the sign according to SM.
+// The mantissa digits will be converted with the decimal point
+// assumed following the least-significant digit.
+// Note: this operation can never overflow.
+//
+// A3. Count the number of leading/trailing zeros in the
+// bcd string. If SE is positive, count the leading zeros;
+// if negative, count the trailing zeros. Set the adjusted
+// exponent equal to the exponent from A1 and the zero count
+// added if SM = 1 and subtracted if SM = 0. Scale the
+// mantissa the equivalent of forcing in the bcd value:
+//
+// SM = 0 a non-zero digit in the integer position
+// SM = 1 a non-zero digit in Mant0, lsd of the fraction
+//
+//	this will ensure that any value, regardless of its
+// representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted
+// consistently.
+//
+// A4. Calculate the factor 10^exp in FP1 using a table of
+// 10^(2^n) values. To reduce the error in forming factors
+// greater than 10^27, a directed rounding scheme is used with
+// tables rounded to RN, RM, and RP, according to the table
+// in the comments of the pwrten section.
+//
+// A5. Form the final binary number by scaling the mantissa by
+// the exponent factor. This is done by multiplying the
+// mantissa in FP0 by the factor in FP1 if the adjusted
+// exponent sign is positive, and dividing FP0 by FP1 if
+// it is negative.
+//
+// Clean up and return. Check if the final mul or div resulted
+// in an inex2 exception. If so, set inex1 in the fpsr and
+// check if the inex1 exception is enabled. If so, set d7 upper
+// word to $0100. This will signal unimp.sa that an enabled inex1
+// exception occurred. Unimp will fix the stack.
+//
+
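As a rough illustration of steps A1 through A5, here is a simplified C model of the conversion (not the FPSP code itself). It treats the operand as a sign pair, a 3-digit decimal exponent and 17 decimal mantissa digits, skips the append/strip-zeros adjustment of A3, and lets powl() stand in for the directed-rounding tables of A4; the struct layout is hypothetical.

    #include <math.h>

    /* Simplified model of decbin; the field layout here is hypothetical. */
    typedef struct {
        int se, sm;              /* exponent sign (SE) and mantissa sign (SM)      */
        int exp_digit[3];        /* 3 bcd exponent digits, most significant first  */
        int man_digit[17];       /* 17 bcd mantissa digits, most significant first */
    } packed_bcd;

    static long double bcd_to_binary(const packed_bcd *p)
    {
        /* A1: exponent by successive adds and muls, apply SE, then subtract 16
         *     because the 17 mantissa digits are treated as one integer.        */
        long e = 0;
        for (int i = 0; i < 3; i++)
            e = e * 10 + p->exp_digit[i];
        if (p->se) e = -e;
        e -= 16;

        /* A2: mantissa by successive adds and muls, then apply SM.              */
        long double m = 0.0L;
        for (int i = 0; i < 17; i++)
            m = m * 10.0L + p->man_digit[i];
        if (p->sm) m = -m;

        /* A4/A5: scale by 10**e; multiply for positive e, divide for negative.  */
        return (e >= 0) ? m * powl(10.0L, (long double)e)
                        : m / powl(10.0L, (long double)-e);
    }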
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//DECBIN idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+//
+// PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
+// to nearest, minus, and plus, respectively. The tables include
+// 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
+// is required until the power is greater than 27, however, all
+// tables include the first 5 for ease of indexing.
+//
+ |xref PTENRN
+ |xref PTENRM
+ |xref PTENRP
+
+RTABLE: .byte 0,0,0,0
+ .byte 2,3,2,3
+ .byte 2,3,3,2
+ .byte 3,2,2,3
+
+ .global decbin
+ .global calc_e
+ .global pwrten
+ .global calc_m
+ .global norm
+ .global ap_st_z
+ .global ap_st_n
+//
+ .set FNIBS,7
+ .set FSTRT,0
+//
+ .set ESTRT,4
+ .set EDIGITS,2 //
+//
+// Constants in single precision
+FZERO: .long 0x00000000
+FONE: .long 0x3F800000
+FTEN: .long 0x41200000
+
+ .set TEN,10
+
+//
+decbin:
+ | fmovel #0,FPCR ;clr real fpcr
+ moveml %d2-%d5,-(%a7)
+//
+// Calculate exponent:
+// 1. Copy bcd value in memory for use as a working copy.
+// 2. Calculate absolute value of exponent in d1 by mul and add.
+// 3. Correct for exponent sign.
+// 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
+// (i.e., all digits assumed left of the decimal point.)
+//
+// Register usage:
+//
+// calc_e:
+// (*) d0: temp digit storage
+// (*) d1: accumulator for binary exponent
+// (*) d2: digit count
+// (*) d3: offset pointer
+// ( ) d4: first word of bcd
+// ( ) a0: pointer to working bcd value
+// ( ) a6: pointer to original bcd value
+// (*) FP_SCR1: working copy of original bcd value
+// (*) L_SCR1: copy of original exponent word
+//
+calc_e:
+ movel #EDIGITS,%d2 //# of nibbles (digits) in fraction part
+ moveql #ESTRT,%d3 //counter to pick up digits
+ leal FP_SCR1(%a6),%a0 //load tmp bcd storage address
+ movel ETEMP(%a6),(%a0) //save input bcd value
+ movel ETEMP_HI(%a6),4(%a0) //save words 2 and 3
+ movel ETEMP_LO(%a6),8(%a0) //and work with these
+ movel (%a0),%d4 //get first word of bcd
+ clrl %d1 //zero d1 for accumulator
+e_gd:
+ mulul #TEN,%d1 //mul partial product by one digit place
+ bfextu %d4{%d3:#4},%d0 //get the digit and zero extend into d0
+ addl %d0,%d1 //d1 = d1 + d0
+ addqb #4,%d3 //advance d3 to the next digit
+ dbf %d2,e_gd //if we have used all 3 digits, exit loop
+ btst #30,%d4 //get SE
+ beqs e_pos //don't negate if pos
+ negl %d1 //negate before subtracting
+e_pos:
+ subl #16,%d1 //sub to compensate for shift of mant
+ bges e_save //if still pos, do not neg
+ negl %d1 //now negative, make pos and set SE
+ orl #0x40000000,%d4 //set SE in d4,
+ orl #0x40000000,(%a0) //and in working bcd
+e_save:
+ movel %d1,L_SCR1(%a6) //save exp in memory
+//
+//
+// Calculate mantissa:
+// 1. Calculate absolute value of mantissa in fp0 by mul and add.
+// 2. Correct for mantissa sign.
+// (i.e., all digits assumed left of the decimal point.)
+//
+// Register usage:
+//
+// calc_m:
+// (*) d0: temp digit storage
+// (*) d1: lword counter
+// (*) d2: digit count
+// (*) d3: offset pointer
+// ( ) d4: words 2 and 3 of bcd
+// ( ) a0: pointer to working bcd value
+// ( ) a6: pointer to original bcd value
+// (*) fp0: mantissa accumulator
+// ( ) FP_SCR1: working copy of original bcd value
+// ( ) L_SCR1: copy of original exponent word
+//
+calc_m:
+ moveql #1,%d1 //word counter, init to 1
+ fmoves FZERO,%fp0 //accumulator
+//
+//
+// Since the packed number has a long word between the first & second parts,
+// get the integer digit then skip down & get the rest of the
+// mantissa. We will unroll the loop once.
+//
+ bfextu (%a0){#28:#4},%d0 //integer part is ls digit in long word
+ faddb %d0,%fp0 //add digit to sum in fp0
+//
+//
+// Get the rest of the mantissa.
+//
+loadlw:
+ movel (%a0,%d1.L*4),%d4 //load mantissa longword into d4
+ moveql #FSTRT,%d3 //counter to pick up digits
+ moveql #FNIBS,%d2 //reset number of digits per a0 ptr
+md2b:
+ fmuls FTEN,%fp0 //fp0 = fp0 * 10
+ bfextu %d4{%d3:#4},%d0 //get the digit and zero extend
+ faddb %d0,%fp0 //fp0 = fp0 + digit
+//
+//
+// If all the digits (8) in that long word have been converted (d2=0),
+// then inc d1 (=2) to point to the next long word and reset d3 to 0
+// to initialize the digit offset, and set d2 to 7 for the digit count;
+// else continue with this long word.
+//
+ addqb #4,%d3 //advance d3 to the next digit
+ dbf %d2,md2b //check for last digit in this lw
+nextlw:
+ addql #1,%d1 //inc lw pointer in mantissa
+ cmpl #2,%d1 //test for last lw
+ ble loadlw //if not, get last one
+
+//
+// Check the sign of the mant and make the value in fp0 the same sign.
+//
+m_sign:
+ btst #31,(%a0) //test sign of the mantissa
+ beqs ap_st_z //if clear, go to append/strip zeros
+ fnegx %fp0 //if set, negate fp0
+
+//
+// Append/strip zeros:
+//
+// For adjusted exponents which have an absolute value greater than 27*,
+// this routine calculates the amount needed to normalize the mantissa
+// for the adjusted exponent. That number is subtracted from the exp
+// if the exp was positive, and added if it was negative. The purpose
+// of this is to reduce the value of the exponent and the possibility
+// of error in calculation of pwrten.
+//
+// 1. Branch on the sign of the adjusted exponent.
+// 2p.(positive exp)
+// 2. Check M16 and the digits in lwords 2 and 3 in descending order.
+// 3. Add one for each zero encountered until a non-zero digit.
+// 4. Subtract the count from the exp.
+// 5. Check if the exp has crossed zero in #3 above; make the exp abs
+// and set SE.
+// 6. Multiply the mantissa by 10**count.
+// 2n.(negative exp)
+// 2. Check the digits in lwords 3 and 2 in descending order.
+// 3. Add one for each zero encountered until a non-zero digit.
+// 4. Add the count to the exp.
+// 5. Check if the exp has crossed zero in #3 above; clear SE.
+// 6. Divide the mantissa by 10**count.
+//
+// *Why 27? If the adjusted exponent is within -28 < expA < 28, then
+// any adjustment due to append/strip zeros will drive the resultant
+// exponent towards zero. Since all pwrten constants with a power
+// of 27 or less are exact, there is no need to use this routine to
+// attempt to lessen the resultant exponent.
+//
+// Register usage:
+//
+// ap_st_z:
+// (*) d0: temp digit storage
+// (*) d1: zero count
+// (*) d2: digit count
+// (*) d3: offset pointer
+// ( ) d4: first word of bcd
+// (*) d5: lword counter
+// ( ) a0: pointer to working bcd value
+// ( ) FP_SCR1: working copy of original bcd value
+// ( ) L_SCR1: copy of original exponent word
+//
+//
+// First check the absolute value of the exponent to see if this
+// routine is necessary. If so, then check the sign of the exponent
+// and do append (+) or strip (-) zeros accordingly.
+// This section handles a positive adjusted exponent.
+//
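A hedged C sketch of the zero-count step described above, using a hypothetical array of the 17 mantissa digits (most significant, M16, first) instead of the packed nibble layout:

    /* Count leading zeros for a positive adjusted exponent, trailing zeros for
     * a negative one; the caller then drives the exponent toward zero by that
     * amount and scales the mantissa by 10**count, as the code below does.    */
    static int zero_count(const int digits[17], int exp_is_negative)
    {
        int n = 0;
        if (!exp_is_negative) {
            while (n < 17 && digits[n] == 0)          /* leading zeros  */
                n++;
        } else {
            while (n < 17 && digits[16 - n] == 0)     /* trailing zeros */
                n++;
        }
        return n;
    }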
+ap_st_z:
+ movel L_SCR1(%a6),%d1 //load expA for range test
+	cmpl	#27,%d1		//compare expA against 27
+ ble pwrten //if abs(expA) <28, skip ap/st zeros
+ btst #30,(%a0) //check sign of exp
+ bnes ap_st_n //if neg, go to neg side
+ clrl %d1 //zero count reg
+ movel (%a0),%d4 //load lword 1 to d4
+ bfextu %d4{#28:#4},%d0 //get M16 in d0
+ bnes ap_p_fx //if M16 is non-zero, go fix exp
+ addql #1,%d1 //inc zero count
+ moveql #1,%d5 //init lword counter
+ movel (%a0,%d5.L*4),%d4 //get lword 2 to d4
+	bnes	ap_p_cl		//if lw 2 is non-zero, check its digits
+ addql #8,%d1 //and inc count by 8
+ addql #1,%d5 //inc lword counter
+ movel (%a0,%d5.L*4),%d4 //get lword 3 to d4
+ap_p_cl:
+ clrl %d3 //init offset reg
+ moveql #7,%d2 //init digit counter
+ap_p_gd:
+ bfextu %d4{%d3:#4},%d0 //get digit
+ bnes ap_p_fx //if non-zero, go to fix exp
+ addql #4,%d3 //point to next digit
+ addql #1,%d1 //inc digit counter
+ dbf %d2,ap_p_gd //get next digit
+ap_p_fx:
+	movel	%d1,%d0		//copy counter to d0
+ movel L_SCR1(%a6),%d1 //get adjusted exp from memory
+ subl %d0,%d1 //subtract count from exp
+ bges ap_p_fm //if still pos, go to pwrten
+	negl	%d1		//now it's neg; get abs
+ movel (%a0),%d4 //load lword 1 to d4
+ orl #0x40000000,%d4 // and set SE in d4
+ orl #0x40000000,(%a0) // and in memory
+//
+// Calculate the mantissa multiplier to compensate for the stripping of
+// zeros from the mantissa.
+//
+ap_p_fm:
+ movel #PTENRN,%a1 //get address of power-of-ten table
+ clrl %d3 //init table index
+ fmoves FONE,%fp1 //init fp1 to 1
+ moveql #3,%d2 //init d2 to count bits in counter
+ap_p_el:
+ asrl #1,%d0 //shift lsb into carry
+ bccs ap_p_en //if 1, mul fp1 by pwrten factor
+ fmulx (%a1,%d3),%fp1 //mul by 10**(d3_bit_no)
+ap_p_en:
+	addl	#12,%d3		//inc d3 to next pwrten table entry
+ tstl %d0 //check if d0 is zero
+ bnes ap_p_el //if not, get next bit
+ fmulx %fp1,%fp0 //mul mantissa by 10**(no_bits_shifted)
+ bras pwrten //go calc pwrten
+//
+// This section handles a negative adjusted exponent.
+//
+ap_st_n:
+ clrl %d1 //clr counter
+ moveql #2,%d5 //set up d5 to point to lword 3
+ movel (%a0,%d5.L*4),%d4 //get lword 3
+ bnes ap_n_cl //if not zero, check digits
+ subl #1,%d5 //dec d5 to point to lword 2
+ addql #8,%d1 //inc counter by 8
+ movel (%a0,%d5.L*4),%d4 //get lword 2
+ap_n_cl:
+ movel #28,%d3 //point to last digit
+ moveql #7,%d2 //init digit counter
+ap_n_gd:
+ bfextu %d4{%d3:#4},%d0 //get digit
+ bnes ap_n_fx //if non-zero, go to exp fix
+ subql #4,%d3 //point to previous digit
+ addql #1,%d1 //inc digit counter
+ dbf %d2,ap_n_gd //get next digit
+ap_n_fx:
+ movel %d1,%d0 //copy counter to d0
+ movel L_SCR1(%a6),%d1 //get adjusted exp from memory
+ subl %d0,%d1 //subtract count from exp
+ bgts ap_n_fm //if still pos, go fix mantissa
+ negl %d1 //take abs of exp and clr SE
+ movel (%a0),%d4 //load lword 1 to d4
+ andl #0xbfffffff,%d4 // and clr SE in d4
+ andl #0xbfffffff,(%a0) // and in memory
+//
+// Calculate the mantissa multiplier to compensate for the appending of
+// zeros to the mantissa.
+//
+ap_n_fm:
+ movel #PTENRN,%a1 //get address of power-of-ten table
+ clrl %d3 //init table index
+ fmoves FONE,%fp1 //init fp1 to 1
+ moveql #3,%d2 //init d2 to count bits in counter
+ap_n_el:
+ asrl #1,%d0 //shift lsb into carry
+ bccs ap_n_en //if 1, mul fp1 by pwrten factor
+ fmulx (%a1,%d3),%fp1 //mul by 10**(d3_bit_no)
+ap_n_en:
+	addl	#12,%d3		//inc d3 to next pwrten table entry
+ tstl %d0 //check if d0 is zero
+ bnes ap_n_el //if not, get next bit
+ fdivx %fp1,%fp0 //div mantissa by 10**(no_bits_shifted)
+//
+//
+// Calculate power-of-ten factor from adjusted and shifted exponent.
+//
+// Register usage:
+//
+// pwrten:
+// (*) d0: temp
+// ( ) d1: exponent
+// (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
+// (*) d3: FPCR work copy
+// ( ) d4: first word of bcd
+// (*) a1: RTABLE pointer
+// calc_p:
+// (*) d0: temp
+// ( ) d1: exponent
+// (*) d3: PWRTxx table index
+// ( ) a0: pointer to working copy of bcd
+// (*) a1: PWRTxx pointer
+// (*) fp1: power-of-ten accumulator
+//
+// Pwrten calculates the exponent factor in the selected rounding mode
+// according to the following table:
+//
+// Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
+//
+// ANY ANY RN RN
+//
+// + + RP RP
+// - + RP RM
+// + - RP RM
+// - - RP RP
+//
+// + + RM RM
+// - + RM RP
+// + - RM RP
+// - - RM RM
+//
+// + + RZ RM
+// - + RZ RM
+// + - RZ RP
+// - - RZ RP
+//
+//
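The 10**|exp| factor itself is built by walking the bits of the exponent and multiplying in precomputed entries of 10**(2**k) from the PTENxx table selected above. A C sketch of that loop, under the assumption that tbl[k] holds 10**(2**k) already rounded in the chosen direction:

    /* Sketch of calc_p / e_loop: binary decomposition of the exponent.        */
    static long double pow10_from_table(const long double *tbl, unsigned long e)
    {
        long double factor = 1.0L;
        for (int k = 0; e != 0; k++, e >>= 1)   /* asrl #1 / bcc in the assembly */
            if (e & 1)
                factor *= tbl[k];               /* fmulx (a1,d3), d3 += 12       */
        return factor;
    }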
+pwrten:
+ movel USER_FPCR(%a6),%d3 //get user's FPCR
+ bfextu %d3{#26:#2},%d2 //isolate rounding mode bits
+ movel (%a0),%d4 //reload 1st bcd word to d4
+ asll #2,%d2 //format d2 to be
+ bfextu %d4{#0:#2},%d0 // {FPCR[6],FPCR[5],SM,SE}
+ addl %d0,%d2 //in d2 as index into RTABLE
+ leal RTABLE,%a1 //load rtable base
+ moveb (%a1,%d2),%d0 //load new rounding bits from table
+ clrl %d3 //clear d3 to force no exc and extended
+ bfins %d0,%d3{#26:#2} //stuff new rounding bits in FPCR
+ fmovel %d3,%FPCR //write new FPCR
+ asrl #1,%d0 //write correct PTENxx table
+ bccs not_rp //to a1
+ leal PTENRP,%a1 //it is RP
+ bras calc_p //go to init section
+not_rp:
+ asrl #1,%d0 //keep checking
+ bccs not_rm
+ leal PTENRM,%a1 //it is RM
+ bras calc_p //go to init section
+not_rm:
+ leal PTENRN,%a1 //it is RN
+calc_p:
+ movel %d1,%d0 //copy exp to d0;use d0
+ bpls no_neg //if exp is negative,
+ negl %d0 //invert it
+ orl #0x40000000,(%a0) //and set SE bit
+no_neg:
+ clrl %d3 //table index
+ fmoves FONE,%fp1 //init fp1 to 1
+e_loop:
+ asrl #1,%d0 //shift next bit into carry
+ bccs e_next //if zero, skip the mul
+ fmulx (%a1,%d3),%fp1 //mul by 10**(d3_bit_no)
+e_next:
+	addl	#12,%d3		//inc d3 to next pwrten table entry
+ tstl %d0 //check if d0 is zero
+ bnes e_loop //not zero, continue shifting
+//
+//
+// Check the sign of the adjusted exp and make the value in fp0 the
+// same sign. If the exp was pos then multiply fp1*fp0;
+// else divide fp0/fp1.
+//
+// Register Usage:
+// norm:
+// ( ) a0: pointer to working bcd value
+// (*) fp0: mantissa accumulator
+// ( ) fp1: scaling factor - 10**(abs(exp))
+//
+norm:
+ btst #30,(%a0) //test the sign of the exponent
+ beqs mul //if clear, go to multiply
+div:
+	fdivx	%fp1,%fp0	//exp is negative, so divide mant by 10**|exp|
+	bras	end_dec
+mul:
+	fmulx	%fp1,%fp0	//exp is positive, so multiply mant by 10**|exp|
+//
+//
+// Clean up and return with result in fp0.
+//
+// If the final mul/div in decbin incurred an inex exception,
+// it will be inex2, but will be reported as inex1 by get_op.
+//
+end_dec:
+ fmovel %FPSR,%d0 //get status register
+ bclrl #inex2_bit+8,%d0 //test for inex2 and clear it
+ fmovel %d0,%FPSR //return status reg w/o inex2
+ beqs no_exc //skip this if no exc
+ orl #inx1a_mask,USER_FPSR(%a6) //set inex1/ainex
+no_exc:
+ moveml (%a7)+,%d2-%d5
+ rts
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/do_func.s b/c/src/lib/libcpu/m68k/m68040/fpsp/do_func.s
new file mode 100644
index 0000000000..e7fcbbb36a
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/do_func.s
@@ -0,0 +1,559 @@
+//
+// do_func.sa 3.4 2/18/91
+//
+// Do_func performs the unimplemented operation. The operation
+// to be performed is determined from the lower 7 bits of the
+// extension word (except in the case of fmovecr and fsincos).
+// The opcode and tag bits form an index into a jump table in
+// tbldo.sa. Cases of zero, infinity and NaN are handled in
+// do_func by forcing the default result. Normalized and
+// denormalized (there are no unnormalized numbers at this
+// point) are passed on to the emulation code.
+//
+// CMDREG1B and STAG are extracted from the fsave frame
+// and combined to form the table index. The function called
+// will start with a0 pointing to the ETEMP operand. Dyadic
+// functions can find FPTEMP at -12(a0).
+//
+// Called functions return their result in fp0. Sincos returns
+// sin(x) in fp0 and cos(x) in fp1.
+//
+
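The table lookup described above can be pictured as a flat index computed from the low extension bits and the source tag; 0x38 is the extension limit checked in the code, and the handler table itself is left abstract here. A sketch under those assumptions:

    #include <stddef.h>

    typedef void (*fpsp_func)(void);

    /* Sketch of do_func's dispatch: 0x38 extension values x 8 stag values.    */
    static fpsp_func pick_handler(const fpsp_func tbl[0x38 * 8],
                                  unsigned cmdreg1b, unsigned stag)
    {
        unsigned ext = cmdreg1b & 0x7F;       /* low 7 bits of the extension word */
        if (ext >= 0x38)
            return NULL;                      /* serror: illegal extension        */
        return tbl[(ext << 3) + (stag & 7)];  /* same index math as the assembly  */
    }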
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+DO_FUNC: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref t_dz2
+ |xref t_operr
+ |xref t_inx2
+ |xref t_resdnrm
+ |xref dst_nan
+ |xref src_nan
+ |xref nrm_set
+ |xref sto_cos
+
+ |xref tblpre
+ |xref slognp1,slogn,slog10,slog2
+ |xref slognd,slog10d,slog2d
+ |xref smod,srem
+ |xref sscale
+ |xref smovcr
+
+PONE: .long 0x3fff0000,0x80000000,0x00000000 //+1
+MONE: .long 0xbfff0000,0x80000000,0x00000000 //-1
+PZERO: .long 0x00000000,0x00000000,0x00000000 //+0
+MZERO: .long 0x80000000,0x00000000,0x00000000 //-0
+PINF: .long 0x7fff0000,0x00000000,0x00000000 //+inf
+MINF: .long 0xffff0000,0x00000000,0x00000000 //-inf
+QNAN: .long 0x7fff0000,0xffffffff,0xffffffff //non-signaling nan
+PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 //+PI/2
+MPIBY2: .long 0xbFFF0000,0xC90FDAA2,0x2168C235 //-PI/2
+
+ .global do_func
+do_func:
+ clrb CU_ONLY(%a6)
+//
+// Check for fmovecr. It does not follow the format of fp gen
+// unimplemented instructions. The test is on the upper 6 bits;
+// if they are $17, the inst is fmovecr. Call entry smovcr
+// directly.
+//
+ bfextu CMDREG1B(%a6){#0:#6},%d0 //get opclass and src fields
+ cmpil #0x17,%d0 //if op class and size fields are $17,
+// ;it is FMOVECR; if not, continue
+ bnes not_fmovecr
+ jmp smovcr //fmovecr; jmp directly to emulation
+
+not_fmovecr:
+ movew CMDREG1B(%a6),%d0
+ andl #0x7F,%d0
+ cmpil #0x38,%d0 //if the extension is >= $38,
+ bges serror //it is illegal
+ bfextu STAG(%a6){#0:#3},%d1
+ lsll #3,%d0 //make room for STAG
+ addl %d1,%d0 //combine for final index into table
+ leal tblpre,%a1 //start of monster jump table
+ movel (%a1,%d0.w*4),%a1 //real target address
+ leal ETEMP(%a6),%a0 //a0 is pointer to src op
+ movel USER_FPCR(%a6),%d1
+ andl #0xFF,%d1 // discard all but rounding mode/prec
+ fmovel #0,%fpcr
+ jmp (%a1)
+//
+// ERROR
+//
+ .global serror
+serror:
+ st STORE_FLG(%a6)
+ rts
+//
+// These routines load forced values into fp0. They are called
+// by index into tbldo.
+//
+// Load a signed zero to fp0 and set inex2/ainex
+//
+ .global snzrinx
+snzrinx:
+ btstb #sign_bit,LOCAL_EX(%a0) //get sign of source operand
+ bnes ld_mzinx //if negative, branch
+ bsr ld_pzero //bsr so we can return and set inx
+ bra t_inx2 //now, set the inx for the next inst
+ld_mzinx:
+ bsr ld_mzero //if neg, load neg zero, return here
+ bra t_inx2 //now, set the inx for the next inst
+//
+// Load a signed zero to fp0; do not set inex2/ainex
+//
+ .global szero
+szero:
+ btstb #sign_bit,LOCAL_EX(%a0) //get sign of source operand
+ bne ld_mzero //if neg, load neg zero
+ bra ld_pzero //load positive zero
+//
+// Load a signed infinity to fp0; do not set inex2/ainex
+//
+ .global sinf
+sinf:
+ btstb #sign_bit,LOCAL_EX(%a0) //get sign of source operand
+ bne ld_minf //if negative branch
+ bra ld_pinf
+//
+// Load a signed one to fp0; do not set inex2/ainex
+//
+ .global sone
+sone:
+ btstb #sign_bit,LOCAL_EX(%a0) //check sign of source
+ bne ld_mone
+ bra ld_pone
+//
+// Load a signed pi/2 to fp0; do not set inex2/ainex
+//
+ .global spi_2
+spi_2:
+ btstb #sign_bit,LOCAL_EX(%a0) //check sign of source
+ bne ld_mpi2
+ bra ld_ppi2
+//
+// Load either a +0 or +inf for plus/minus operand
+//
+ .global szr_inf
+szr_inf:
+ btstb #sign_bit,LOCAL_EX(%a0) //check sign of source
+ bne ld_pzero
+ bra ld_pinf
+//
+// Result is either an operr or +inf for plus/minus operand
+// [Used by slogn, slognp1, slog10, and slog2]
+//
+ .global sopr_inf
+sopr_inf:
+ btstb #sign_bit,LOCAL_EX(%a0) //check sign of source
+ bne t_operr
+ bra ld_pinf
+//
+// FLOGNP1
+//
+ .global sslognp1
+sslognp1:
+ fmovemx (%a0),%fp0-%fp0
+ fcmpb #-1,%fp0
+ fbgt slognp1
+ fbeq t_dz2 //if = -1, divide by zero exception
+ fmovel #0,%FPSR //clr N flag
+ bra t_operr //take care of operands < -1
+//
+// FETOXM1
+//
+ .global setoxm1i
+setoxm1i:
+ btstb #sign_bit,LOCAL_EX(%a0) //check sign of source
+ bne ld_mone
+ bra ld_pinf
+//
+// FLOGN
+//
+// Test for 1.0 as an input argument, returning +zero. Also check
+// the sign and return operr if negative.
+//
+ .global sslogn
+sslogn:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ bne t_operr //take care of operands < 0
+ cmpiw #0x3fff,LOCAL_EX(%a0) //test for 1.0 input
+ bne slogn
+ cmpil #0x80000000,LOCAL_HI(%a0)
+ bne slogn
+ tstl LOCAL_LO(%a0)
+ bne slogn
+ fmovex PZERO,%fp0
+ rts
+
+ .global sslognd
+sslognd:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ beq slognd
+ bra t_operr //take care of operands < 0
+
+//
+// FLOG10
+//
+ .global sslog10
+sslog10:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ bne t_operr //take care of operands < 0
+ cmpiw #0x3fff,LOCAL_EX(%a0) //test for 1.0 input
+ bne slog10
+ cmpil #0x80000000,LOCAL_HI(%a0)
+ bne slog10
+ tstl LOCAL_LO(%a0)
+ bne slog10
+ fmovex PZERO,%fp0
+ rts
+
+ .global sslog10d
+sslog10d:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ beq slog10d
+ bra t_operr //take care of operands < 0
+
+//
+// FLOG2
+//
+ .global sslog2
+sslog2:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ bne t_operr //take care of operands < 0
+ cmpiw #0x3fff,LOCAL_EX(%a0) //test for 1.0 input
+ bne slog2
+ cmpil #0x80000000,LOCAL_HI(%a0)
+ bne slog2
+ tstl LOCAL_LO(%a0)
+ bne slog2
+ fmovex PZERO,%fp0
+ rts
+
+ .global sslog2d
+sslog2d:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ beq slog2d
+ bra t_operr //take care of operands < 0
+
+//
+// FMOD
+//
+pmodt:
+// ;$21 fmod
+// ;dtag,stag
+ .long smod // 00,00 norm,norm = normal
+ .long smod_oper // 00,01 norm,zero = nan with operr
+ .long smod_fpn // 00,10 norm,inf = fpn
+ .long smod_snan // 00,11 norm,nan = nan
+ .long smod_zro // 01,00 zero,norm = +-zero
+ .long smod_oper // 01,01 zero,zero = nan with operr
+ .long smod_zro // 01,10 zero,inf = +-zero
+ .long smod_snan // 01,11 zero,nan = nan
+ .long smod_oper // 10,00 inf,norm = nan with operr
+ .long smod_oper // 10,01 inf,zero = nan with operr
+ .long smod_oper // 10,10 inf,inf = nan with operr
+ .long smod_snan // 10,11 inf,nan = nan
+ .long smod_dnan // 11,00 nan,norm = nan
+ .long smod_dnan // 11,01 nan,zero = nan
+ .long smod_dnan // 11,10 nan,inf = nan
+ .long smod_dnan // 11,11 nan,nan = nan
+
+ .global pmod
+pmod:
+ clrb FPSR_QBYTE(%a6) // clear quotient field
+ bfextu STAG(%a6){#0:#3},%d0 //stag = d0
+ bfextu DTAG(%a6){#0:#3},%d1 //dtag = d1
+
+//
+// Alias extended denorms to norms for the jump table.
+//
+ bclrl #2,%d0
+ bclrl #2,%d1
+
+ lslb #2,%d1
+ orb %d0,%d1 //d1{3:2} = dtag, d1{1:0} = stag
+// ;Tag values:
+// ;00 = norm or denorm
+// ;01 = zero
+// ;10 = inf
+// ;11 = nan
+ lea pmodt,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+
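The same dispatch pattern appears here with a 16-entry table: the denorm tag is aliased to norm by clearing bit 2, and the two remaining 2-bit tags are packed into a 4-bit index. A minimal sketch of the index computation only:

    /* Sketch of the pmod/prem table index: dtag in bits 3:2, stag in bits 1:0. */
    static unsigned mod_rem_index(unsigned dtag, unsigned stag)
    {
        dtag &= ~4u;                          /* alias denorm (100) to norm (000) */
        stag &= ~4u;
        return ((dtag & 3u) << 2) | (stag & 3u);
    }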
+smod_snan:
+ bra src_nan
+smod_dnan:
+ bra dst_nan
+smod_oper:
+ bra t_operr
+smod_zro:
+ moveb ETEMP(%a6),%d1 //get sign of src op
+ moveb FPTEMP(%a6),%d0 //get sign of dst op
+ eorb %d0,%d1 //get exor of sign bits
+ btstl #7,%d1 //test for sign
+	beqs	smod_zsn	//if clr, do not set sign bit
+ bsetb #q_sn_bit,FPSR_QBYTE(%a6) //set q-byte sign bit
+smod_zsn:
+ btstl #7,%d0 //test if + or -
+ beq ld_pzero //if pos then load +0
+ bra ld_mzero //else neg load -0
+
+smod_fpn:
+ moveb ETEMP(%a6),%d1 //get sign of src op
+ moveb FPTEMP(%a6),%d0 //get sign of dst op
+ eorb %d0,%d1 //get exor of sign bits
+ btstl #7,%d1 //test for sign
+	beqs	smod_fsn	//if clr, do not set sign bit
+ bsetb #q_sn_bit,FPSR_QBYTE(%a6) //set q-byte sign bit
+smod_fsn:
+ tstb DTAG(%a6) //filter out denormal destination case
+ bpls smod_nrm //
+ leal FPTEMP(%a6),%a0 //a0<- addr(FPTEMP)
+ bra t_resdnrm //force UNFL(but exact) result
+smod_nrm:
+ fmovel USER_FPCR(%a6),%fpcr //use user's rmode and precision
+ fmovex FPTEMP(%a6),%fp0 //return dest to fp0
+ rts
+
+//
+// FREM
+//
+premt:
+// ;$25 frem
+// ;dtag,stag
+ .long srem // 00,00 norm,norm = normal
+ .long srem_oper // 00,01 norm,zero = nan with operr
+ .long srem_fpn // 00,10 norm,inf = fpn
+ .long srem_snan // 00,11 norm,nan = nan
+ .long srem_zro // 01,00 zero,norm = +-zero
+ .long srem_oper // 01,01 zero,zero = nan with operr
+ .long srem_zro // 01,10 zero,inf = +-zero
+ .long srem_snan // 01,11 zero,nan = nan
+ .long srem_oper // 10,00 inf,norm = nan with operr
+ .long srem_oper // 10,01 inf,zero = nan with operr
+ .long srem_oper // 10,10 inf,inf = nan with operr
+ .long srem_snan // 10,11 inf,nan = nan
+ .long srem_dnan // 11,00 nan,norm = nan
+ .long srem_dnan // 11,01 nan,zero = nan
+ .long srem_dnan // 11,10 nan,inf = nan
+ .long srem_dnan // 11,11 nan,nan = nan
+
+ .global prem
+prem:
+ clrb FPSR_QBYTE(%a6) //clear quotient field
+ bfextu STAG(%a6){#0:#3},%d0 //stag = d0
+ bfextu DTAG(%a6){#0:#3},%d1 //dtag = d1
+//
+// Alias extended denorms to norms for the jump table.
+//
+ bclr #2,%d0
+ bclr #2,%d1
+
+ lslb #2,%d1
+ orb %d0,%d1 //d1{3:2} = dtag, d1{1:0} = stag
+// ;Tag values:
+// ;00 = norm or denorm
+// ;01 = zero
+// ;10 = inf
+// ;11 = nan
+ lea premt,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+
+srem_snan:
+ bra src_nan
+srem_dnan:
+ bra dst_nan
+srem_oper:
+ bra t_operr
+srem_zro:
+ moveb ETEMP(%a6),%d1 //get sign of src op
+ moveb FPTEMP(%a6),%d0 //get sign of dst op
+ eorb %d0,%d1 //get exor of sign bits
+ btstl #7,%d1 //test for sign
+	beqs	srem_zsn	//if clr, do not set sign bit
+ bsetb #q_sn_bit,FPSR_QBYTE(%a6) //set q-byte sign bit
+srem_zsn:
+ btstl #7,%d0 //test if + or -
+ beq ld_pzero //if pos then load +0
+ bra ld_mzero //else neg load -0
+
+srem_fpn:
+ moveb ETEMP(%a6),%d1 //get sign of src op
+ moveb FPTEMP(%a6),%d0 //get sign of dst op
+ eorb %d0,%d1 //get exor of sign bits
+ btstl #7,%d1 //test for sign
+	beqs	srem_fsn	//if clr, do not set sign bit
+ bsetb #q_sn_bit,FPSR_QBYTE(%a6) //set q-byte sign bit
+srem_fsn:
+ tstb DTAG(%a6) //filter out denormal destination case
+ bpls srem_nrm //
+ leal FPTEMP(%a6),%a0 //a0<- addr(FPTEMP)
+ bra t_resdnrm //force UNFL(but exact) result
+srem_nrm:
+ fmovel USER_FPCR(%a6),%fpcr //use user's rmode and precision
+ fmovex FPTEMP(%a6),%fp0 //return dest to fp0
+ rts
+//
+// FSCALE
+//
+pscalet:
+// ;$26 fscale
+// ;dtag,stag
+ .long sscale // 00,00 norm,norm = result
+ .long sscale // 00,01 norm,zero = fpn
+ .long scl_opr // 00,10 norm,inf = nan with operr
+ .long scl_snan // 00,11 norm,nan = nan
+ .long scl_zro // 01,00 zero,norm = +-zero
+ .long scl_zro // 01,01 zero,zero = +-zero
+ .long scl_opr // 01,10 zero,inf = nan with operr
+ .long scl_snan // 01,11 zero,nan = nan
+ .long scl_inf // 10,00 inf,norm = +-inf
+ .long scl_inf // 10,01 inf,zero = +-inf
+ .long scl_opr // 10,10 inf,inf = nan with operr
+ .long scl_snan // 10,11 inf,nan = nan
+ .long scl_dnan // 11,00 nan,norm = nan
+ .long scl_dnan // 11,01 nan,zero = nan
+ .long scl_dnan // 11,10 nan,inf = nan
+ .long scl_dnan // 11,11 nan,nan = nan
+
+ .global pscale
+pscale:
+ bfextu STAG(%a6){#0:#3},%d0 //stag in d0
+ bfextu DTAG(%a6){#0:#3},%d1 //dtag in d1
+ bclrl #2,%d0 //alias denorm into norm
+ bclrl #2,%d1 //alias denorm into norm
+ lslb #2,%d1
+ orb %d0,%d1 //d1{4:2} = dtag, d1{1:0} = stag
+// ;dtag values stag values:
+// ;000 = norm 00 = norm
+// ;001 = zero 01 = zero
+// ;010 = inf 10 = inf
+// ;011 = nan 11 = nan
+// ;100 = dnrm
+//
+//
+ leal pscalet,%a1 //load start of jump table
+ movel (%a1,%d1.w*4),%a1 //load a1 with label depending on tag
+ jmp (%a1) //go to the routine
+
+scl_opr:
+ bra t_operr
+
+scl_dnan:
+ bra dst_nan
+
+scl_zro:
+ btstb #sign_bit,FPTEMP_EX(%a6) //test if + or -
+ beq ld_pzero //if pos then load +0
+ bra ld_mzero //if neg then load -0
+scl_inf:
+ btstb #sign_bit,FPTEMP_EX(%a6) //test if + or -
+ beq ld_pinf //if pos then load +inf
+ bra ld_minf //else neg load -inf
+scl_snan:
+ bra src_nan
+//
+// FSINCOS
+//
+ .global ssincosz
+ssincosz:
+ btstb #sign_bit,ETEMP(%a6) //get sign
+ beqs sincosp
+ fmovex MZERO,%fp0
+ bras sincoscom
+sincosp:
+ fmovex PZERO,%fp0
+sincoscom:
+ fmovemx PONE,%fp1-%fp1 //do not allow FPSR to be affected
+ bra sto_cos //store cosine result
+
+ .global ssincosi
+ssincosi:
+ fmovex QNAN,%fp1 //load NAN
+ bsr sto_cos //store cosine result
+ fmovex QNAN,%fp0 //load NAN
+ bra t_operr
+
+ .global ssincosnan
+ssincosnan:
+ movel ETEMP_EX(%a6),FP_SCR1(%a6)
+ movel ETEMP_HI(%a6),FP_SCR1+4(%a6)
+ movel ETEMP_LO(%a6),FP_SCR1+8(%a6)
+ bsetb #signan_bit,FP_SCR1+4(%a6)
+ fmovemx FP_SCR1(%a6),%fp1-%fp1
+ bsr sto_cos
+ bra src_nan
+//
+// This code forces default values for the zero, inf, and nan cases
+// in the transcendentals code. The CC bits must be set in the
+// stacked FPSR to be correctly reported.
+//
+//**Returns +PI/2
+ .global ld_ppi2
+ld_ppi2:
+ fmovex PPIBY2,%fp0 //load +pi/2
+ bra t_inx2 //set inex2 exc
+
+//**Returns -PI/2
+ .global ld_mpi2
+ld_mpi2:
+ fmovex MPIBY2,%fp0 //load -pi/2
+ orl #neg_mask,USER_FPSR(%a6) //set N bit
+ bra t_inx2 //set inex2 exc
+
+//**Returns +inf
+ .global ld_pinf
+ld_pinf:
+ fmovex PINF,%fp0 //load +inf
+ orl #inf_mask,USER_FPSR(%a6) //set I bit
+ rts
+
+//**Returns -inf
+ .global ld_minf
+ld_minf:
+ fmovex MINF,%fp0 //load -inf
+ orl #neg_mask+inf_mask,USER_FPSR(%a6) //set N and I bits
+ rts
+
+//**Returns +1
+ .global ld_pone
+ld_pone:
+ fmovex PONE,%fp0 //load +1
+ rts
+
+//**Returns -1
+ .global ld_mone
+ld_mone:
+ fmovex MONE,%fp0 //load -1
+ orl #neg_mask,USER_FPSR(%a6) //set N bit
+ rts
+
+//**Returns +0
+ .global ld_pzero
+ld_pzero:
+ fmovex PZERO,%fp0 //load +0
+ orl #z_mask,USER_FPSR(%a6) //set Z bit
+ rts
+
+//**Returns -0
+ .global ld_mzero
+ld_mzero:
+ fmovex MZERO,%fp0 //load -0
+ orl #neg_mask+z_mask,USER_FPSR(%a6) //set N and Z bits
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/fpsp.defs b/c/src/lib/libcpu/m68k/m68040/fpsp/fpsp.defs
new file mode 100644
index 0000000000..ca93677e98
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/fpsp.defs
@@ -0,0 +1,348 @@
+|
+| fpsp.h 3.3 3.3
+|
+
+| Copyright (C) Motorola, Inc. 1990
+| All Rights Reserved
+|
+| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+| The copyright notice above does not evidence any
+| actual or intended publication of such source code.
+
+| fpsp.h --- stack frame offsets during FPSP exception handling
+|
+| These equates are used to access the exception frame, the fsave
+| frame and any local variables needed by the FPSP package.
+|
+| All FPSP handlers begin by executing:
+|
+| link a6,#-LOCAL_SIZE
+| fsave -(a7)
+| movem.l d0-d1/a0-a1,USER_DA(a6)
+| fmovem.x fp0-fp3,USER_FP0(a6)
+| fmove.l fpsr/fpcr/fpiar,USER_FPSR(a6)
+|
+| After initialization, the stack looks like this:
+|
+| A7 ---> +-------------------------------+
+| | |
+| | FPU fsave area |
+| | |
+| +-------------------------------+
+| | |
+| | FPSP Local Variables |
+| | including |
+| | saved registers |
+| | |
+| +-------------------------------+
+| A6 ---> | Saved A6 |
+| +-------------------------------+
+| | |
+| | Exception Frame |
+| | |
+| | |
+|
+| Positive offsets from A6 refer to the exception frame. Negative
+| offsets refer to the Local Variable area and the fsave area.
+| The fsave frame is also accessible 'from the top' via A7.
+|
+| On exit, the handlers execute:
+|
+| movem.l USER_DA(a6),d0-d1/a0-a1
+| fmovem.x USER_FP0(a6),fp0-fp3
+| fmove.l USER_FPSR(a6),fpsr/fpcr/fpiar
+| frestore (a7)+
+| unlk a6
+|
+| and then either 'bra fpsp_done' if the exception was completely
+| handled by the package, or 'bra real_xxxx' which is an external
+| label to a routine that will process a real exception of the
+| type that was generated. Some handlers may omit the 'frestore'
+| if the FPU state after the exception is idle.
+|
+| Sometimes the exception handler will transform the fsave area
+| because it needs to report an exception back to the user. This
+| can happen if the package is entered for an unimplemented float
+| instruction that generates (say) an underflow. Alternatively,
+| a second fsave frame can be pushed onto the stack and the
+| handler exit code will reload the new frame and discard the old.
+|
+| The registers d0, d1, a0, a1 and fp0-fp3 are always saved and
+| restored from the 'local variable' area and can be used as
+| temporaries. If a routine needs to change any
+| of these registers, it should modify the saved copy and let
+| the handler exit code restore the value.
+|
+|----------------------------------------------------------------------
+|
+| Local Variables on the stack
+|
+ .set LOCAL_SIZE,192 | bytes needed for local variables
+ .set LV,-LOCAL_SIZE | convenient base value
+|
+ .set USER_DA,LV+0 | save space for D0-D1,A0-A1
+ .set USER_D0,LV+0 | saved user D0
+ .set USER_D1,LV+4 | saved user D1
+ .set USER_A0,LV+8 | saved user A0
+ .set USER_A1,LV+12 | saved user A1
+ .set USER_FP0,LV+16 | saved user FP0
+ .set USER_FP1,LV+28 | saved user FP1
+ .set USER_FP2,LV+40 | saved user FP2
+ .set USER_FP3,LV+52 | saved user FP3
+ .set USER_FPCR,LV+64 | saved user FPCR
+ .set FPCR_ENABLE,USER_FPCR+2 | FPCR exception enable
+ .set FPCR_MODE,USER_FPCR+3 | FPCR rounding mode control
+ .set USER_FPSR,LV+68 | saved user FPSR
+ .set FPSR_CC,USER_FPSR+0 | FPSR condition code
+ .set FPSR_QBYTE,USER_FPSR+1 | FPSR quotient
+ .set FPSR_EXCEPT,USER_FPSR+2 | FPSR exception
+ .set FPSR_AEXCEPT,USER_FPSR+3 | FPSR accrued exception
+ .set USER_FPIAR,LV+72 | saved user FPIAR
+ .set FP_SCR1,LV+76 | room for a temporary float value
+ .set FP_SCR2,LV+92 | room for a temporary float value
+ .set L_SCR1,LV+108 | room for a temporary long value
+ .set L_SCR2,LV+112 | room for a temporary long value
+ .set STORE_FLG,LV+116
+ .set BINDEC_FLG,LV+117 | used in bindec
+ .set DNRM_FLG,LV+118 | used in res_func
+ .set RES_FLG,LV+119 | used in res_func
+ .set DY_MO_FLG,LV+120 | dyadic/monadic flag
+ .set UFLG_TMP,LV+121 | temporary for uflag errata
+ .set CU_ONLY,LV+122 | cu-only flag
+ .set VER_TMP,LV+123 | temp holding for version number
+ .set L_SCR3,LV+124 | room for a temporary long value
+ .set FP_SCR3,LV+128 | room for a temporary float value
+ .set FP_SCR4,LV+144 | room for a temporary float value
+ .set FP_SCR5,LV+160 | room for a temporary float value
+ .set FP_SCR6,LV+176
+|
+|NEXT equ LV+192 ;need to increase LOCAL_SIZE
+|
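Read as a C struct, the local-variable block above lays out roughly as follows. This is for orientation only; the handlers address these fields through the .set offsets at negative displacements from A6, not through a struct, and treating the FP_SCR areas as 16-byte blobs (12-byte value plus the GRS word) is an interpretation of the offsets rather than a declared type.

    #include <stdint.h>

    struct fpsp_locals {                /* sizeof == LOCAL_SIZE == 192           */
        uint32_t user_d0, user_d1;      /* LV+0,  LV+4                           */
        uint32_t user_a0, user_a1;      /* LV+8,  LV+12                          */
        uint8_t  user_fp0[12];          /* LV+16  extended-precision FP0..FP3    */
        uint8_t  user_fp1[12];          /* LV+28                                 */
        uint8_t  user_fp2[12];          /* LV+40                                 */
        uint8_t  user_fp3[12];          /* LV+52                                 */
        uint32_t user_fpcr;             /* LV+64                                 */
        uint32_t user_fpsr;             /* LV+68                                 */
        uint32_t user_fpiar;            /* LV+72                                 */
        uint8_t  fp_scr1[16];           /* LV+76  12-byte temp + GRS long        */
        uint8_t  fp_scr2[16];           /* LV+92                                 */
        uint32_t l_scr1, l_scr2;        /* LV+108, LV+112                        */
        uint8_t  store_flg, bindec_flg; /* LV+116, LV+117                        */
        uint8_t  dnrm_flg, res_flg;     /* LV+118, LV+119                        */
        uint8_t  dy_mo_flg, uflg_tmp;   /* LV+120, LV+121                        */
        uint8_t  cu_only, ver_tmp;      /* LV+122, LV+123                        */
        uint32_t l_scr3;                /* LV+124                                */
        uint8_t  fp_scr3[16];           /* LV+128                                */
        uint8_t  fp_scr4[16];           /* LV+144                                */
        uint8_t  fp_scr5[16];           /* LV+160                                */
        uint8_t  fp_scr6[16];           /* LV+176                                */
    };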
+|--------------------------------------------------------------------------
+|
+| fsave offsets and bit definitions
+|
+| Offsets are defined from the end of an fsave because the last 10
+| words of a busy frame are the same as the unimplemented frame.
+|
+ .set CU_SAVEPC,LV-92 | micro-pc for CU (1 byte)
+ .set FPR_DIRTY_BITS,LV-91 | fpr dirty bits
+|
+ .set WBTEMP,LV-76 | write back temp (12 bytes)
+ .set WBTEMP_EX,WBTEMP | wbtemp sign and exponent (2 bytes)
+ .set WBTEMP_HI,WBTEMP+4 | wbtemp mantissa [63:32] (4 bytes)
+ .set WBTEMP_LO,WBTEMP+8 | wbtemp mantissa [31:00] (4 bytes)
+|
+ .set WBTEMP_SGN,WBTEMP+2 | used to store sign
+|
+ .set FPSR_SHADOW,LV-64 | fpsr shadow reg
+|
+ .set FPIARCU,LV-60 | Instr. addr. reg. for CU (4 bytes)
+|
+ .set CMDREG2B,LV-52 | cmd reg for machine 2
+ .set CMDREG3B,LV-48 | cmd reg for E3 exceptions (2 bytes)
+|
+ .set NMNEXC,LV-44 | NMNEXC (unsup,snan bits only)
+ .set nmn_unsup_bit,1
+ .set nmn_snan_bit,0
+|
+ .set NMCEXC,LV-43 | NMNEXC & NMCEXC
+ .set nmn_operr_bit,7
+ .set nmn_ovfl_bit,6
+ .set nmn_unfl_bit,5
+ .set nmc_unsup_bit,4
+ .set nmc_snan_bit,3
+ .set nmc_operr_bit,2
+ .set nmc_ovfl_bit,1
+ .set nmc_unfl_bit,0
+|
+ .set STAG,LV-40 | source tag (1 byte)
+ .set WBTEMP_GRS,LV-40 | alias wbtemp guard, round, sticky
+ .set guard_bit,1 | guard bit is bit number 1
+ .set round_bit,0 | round bit is bit number 0
+ .set stag_mask,0xE0 | upper 3 bits are source tag type
+	.set	denorm_bit,7		| bit determines if denorm or unnorm
+ .set etemp15_bit,4 | etemp exponent bit #15
+ .set wbtemp66_bit,2 | wbtemp mantissa bit #66
+ .set wbtemp1_bit,1 | wbtemp mantissa bit #1
+ .set wbtemp0_bit,0 | wbtemp mantissa bit #0
+|
+ .set STICKY,LV-39 | holds sticky bit
+ .set sticky_bit,7
+|
+ .set CMDREG1B,LV-36 | cmd reg for E1 exceptions (2 bytes)
+ .set kfact_bit,12 | distinguishes static/dynamic k-factor
+| ;on packed move outs. NOTE: this
+| ;equate only works when CMDREG1B is in
+| ;a register.
+|
+ .set CMDWORD,LV-35 | command word in cmd1b
+ .set direction_bit,5 | bit 0 in opclass
+ .set size_bit2,12 | bit 2 in size field
+|
+ .set DTAG,LV-32 | dest tag (1 byte)
+ .set dtag_mask,0xE0 | upper 3 bits are dest type tag
+ .set fptemp15_bit,4 | fptemp exponent bit #15
+|
+ .set WB_BYTE,LV-31 | holds WBTE15 bit (1 byte)
+ .set wbtemp15_bit,4 | wbtemp exponent bit #15
+|
+ .set E_BYTE,LV-28 | holds E1 and E3 bits (1 byte)
+ .set E1,2 | which bit is E1 flag
+ .set E3,1 | which bit is E3 flag
+ .set SFLAG,0 | which bit is S flag
+|
+ .set T_BYTE,LV-27 | holds T and U bits (1 byte)
+ .set XFLAG,7 | which bit is X flag
+ .set UFLAG,5 | which bit is U flag
+ .set TFLAG,4 | which bit is T flag
+|
+ .set FPTEMP,LV-24 | fptemp (12 bytes)
+ .set FPTEMP_EX,FPTEMP | fptemp sign and exponent (2 bytes)
+ .set FPTEMP_HI,FPTEMP+4 | fptemp mantissa [63:32] (4 bytes)
+ .set FPTEMP_LO,FPTEMP+8 | fptemp mantissa [31:00] (4 bytes)
+|
+ .set FPTEMP_SGN,FPTEMP+2 | used to store sign
+|
+ .set ETEMP,LV-12 | etemp (12 bytes)
+ .set ETEMP_EX,ETEMP | etemp sign and exponent (2 bytes)
+ .set ETEMP_HI,ETEMP+4 | etemp mantissa [63:32] (4 bytes)
+ .set ETEMP_LO,ETEMP+8 | etemp mantissa [31:00] (4 bytes)
+|
+ .set ETEMP_SGN,ETEMP+2 | used to store sign
+|
+ .set EXC_SR,4 | exception frame status register
+ .set EXC_PC,6 | exception frame program counter
+ .set EXC_VEC,10 | exception frame vector (format+vector#)
+ .set EXC_EA,12 | exception frame effective address
+|
+|--------------------------------------------------------------------------
+|
+| FPSR/FPCR bits
+|
+ .set neg_bit,3 | negative result
+ .set z_bit,2 | zero result
+ .set inf_bit,1 | infinity result
+ .set nan_bit,0 | not-a-number result
+|
+ .set q_sn_bit,7 | sign bit of quotient byte
+|
+ .set bsun_bit,7 | branch on unordered
+ .set snan_bit,6 | signalling nan
+ .set operr_bit,5 | operand error
+ .set ovfl_bit,4 | overflow
+ .set unfl_bit,3 | underflow
+ .set dz_bit,2 | divide by zero
+ .set inex2_bit,1 | inexact result 2
+ .set inex1_bit,0 | inexact result 1
+|
+ .set aiop_bit,7 | accrued illegal operation
+ .set aovfl_bit,6 | accrued overflow
+ .set aunfl_bit,5 | accrued underflow
+ .set adz_bit,4 | accrued divide by zero
+ .set ainex_bit,3 | accrued inexact
+|
+| FPSR individual bit masks
+|
+ .set neg_mask,0x08000000
+ .set z_mask,0x04000000
+ .set inf_mask,0x02000000
+ .set nan_mask,0x01000000
+|
+ .set bsun_mask,0x00008000
+ .set snan_mask,0x00004000
+ .set operr_mask,0x00002000
+ .set ovfl_mask,0x00001000
+ .set unfl_mask,0x00000800
+ .set dz_mask,0x00000400
+ .set inex2_mask,0x00000200
+ .set inex1_mask,0x00000100
+|
+ .set aiop_mask,0x00000080 | accrued illegal operation
+ .set aovfl_mask,0x00000040 | accrued overflow
+ .set aunfl_mask,0x00000020 | accrued underflow
+ .set adz_mask,0x00000010 | accrued divide by zero
+ .set ainex_mask,0x00000008 | accrued inexact
+|
+| FPSR combinations used in the FPSP
+|
+ .set dzinf_mask,inf_mask+dz_mask+adz_mask
+ .set opnan_mask,nan_mask+operr_mask+aiop_mask
+ .set nzi_mask,0x01ffffff | clears N, Z, and I
+ .set unfinx_mask,unfl_mask+inex2_mask+aunfl_mask+ainex_mask
+ .set unf2inx_mask,unfl_mask+inex2_mask+ainex_mask
+ .set ovfinx_mask,ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
+ .set inx1a_mask,inex1_mask+ainex_mask
+ .set inx2a_mask,inex2_mask+ainex_mask
+ .set snaniop_mask,nan_mask+snan_mask+aiop_mask
+ .set naniop_mask,nan_mask+aiop_mask
+ .set neginf_mask,neg_mask+inf_mask
+ .set infaiop_mask,inf_mask+aiop_mask
+ .set negz_mask,neg_mask+z_mask
+ .set opaop_mask,operr_mask+aiop_mask
+ .set unfl_inx_mask,unfl_mask+aunfl_mask+ainex_mask
+ .set ovfl_inx_mask,ovfl_mask+aovfl_mask+ainex_mask
+|
+|--------------------------------------------------------------------------
+|
+| FPCR rounding modes
+|
+ .set x_mode,0x00 | round to extended
+ .set s_mode,0x40 | round to single
+ .set d_mode,0x80 | round to double
+|
+ .set rn_mode,0x00 | round nearest
+ .set rz_mode,0x10 | round to zero
+ .set rm_mode,0x20 | round to minus infinity
+ .set rp_mode,0x30 | round to plus infinity
+|
+|--------------------------------------------------------------------------
+|
+| Miscellaneous equates
+|
+ .set signan_bit,6 | signalling nan bit in mantissa
+ .set sign_bit,7
+|
+ .set rnd_stky_bit,29 | round/sticky bit of mantissa
+| this can only be used if in a data register
+ .set sx_mask,0x01800000 | set s and x bits in word $48
+|
+ .set LOCAL_EX,0
+ .set LOCAL_SGN,2
+ .set LOCAL_HI,4
+ .set LOCAL_LO,8
+ .set LOCAL_GRS,12 | valid ONLY for FP_SCR1, FP_SCR2
+|
+|
+ .set norm_tag,0x00 | tag bits in {7:5} position
+ .set zero_tag,0x20
+ .set inf_tag,0x40
+ .set nan_tag,0x60
+ .set dnrm_tag,0x80
+|
+| fsave sizes and formats
+|
+ .set VER_4,0x40 | fpsp compatible version numbers
+| are in the $40s {$40-$4f}
+ .set VER_40,0x40 | original version number
+ .set VER_41,0x41 | revision version number
+|
+ .set BUSY_SIZE,100 | size of busy frame
+ .set BUSY_FRAME,LV-BUSY_SIZE | start of busy frame
+|
+ .set UNIMP_40_SIZE,44 | size of orig unimp frame
+ .set UNIMP_41_SIZE,52 | size of rev unimp frame
+|
+ .set IDLE_SIZE,4 | size of idle frame
+ .set IDLE_FRAME,LV-IDLE_SIZE | start of idle frame
+|
+| exception vectors
+|
+ .set TRACE_VEC,0x2024 | trace trap
+ .set FLINE_VEC,0x002C | 'real' F-line
+ .set UNIMP_VEC,0x202C | unimplemented
+ .set INEX_VEC,0x00C4
+|
+ .set dbl_thresh,0x3C01
+ .set sgl_thresh,0x3F81
+|
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/gen_except.s b/c/src/lib/libcpu/m68k/m68040/fpsp/gen_except.s
new file mode 100644
index 0000000000..7c9ff527fc
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/gen_except.s
@@ -0,0 +1,468 @@
+//
+// gen_except.sa 3.7 1/16/92
+//
+// gen_except --- FPSP routine to detect reportable exceptions
+//
+// This routine compares the exception enable byte of the
+// user_fpcr on the stack with the exception status byte
+// of the user_fpsr.
+//
+// Any routine which may report an exception must load
+// the stack frame in memory with the exceptional operand(s).
+//
+// Priority for exceptions is:
+//
+// Highest: bsun
+// snan
+// operr
+// ovfl
+// unfl
+// dz
+// inex2
+// Lowest: inex1
+//
+// Note: The IEEE standard specifies that inex2 is to be
+// reported if ovfl occurs and the ovfl enable bit is not
+// set but the inex2 enable bit is.
+//
+//
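The priority selection described above is an AND of the enable byte with the exception-status byte followed by a scan for the most significant set bit (the bfffo instruction in the code that follows). A sketch of just that scan, with the nine-entry handler table left abstract:

    #include <stdint.h>

    /* Returns the exc_tbl index: 0 = bsun (highest priority) through
     * 7 = inex1 (lowest); 8 means no enabled exception is pending (no_match). */
    static int first_reportable(uint8_t fpcr_enable, uint8_t fpsr_except)
    {
        uint8_t pending = fpcr_enable & fpsr_except;
        for (int bit = 7; bit >= 0; bit--)
            if (pending & (1u << bit))
                return 7 - bit;
        return 8;
    }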
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+GEN_EXCEPT: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref real_trace
+ |xref fpsp_done
+ |xref fpsp_fmt_error
+
+exc_tbl:
+ .long bsun_exc
+ .long commonE1
+ .long commonE1
+ .long ovfl_unfl
+ .long ovfl_unfl
+ .long commonE1
+ .long commonE3
+ .long commonE3
+ .long no_match
+
+ .global gen_except
+gen_except:
+ cmpib #IDLE_SIZE-4,1(%a7) //test for idle frame
+ beq do_check //go handle idle frame
+ cmpib #UNIMP_40_SIZE-4,1(%a7) //test for orig unimp frame
+ beqs unimp_x //go handle unimp frame
+ cmpib #UNIMP_41_SIZE-4,1(%a7) //test for rev unimp frame
+ beqs unimp_x //go handle unimp frame
+ cmpib #BUSY_SIZE-4,1(%a7) //if size <> $60, fmt error
+ bnel fpsp_fmt_error
+ leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 //init a1 so fpsp.h
+// ;equates will work
+// Fix up the new busy frame with entries from the unimp frame
+//
+ movel ETEMP_EX(%a6),ETEMP_EX(%a1) //copy etemp from unimp
+ movel ETEMP_HI(%a6),ETEMP_HI(%a1) //frame to busy frame
+ movel ETEMP_LO(%a6),ETEMP_LO(%a1)
+ movel CMDREG1B(%a6),CMDREG1B(%a1) //set inst in frame to unimp
+ movel CMDREG1B(%a6),%d0 //fix cmd1b to make it
+ andl #0x03c30000,%d0 //work for cmd3b
+ bfextu CMDREG1B(%a6){#13:#1},%d1 //extract bit 2
+ lsll #5,%d1
+ swap %d1
+ orl %d1,%d0 //put it in the right place
+ bfextu CMDREG1B(%a6){#10:#3},%d1 //extract bit 3,4,5
+ lsll #2,%d1
+ swap %d1
+ orl %d1,%d0 //put them in the right place
+ movel %d0,CMDREG3B(%a1) //in the busy frame
+//
+// Or in the FPSR from the emulation with the USER_FPSR on the stack.
+//
+ fmovel %FPSR,%d0
+ orl %d0,USER_FPSR(%a6)
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a1) //set exc bits
+ orl #sx_mask,E_BYTE(%a1)
+ bra do_clean
+
+//
+// Frame is an unimp frame possibly resulting from an fmove <ea>,fp0
+// that caused an exception
+//
+// a1 is modified to point into the new frame allowing fpsp equates
+// to be valid.
+//
+unimp_x:
+ cmpib #UNIMP_40_SIZE-4,1(%a7) //test for orig unimp frame
+ bnes test_rev
+ leal UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1
+ bras unimp_con
+test_rev:
+ cmpib #UNIMP_41_SIZE-4,1(%a7) //test for rev unimp frame
+ bnel fpsp_fmt_error //if not $28 or $30
+ leal UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1
+
+unimp_con:
+//
+// Fix up the new unimp frame with entries from the old unimp frame
+//
+ movel CMDREG1B(%a6),CMDREG1B(%a1) //set inst in frame to unimp
+//
+// Or in the FPSR from the emulation with the USER_FPSR on the stack.
+//
+ fmovel %FPSR,%d0
+ orl %d0,USER_FPSR(%a6)
+ bra do_clean
+
+//
+// Frame is idle, so check for exceptions reported through
+// USER_FPSR and set the unimp frame accordingly.
+// A7 must be incremented past the 4-byte idle fsave frame so that it
+// points at the exception frame, as it would for the unimp vector.
+//
+
+do_check:
+ addl #4,%a7 //point A7 back to unimp frame
+//
+// Or in the FPSR from the emulation with the USER_FPSR on the stack.
+//
+ fmovel %FPSR,%d0
+ orl %d0,USER_FPSR(%a6)
+//
+// On a busy frame, we must clear the nmnexc bits.
+//
+ cmpib #BUSY_SIZE-4,1(%a7) //check frame type
+ bnes check_fr //if busy, clr nmnexc
+ clrw NMNEXC(%a6) //clr nmnexc & nmcexc
+ btstb #5,CMDREG1B(%a6) //test for fmove out
+ bnes frame_com
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6) //set exc bits
+ orl #sx_mask,E_BYTE(%a6)
+ bras frame_com
+check_fr:
+ cmpb #UNIMP_40_SIZE-4,1(%a7)
+ beqs frame_com
+ clrw NMNEXC(%a6)
+frame_com:
+ moveb FPCR_ENABLE(%a6),%d0 //get fpcr enable byte
+ andb FPSR_EXCEPT(%a6),%d0 //and in the fpsr exc byte
+ bfffo %d0{#24:#8},%d1 //test for first set bit
+ leal exc_tbl,%a0 //load jmp table address
+ subib #24,%d1 //normalize bit offset to 0-8
+ movel (%a0,%d1.w*4),%a0 //load routine address based
+// ;based on first enabled exc
+ jmp (%a0) //jump to routine
+//
+// Bsun is not possible in unimp or unsupp
+//
+bsun_exc:
+ bra do_clean
+//
+// The typical work to be done to the unimp frame to report an
+// exception is to set the E1/E3 byte and clr the U flag.
+// commonE1 does this for E1 exceptions, which are snan,
+// operr, and dz. commonE3 does this for E3 exceptions, which
+// are inex2 and inex1, and also clears the E1 exception bit
+// left over from the unimp exception.
+//
+commonE1:
+ bsetb #E1,E_BYTE(%a6) //set E1 flag
+ bra commonE //go clean and exit
+
+commonE3:
+ tstb UFLG_TMP(%a6) //test flag for unsup/unimp state
+ bnes unsE3
+uniE3:
+ bsetb #E3,E_BYTE(%a6) //set E3 flag
+ bclrb #E1,E_BYTE(%a6) //clr E1 from unimp
+ bra commonE
+
+unsE3:
+ tstb RES_FLG(%a6)
+ bnes unsE3_0
+unsE3_1:
+ bsetb #E3,E_BYTE(%a6) //set E3 flag
+unsE3_0:
+ bclrb #E1,E_BYTE(%a6) //clr E1 flag
+ movel CMDREG1B(%a6),%d0
+ andl #0x03c30000,%d0 //work for cmd3b
+ bfextu CMDREG1B(%a6){#13:#1},%d1 //extract bit 2
+ lsll #5,%d1
+ swap %d1
+ orl %d1,%d0 //put it in the right place
+ bfextu CMDREG1B(%a6){#10:#3},%d1 //extract bit 3,4,5
+ lsll #2,%d1
+ swap %d1
+ orl %d1,%d0 //put them in the right place
+ movel %d0,CMDREG3B(%a6) //in the busy frame
+
+commonE:
+ bclrb #UFLAG,T_BYTE(%a6) //clr U flag from unimp
+ bra do_clean //go clean and exit
+//
+// No bits in the enable byte match existing exceptions. Check for
+// the case of the ovfl exc without the ovfl enabled, but with
+// inex2 enabled.
+//
+no_match:
+ btstb #inex2_bit,FPCR_ENABLE(%a6) //check for ovfl/inex2 case
+ beqs no_exc //if clear, exit
+ btstb #ovfl_bit,FPSR_EXCEPT(%a6) //now check ovfl
+ beqs no_exc //if clear, exit
+	bras	ovfl_unfl	//go to ovfl_unfl to determine if
+// ;it is an unsupp or unimp exc
+
+// No exceptions are to be reported. If the instruction was
+// unimplemented, no FPU restore is necessary. If it was
+// unsupported, we must perform the restore.
+no_exc:
+ tstb UFLG_TMP(%a6) //test flag for unsupp/unimp state
+ beqs uni_no_exc
+uns_no_exc:
+ tstb RES_FLG(%a6) //check if frestore is needed
+ bne do_clean //if clear, no frestore needed
+uni_no_exc:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ unlk %a6
+ bra finish_up
+//
+// Unsupported Data Type Handler:
+// Ovfl:
+// An fmoveout that results in an overflow is reported this way.
+// Unfl:
+// An fmoveout that results in an underflow is reported this way.
+//
+// Unimplemented Instruction Handler:
+// Ovfl:
+// Only scosh, setox, ssinh, stwotox, and scale can set overflow in
+// this manner.
+// Unfl:
+// Stwotox, setox, and scale can set underflow in this manner.
+// Any of the other Library Routines such that f(x)=x in which
+// x is an extended denorm can report an underflow exception.
+// It is the responsibility of the exception-causing routine
+// to make sure that WBTEMP is correct.
+//
+// The exceptional operand is in FP_SCR1.
+//
+ovfl_unfl:
+ tstb UFLG_TMP(%a6) //test flag for unsupp/unimp state
+ beqs ofuf_con
+//
+// The caller was from an unsupported data type trap. Test if the
+// caller set CU_ONLY. If so, the exceptional operand is expected in
+// FPTEMP, rather than WBTEMP.
+//
+ tstb CU_ONLY(%a6) //test if inst is cu-only
+ beq unsE3
+// move.w #$fe,CU_SAVEPC(%a6)
+ clrb CU_SAVEPC(%a6)
+ bsetb #E1,E_BYTE(%a6) //set E1 exception flag
+ movew ETEMP_EX(%a6),FPTEMP_EX(%a6)
+ movel ETEMP_HI(%a6),FPTEMP_HI(%a6)
+ movel ETEMP_LO(%a6),FPTEMP_LO(%a6)
+ bsetb #fptemp15_bit,DTAG(%a6) //set fpte15
+ bclrb #UFLAG,T_BYTE(%a6) //clr U flag from unimp
+ bra do_clean //go clean and exit
+
+ofuf_con:
+ moveb (%a7),VER_TMP(%a6) //save version number
+ cmpib #BUSY_SIZE-4,1(%a7) //check for busy frame
+ beqs busy_fr //if unimp, grow to busy
+ cmpib #VER_40,(%a7) //test for orig unimp frame
+ bnes try_41 //if not, test for rev frame
+ moveql #13,%d0 //need to zero 14 lwords
+ bras ofuf_fin
+try_41:
+ cmpib #VER_41,(%a7) //test for rev unimp frame
+ bnel fpsp_fmt_error //if neither, exit with error
+ moveql #11,%d0 //need to zero 12 lwords
+
+ofuf_fin:
+ clrl (%a7)
+loop1:
+ clrl -(%a7) //clear and dec a7
+ dbra %d0,loop1
+ moveb VER_TMP(%a6),(%a7)
+ moveb #BUSY_SIZE-4,1(%a7) //write busy fmt word.
+busy_fr:
+ movel FP_SCR1(%a6),WBTEMP_EX(%a6) //write
+ movel FP_SCR1+4(%a6),WBTEMP_HI(%a6) //exceptional op to
+ movel FP_SCR1+8(%a6),WBTEMP_LO(%a6) //wbtemp
+ bsetb #E3,E_BYTE(%a6) //set E3 flag
+ bclrb #E1,E_BYTE(%a6) //make sure E1 is clear
+ bclrb #UFLAG,T_BYTE(%a6) //clr U flag
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+ movel CMDREG1B(%a6),%d0 //fix cmd1b to make it
+ andl #0x03c30000,%d0 //work for cmd3b
+ bfextu CMDREG1B(%a6){#13:#1},%d1 //extract bit 2
+ lsll #5,%d1
+ swap %d1
+ orl %d1,%d0 //put it in the right place
+ bfextu CMDREG1B(%a6){#10:#3},%d1 //extract bit 3,4,5
+ lsll #2,%d1
+ swap %d1
+ orl %d1,%d0 //put them in the right place
+ movel %d0,CMDREG3B(%a6) //in the busy frame
+
+//
+// Check if the frame to be restored is busy or unimp.
+//** NOTE *** Bug fix for errata (0d43b #3)
+// If the frame is unimp, we must create a busy frame to
+// fix the bug with the nmnexc bits in cases in which they
+// are set by a previous instruction and not cleared by
+// the save. The frame will be unimp only if the final
+// instruction in an emulation routine caused the exception
+// by doing an fmove <ea>,fp0. The exception operand, in
+// internal format, is in fptemp.
+//
+do_clean:
+ cmpib #UNIMP_40_SIZE-4,1(%a7)
+ bnes do_con
+ moveql #13,%d0 //in orig, need to zero 14 lwords
+ bras do_build
+do_con:
+ cmpib #UNIMP_41_SIZE-4,1(%a7)
+ bnes do_restore //frame must be busy
+ moveql #11,%d0 //in rev, need to zero 12 lwords
+
+do_build:
+ moveb (%a7),VER_TMP(%a6)
+ clrl (%a7)
+loop2:
+ clrl -(%a7) //clear and dec a7
+ dbra %d0,loop2
+//
+// Use a1 as pointer into new frame. a6 is not correct if an unimp or
+// busy frame was created as the result of an exception on the final
+// instruction of an emulation routine.
+//
+// We need to set the nmcexc bits if the exception is E1. Otherwise,
+// the exc taken will be inex2.
+//
+ leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 //init a1 for new frame
+ moveb VER_TMP(%a6),(%a7) //write busy fmt word
+ moveb #BUSY_SIZE-4,1(%a7)
+ movel FP_SCR1(%a6),WBTEMP_EX(%a1) //write
+ movel FP_SCR1+4(%a6),WBTEMP_HI(%a1) //exceptional op to
+ movel FP_SCR1+8(%a6),WBTEMP_LO(%a1) //wbtemp
+// btst.b #E1,E_BYTE(%a1)
+// beq.b do_restore
+ bfextu USER_FPSR(%a6){#17:#4},%d0 //get snan/operr/ovfl/unfl bits
+ bfins %d0,NMCEXC(%a1){#4:#4} //and insert them in nmcexc
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a1) //set exc bits
+ orl #sx_mask,E_BYTE(%a1)
+
+do_restore:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ tstb RES_FLG(%a6) //RES_FLG indicates a "continuation" frame
+ beq cont
+ bsr bug1384
+cont:
+ unlk %a6
+//
+// If trace mode enabled, then go to trace handler. This handler
+// cannot have any fp instructions. If there are fp inst's and an
+// exception has been restored into the machine then the exception
+// will occur upon execution of the fp inst. This is not desirable
+// in the kernel (supervisor mode). See MC68040 manual Section 9.3.8.
+//
+finish_up:
+ btstb #7,(%a7) //test T1 in SR
+ bnes g_trace
+ btstb #6,(%a7) //test T0 in SR
+ bnes g_trace
+ bral fpsp_done
+//
+// Change integer stack to look like trace stack
+// The address of the instruction that caused the
+// exception is already in the integer stack (is
+// the same as the saved fpiar)
+//
+// If the current frame is already a 6-word stack then all
+// that needs to be done is to change the vector# to TRACE.
+// If the frame is only a 4-word stack (meaning we got here
+// on an Unsupported data type exception), then we need to grow
+// the stack an extra 2 words and get the FPIAR from the FPU.
+//
+g_trace:
+ bftst EXC_VEC-4(%sp){#0:#4}
+ bne g_easy
+
+ subw #4,%sp // make room
+ movel 4(%sp),(%sp)
+ movel 8(%sp),4(%sp)
+ subw #BUSY_SIZE,%sp
+ fsave (%sp)
+ fmovel %fpiar,BUSY_SIZE+EXC_EA-4(%sp)
+ frestore (%sp)
+ addw #BUSY_SIZE,%sp
+
+g_easy:
+ movew #TRACE_VEC,EXC_VEC-4(%a7)
+ bral real_trace
+//
+// This is a work-around for hardware bug 1384.
+//
+bug1384:
+ link %a5,#0
+ fsave -(%sp)
+ cmpib #0x41,(%sp) // check for correct frame
+ beq frame_41
+ bgt nofix // if more advanced mask, do nada
+
+frame_40:
+ tstb 1(%sp) // check to see if idle
+ bne notidle
+idle40:
+ clrl (%sp) // get rid of old fsave frame
+ movel %d1,USER_D1(%a6) // save d1
+ movew #8,%d1 // place unimp frame instead
+loop40: clrl -(%sp)
+ dbra %d1,loop40
+ movel USER_D1(%a6),%d1 // restore d1
+ movel #0x40280000,-(%sp)
+ frestore (%sp)+
+ unlk %a5
+ rts
+
+frame_41:
+ tstb 1(%sp) // check to see if idle
+ bne notidle
+idle41:
+ clrl (%sp) // get rid of old fsave frame
+ movel %d1,USER_D1(%a6) // save d1
+ movew #10,%d1 // place unimp frame instead
+loop41: clrl -(%sp)
+ dbra %d1,loop41
+ movel USER_D1(%a6),%d1 // restore d1
+ movel #0x41300000,-(%sp)
+ frestore (%sp)+
+ unlk %a5
+ rts
+
+notidle:
+ bclrb #etemp15_bit,-40(%a5)
+ frestore (%sp)+
+ unlk %a5
+ rts
+
+nofix:
+ frestore (%sp)+
+ unlk %a5
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/get_op.s b/c/src/lib/libcpu/m68k/m68040/fpsp/get_op.s
new file mode 100644
index 0000000000..bd56f74c51
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/get_op.s
@@ -0,0 +1,676 @@
+//
+// get_op.sa 3.6 5/19/92
+//
+// get_op.sa 3.5 4/26/91
+//
+// Description: This routine is called by the unsupported format/data
+// type exception handler ('unsupp' - vector 55) and the unimplemented
+// instruction exception handler ('unimp' - vector 11). 'get_op'
+// determines the opclass (0, 2, or 3) and branches to the
+// opclass handler routine. See 68881/2 User's Manual table 4-11
+// for a description of the opclasses.
+//
+// For UNSUPPORTED data/format (exception vector 55) and for
+// UNIMPLEMENTED instructions (exception vector 11) the following
+// applies:
+//
+// - For unnormalized numbers (opclass 0, 2, or 3) the
+// number(s) is normalized and the operand type tag is updated.
+//
+// - For a packed number (opclass 2) the number is unpacked and the
+// operand type tag is updated.
+//
+// - For denormalized numbers (opclass 0 or 2) the number(s) is not
+// changed but passed to the next module. The next module for
+// unimp is do_func, the next module for unsupp is res_func.
+//
+// For UNSUPPORTED data/format (exception vector 55) only the
+// following applies:
+//
+// - If there is a move out with a packed number (opclass 3) the
+// number is packed and written to user memory. For the other
+// opclasses the number(s) are written back to the fsave stack
+// and the instruction is then restored back into the '040. The
+// '040 is then able to complete the instruction.
+//
+// For example:
+// fadd.x fpm,fpn where the fpm contains an unnormalized number.
+// The '040 takes an unsupported data trap and gets to this
+// routine. The number is normalized, put back on the stack and
+// then an frestore is done to restore the instruction back into
+// the '040. The '040 then re-executes the fadd.x fpm,fpn with
+// a normalized number in the source and the instruction is
+// successful.
+//
+// Next consider if in the process of normalizing the un-
+// normalized number it becomes a denormalized number. The
+// routine which converts the unnorm to a norm (called mk_norm)
+// detects this and tags the number as a denorm. The routine
+// res_func sees the denorm tag and converts the denorm to a
+// norm. The instruction is then restored back into the '040
+//	which re-executes the instruction.
+//
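+//	As a rough illustration of that normalization step (the numbers
+//	below are chosen only for this example):
+//
+//	    unnorm:	exp = $4000, mantissa = $00800000,00000000
+//			(j-bit clear, value = 2^-8 * 2^1 = 2^-7)
+//
+//	    mk_norm shifts the mantissa left 8 bits and decrements the
+//	    exponent by 8, giving
+//
+//	    norm:	exp = $3ff8, mantissa = $80000000,00000000
+//			(j-bit set, value = 1.0 * 2^-7 = 2^-7)
+//
+//	so the value is unchanged and the '040 can then complete the
+//	re-executed instruction.
+//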
+//
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+GET_OP: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ .global PIRN,PIRZRM,PIRP
+ .global SMALRN,SMALRZRM,SMALRP
+ .global BIGRN,BIGRZRM,BIGRP
+
+PIRN:
+ .long 0x40000000,0xc90fdaa2,0x2168c235 //pi
+PIRZRM:
+ .long 0x40000000,0xc90fdaa2,0x2168c234 //pi
+PIRP:
+ .long 0x40000000,0xc90fdaa2,0x2168c235 //pi
+
+//round to nearest
+SMALRN:
+ .long 0x3ffd0000,0x9a209a84,0xfbcff798 //log10(2)
+ .long 0x40000000,0xadf85458,0xa2bb4a9a //e
+ .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc //log2(e)
+ .long 0x3ffd0000,0xde5bd8a9,0x37287195 //log10(e)
+ .long 0x00000000,0x00000000,0x00000000 //0.0
+// round to zero;round to negative infinity
+SMALRZRM:
+ .long 0x3ffd0000,0x9a209a84,0xfbcff798 //log10(2)
+ .long 0x40000000,0xadf85458,0xa2bb4a9a //e
+ .long 0x3fff0000,0xb8aa3b29,0x5c17f0bb //log2(e)
+ .long 0x3ffd0000,0xde5bd8a9,0x37287195 //log10(e)
+ .long 0x00000000,0x00000000,0x00000000 //0.0
+// round to positive infinity
+SMALRP:
+ .long 0x3ffd0000,0x9a209a84,0xfbcff799 //log10(2)
+ .long 0x40000000,0xadf85458,0xa2bb4a9b //e
+ .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc //log2(e)
+ .long 0x3ffd0000,0xde5bd8a9,0x37287195 //log10(e)
+ .long 0x00000000,0x00000000,0x00000000 //0.0
+
+//round to nearest
+BIGRN:
+ .long 0x3ffe0000,0xb17217f7,0xd1cf79ac //ln(2)
+ .long 0x40000000,0x935d8ddd,0xaaa8ac17 //ln(10)
+ .long 0x3fff0000,0x80000000,0x00000000 //10 ^ 0
+
+ .global PTENRN
+PTENRN:
+ .long 0x40020000,0xA0000000,0x00000000 //10 ^ 1
+ .long 0x40050000,0xC8000000,0x00000000 //10 ^ 2
+ .long 0x400C0000,0x9C400000,0x00000000 //10 ^ 4
+ .long 0x40190000,0xBEBC2000,0x00000000 //10 ^ 8
+ .long 0x40340000,0x8E1BC9BF,0x04000000 //10 ^ 16
+ .long 0x40690000,0x9DC5ADA8,0x2B70B59E //10 ^ 32
+ .long 0x40D30000,0xC2781F49,0xFFCFA6D5 //10 ^ 64
+ .long 0x41A80000,0x93BA47C9,0x80E98CE0 //10 ^ 128
+ .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E //10 ^ 256
+ .long 0x46A30000,0xE319A0AE,0xA60E91C7 //10 ^ 512
+ .long 0x4D480000,0xC9767586,0x81750C17 //10 ^ 1024
+ .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 //10 ^ 2048
+ .long 0x75250000,0xC4605202,0x8A20979B //10 ^ 4096
+//round to zero;round to negative infinity
+BIGRZRM:
+ .long 0x3ffe0000,0xb17217f7,0xd1cf79ab //ln(2)
+ .long 0x40000000,0x935d8ddd,0xaaa8ac16 //ln(10)
+ .long 0x3fff0000,0x80000000,0x00000000 //10 ^ 0
+
+ .global PTENRM
+PTENRM:
+ .long 0x40020000,0xA0000000,0x00000000 //10 ^ 1
+ .long 0x40050000,0xC8000000,0x00000000 //10 ^ 2
+ .long 0x400C0000,0x9C400000,0x00000000 //10 ^ 4
+ .long 0x40190000,0xBEBC2000,0x00000000 //10 ^ 8
+ .long 0x40340000,0x8E1BC9BF,0x04000000 //10 ^ 16
+ .long 0x40690000,0x9DC5ADA8,0x2B70B59D //10 ^ 32
+ .long 0x40D30000,0xC2781F49,0xFFCFA6D5 //10 ^ 64
+ .long 0x41A80000,0x93BA47C9,0x80E98CDF //10 ^ 128
+ .long 0x43510000,0xAA7EEBFB,0x9DF9DE8D //10 ^ 256
+ .long 0x46A30000,0xE319A0AE,0xA60E91C6 //10 ^ 512
+ .long 0x4D480000,0xC9767586,0x81750C17 //10 ^ 1024
+ .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 //10 ^ 2048
+ .long 0x75250000,0xC4605202,0x8A20979A //10 ^ 4096
+//round to positive infinity
+BIGRP:
+ .long 0x3ffe0000,0xb17217f7,0xd1cf79ac //ln(2)
+ .long 0x40000000,0x935d8ddd,0xaaa8ac17 //ln(10)
+ .long 0x3fff0000,0x80000000,0x00000000 //10 ^ 0
+
+ .global PTENRP
+PTENRP:
+ .long 0x40020000,0xA0000000,0x00000000 //10 ^ 1
+ .long 0x40050000,0xC8000000,0x00000000 //10 ^ 2
+ .long 0x400C0000,0x9C400000,0x00000000 //10 ^ 4
+ .long 0x40190000,0xBEBC2000,0x00000000 //10 ^ 8
+ .long 0x40340000,0x8E1BC9BF,0x04000000 //10 ^ 16
+ .long 0x40690000,0x9DC5ADA8,0x2B70B59E //10 ^ 32
+ .long 0x40D30000,0xC2781F49,0xFFCFA6D6 //10 ^ 64
+ .long 0x41A80000,0x93BA47C9,0x80E98CE0 //10 ^ 128
+ .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E //10 ^ 256
+ .long 0x46A30000,0xE319A0AE,0xA60E91C7 //10 ^ 512
+ .long 0x4D480000,0xC9767586,0x81750C18 //10 ^ 1024
+ .long 0x5A920000,0x9E8B3B5D,0xC53D5DE6 //10 ^ 2048
+ .long 0x75250000,0xC4605202,0x8A20979B //10 ^ 4096
+
+ |xref nrm_zero
+ |xref decbin
+ |xref round
+
+ .global get_op
+ .global uns_getop
+ .global uni_getop
+get_op:
+ clrb DY_MO_FLG(%a6)
+ tstb UFLG_TMP(%a6) //test flag for unsupp/unimp state
+ beqs uni_getop
+
+uns_getop:
+ btstb #direction_bit,CMDREG1B(%a6)
+ bne opclass3 //branch if a fmove out (any kind)
+ btstb #6,CMDREG1B(%a6)
+ beqs uns_notpacked
+
+ bfextu CMDREG1B(%a6){#3:#3},%d0
+ cmpb #3,%d0
+ beq pack_source //check for a packed src op, branch if so
+uns_notpacked:
+ bsr chk_dy_mo //set the dyadic/monadic flag
+ tstb DY_MO_FLG(%a6)
+ beqs src_op_ck //if monadic, go check src op
+// ;else, check dst op (fall through)
+
+ btstb #7,DTAG(%a6)
+ beqs src_op_ck //if dst op is norm, check src op
+ bras dst_ex_dnrm //else, handle destination unnorm/dnrm
+
+uni_getop:
+ bfextu CMDREG1B(%a6){#0:#6},%d0 //get opclass and src fields
+ cmpil #0x17,%d0 //if op class and size fields are $17,
+// ;it is FMOVECR; if not, continue
+//
+// If the instruction is fmovecr, exit get_op. It is handled
+// in do_func and smovecr.sa.
+//
+ bne not_fmovecr //handle fmovecr as an unimplemented inst
+ rts
+
+not_fmovecr:
+ btstb #E1,E_BYTE(%a6) //if set, there is a packed operand
+ bne pack_source //check for packed src op, branch if so
+
+// The following lines are coded to optimize for normalized operands
+ moveb STAG(%a6),%d0
+ orb DTAG(%a6),%d0 //check if either of STAG/DTAG msb set
+ bmis dest_op_ck //if so, some op needs to be fixed
+ rts
+
+dest_op_ck:
+ btstb #7,DTAG(%a6) //check for unsupported data types in
+ beqs src_op_ck //the destination, if not, check src op
+ bsr chk_dy_mo //set dyadic/monadic flag
+ tstb DY_MO_FLG(%a6) //
+ beqs src_op_ck //if monadic, check src op
+//
+// At this point, destination has an extended denorm or unnorm.
+//
+dst_ex_dnrm:
+ movew FPTEMP_EX(%a6),%d0 //get destination exponent
+ andiw #0x7fff,%d0 //mask sign, check if exp = 0000
+ beqs src_op_ck //if denorm then check source op.
+// ;denorms are taken care of in res_func
+// ;(unsupp) or do_func (unimp)
+// ;else unnorm fall through
+ leal FPTEMP(%a6),%a0 //point a0 to dop - used in mk_norm
+ bsr mk_norm //go normalize - mk_norm returns:
+// ;L_SCR1{7:5} = operand tag
+// ; (000 = norm, 100 = denorm)
+// ;L_SCR1{4} = fpte15 or ete15
+// ; 0 = exp > $3fff
+// ; 1 = exp <= $3fff
+// ;and puts the normalized num back
+// ;on the fsave stack
+//
+ moveb L_SCR1(%a6),DTAG(%a6) //write the new tag & fpte15
+// ;to the fsave stack and fall
+// ;through to check source operand
+//
+src_op_ck:
+ btstb #7,STAG(%a6)
+ beq end_getop //check for unsupported data types on the
+// ;source operand
+ btstb #5,STAG(%a6)
+ bnes src_sd_dnrm //if bit 5 set, handle sgl/dbl denorms
+//
+// At this point only unnorms or extended denorms are possible.
+//
+src_ex_dnrm:
+ movew ETEMP_EX(%a6),%d0 //get source exponent
+ andiw #0x7fff,%d0 //mask sign, check if exp = 0000
+ beq end_getop //if denorm then exit, denorms are
+// ;handled in do_func
+ leal ETEMP(%a6),%a0 //point a0 to sop - used in mk_norm
+ bsr mk_norm //go normalize - mk_norm returns:
+// ;L_SCR1{7:5} = operand tag
+// ; (000 = norm, 100 = denorm)
+// ;L_SCR1{4} = fpte15 or ete15
+// ; 0 = exp > $3fff
+// ; 1 = exp <= $3fff
+// ;and puts the normalized num back
+// ;on the fsave stack
+//
+ moveb L_SCR1(%a6),STAG(%a6) //write the new tag & ete15
+ rts //end_getop
+
+//
+// At this point, only single or double denorms are possible.
+// If the inst is not fmove, normalize the source. If it is,
+// do nothing to the input.
+//
+src_sd_dnrm:
+ btstb #4,CMDREG1B(%a6) //differentiate between sgl/dbl denorm
+ bnes is_double
+is_single:
+ movew #0x3f81,%d1 //write bias for sgl denorm
+ bras common //goto the common code
+is_double:
+ movew #0x3c01,%d1 //write the bias for a dbl denorm
+common:
+ btstb #sign_bit,ETEMP_EX(%a6) //grab sign bit of mantissa
+ beqs pos
+ bset #15,%d1 //set sign bit because it is negative
+pos:
+ movew %d1,ETEMP_EX(%a6)
+// ;put exponent on stack
+
+ movew CMDREG1B(%a6),%d1
+ andw #0xe3ff,%d1 //clear out source specifier
+ orw #0x0800,%d1 //set source specifier to extended prec
+ movew %d1,CMDREG1B(%a6) //write back to the command word in stack
+// ;this is needed to fix unsupp data stack
+ leal ETEMP(%a6),%a0 //point a0 to sop
+
+ bsr mk_norm //convert sgl/dbl denorm to norm
+ moveb L_SCR1(%a6),STAG(%a6) //put tag into source tag reg - d0
+ rts //end_getop
+//
+// At this point, the source is definitely packed, whether
+// instruction is dyadic or monadic is still unknown
+//
+pack_source:
+ movel FPTEMP_LO(%a6),ETEMP(%a6) //write ms part of packed
+// ;number to etemp slot
+ bsr chk_dy_mo //set dyadic/monadic flag
+ bsr unpack
+
+ tstb DY_MO_FLG(%a6)
+ beqs end_getop //if monadic, exit
+// ;else, fix FPTEMP
+pack_dya:
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //extract dest fp reg
+ movel #7,%d1
+ subl %d0,%d1
+ clrl %d0
+ bsetl %d1,%d0 //set up d0 as a dynamic register mask
+ fmovemx %d0,FPTEMP(%a6) //write to FPTEMP
+
+ btstb #7,DTAG(%a6) //check dest tag for unnorm or denorm
+ bne dst_ex_dnrm //else, handle the unnorm or ext denorm
+//
+// Dest is not denormalized. Check for norm, and set fpte15
+// accordingly.
+//
+ moveb DTAG(%a6),%d0
+ andib #0xf0,%d0 //strip to only dtag:fpte15
+ tstb %d0 //check for normalized value
+ bnes end_getop //if inf/nan/zero leave get_op
+ movew FPTEMP_EX(%a6),%d0
+ andiw #0x7fff,%d0
+ cmpiw #0x3fff,%d0 //check if fpte15 needs setting
+ bges end_getop //if >= $3fff, leave fpte15=0
+ orb #0x10,DTAG(%a6)
+ bras end_getop
+
+//
+// At this point, it is an fmove out of either a packed, an unnorm, or a denorm
+//
+opclass3:
+ clrb DY_MO_FLG(%a6) //set dyadic/monadic flag to monadic
+ bfextu CMDREG1B(%a6){#4:#2},%d0
+ cmpib #3,%d0
+ bne src_ex_dnrm //if not equal, must be unnorm or denorm
+// ;else it is a packed move out
+// ;exit
+end_getop:
+ rts
+
+//
+// Sets the DY_MO_FLG correctly. This is used only if it is an
+// unsupported data type exception. The flag is set if the instruction is dyadic.
+//
+chk_dy_mo:
+ movew CMDREG1B(%a6),%d0
+ btstl #5,%d0 //testing extension command word
+ beqs set_mon //if bit 5 = 0 then monadic
+ btstl #4,%d0 //know that bit 5 = 1
+ beqs set_dya //if bit 4 = 0 then dyadic
+ andiw #0x007f,%d0 //get rid of all but extension bits {6:0}
+ cmpiw #0x0038,%d0 //if extension = $38 then fcmp (dyadic)
+ bnes set_mon
+set_dya:
+ st DY_MO_FLG(%a6) //set the inst flag type to dyadic
+ rts
+set_mon:
+ clrb DY_MO_FLG(%a6) //set the inst flag type to monadic
+ rts
+//
+// MK_NORM
+//
+// Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl
+// exception if denorm.
+//
+// CASE opclass 0x0 unsupp
+// mk_norm till msb set
+// set tag = norm
+//
+// CASE opclass 0x0 unimp
+// mk_norm till msb set or exp = 0
+// if integer bit = 0
+// tag = denorm
+// else
+// tag = norm
+//
+// CASE opclass 011 unsupp
+// mk_norm till msb set or exp = 0
+// if integer bit = 0
+// tag = denorm
+// set unfl_nmcexe = 1
+// else
+// tag = norm
+//
+// if exp <= $3fff
+// set ete15 or fpte15 = 1
+// else set ete15 or fpte15 = 0
+
+// input:
+// a0 = points to operand to be normalized
+// output:
+// L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm)
+// L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff)
+// the normalized operand is placed back on the fsave stack
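+//
+//	for example (illustration only), an operand that normalizes to a
+//	denorm with exp <= $3fff returns L_SCR1 = $90 (tag = 100, bit 4
+//	set), while a norm with exp > $3fff returns L_SCR1 = $00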
+mk_norm:
+ clrl L_SCR1(%a6)
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //transform into internal extended format
+
+ cmpib #0x2c,1+EXC_VEC(%a6) //check if unimp
+ bnes uns_data //branch if unsupp
+ bsr uni_inst //call if unimp (opclass 0x0)
+ bras reload
+uns_data:
+ btstb #direction_bit,CMDREG1B(%a6) //check transfer direction
+ bnes bit_set //branch if set (opclass 011)
+ bsr uns_opx //call if opclass 0x0
+ bras reload
+bit_set:
+ bsr uns_op3 //opclass 011
+reload:
+ cmpw #0x3fff,LOCAL_EX(%a0) //if exp > $3fff
+ bgts end_mk // fpte15/ete15 already set to 0
+ bsetb #4,L_SCR1(%a6) //else set fpte15/ete15 to 1
+// ;calling routine actually sets the
+// ;value on the stack (along with the
+// ;tag), since this routine doesn't
+// ;know if it should set ete15 or fpte15
+// ;ie, it doesn't know if this is the
+// ;src op or dest op.
+end_mk:
+ bfclr LOCAL_SGN(%a0){#0:#8}
+ beqs end_mk_pos
+ bsetb #sign_bit,LOCAL_EX(%a0) //convert back to IEEE format
+end_mk_pos:
+ rts
+//
+// CASE opclass 011 unsupp
+//
+uns_op3:
+ bsr nrm_zero //normalize till msb = 1 or exp = zero
+ btstb #7,LOCAL_HI(%a0) //if msb = 1
+ bnes no_unfl //then branch
+set_unfl:
+ orw #dnrm_tag,L_SCR1(%a6) //set denorm tag
+ bsetb #unfl_bit,FPSR_EXCEPT(%a6) //set unfl exception bit
+no_unfl:
+ rts
+//
+// CASE opclass 0x0 unsupp
+//
+uns_opx:
+ bsr nrm_zero //normalize the number
+ btstb #7,LOCAL_HI(%a0) //check if integer bit (j-bit) is set
+ beqs uns_den //if clear then now have a denorm
+uns_nrm:
+ orb #norm_tag,L_SCR1(%a6) //set tag to norm
+ rts
+uns_den:
+ orb #dnrm_tag,L_SCR1(%a6) //set tag to denorm
+ rts
+//
+// CASE opclass 0x0 unimp
+//
+uni_inst:
+ bsr nrm_zero
+ btstb #7,LOCAL_HI(%a0) //check if integer bit (j-bit) is set
+ beqs uni_den //if clear then now have a denorm
+uni_nrm:
+ orb #norm_tag,L_SCR1(%a6) //set tag to norm
+ rts
+uni_den:
+ orb #dnrm_tag,L_SCR1(%a6) //set tag to denorm
+ rts
+
+//
+// Decimal to binary conversion
+//
+// Special cases of inf and NaNs are completed outside of decbin.
+// If the input is an snan, the snan bit is not set.
+//
+// input:
+// ETEMP(a6) - points to packed decimal string in memory
+// output:
+// fp0 - contains packed string converted to extended precision
+// ETEMP - same as fp0
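+//
+//	for illustration (values implied by the checks below): a packed
+//	+infinity arrives with the upper word of ETEMP = $7fff (SE and
+//	both y bits set, exponent digits = $fff) and both mantissa long
+//	words zero; a packed NaN has the same first word but a non-zero
+//	mantissa; a packed -0 is rewritten to ETEMP as
+//	$80000000,00000000,00000000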
+unpack:
+ movew CMDREG1B(%a6),%d0 //examine command word, looking for fmove's
+ andw #0x3b,%d0
+ beq move_unpack //special handling for fmove: must set FPSR_CC
+
+ movew ETEMP(%a6),%d0 //get word with inf information
+ bfextu %d0{#20:#12},%d1 //get exponent into d1
+ cmpiw #0x0fff,%d1 //test for inf or NaN
+ bnes try_zero //if not equal, it is not special
+ bfextu %d0{#17:#3},%d1 //get SE and y bits into d1
+ cmpiw #7,%d1 //SE and y bits must be on for special
+ bnes try_zero //if not on, it is not special
+//input is one of the special cases, inf or NaN
+ tstl ETEMP_HI(%a6) //check ms mantissa
+ bnes fix_nan //if non-zero, it is a NaN
+ tstl ETEMP_LO(%a6) //check ls mantissa
+ bnes fix_nan //if non-zero, it is a NaN
+ bra finish //special already on stack
+fix_nan:
+ btstb #signan_bit,ETEMP_HI(%a6) //test for snan
+ bne finish
+ orl #snaniop_mask,USER_FPSR(%a6) //always set snan if it is so
+ bra finish
+try_zero:
+ movew ETEMP_EX+2(%a6),%d0 //get word 4
+ andiw #0x000f,%d0 //clear all but last ni(y)bble
+ tstw %d0 //check for zero.
+ bne not_spec
+ tstl ETEMP_HI(%a6) //check words 3 and 2
+ bne not_spec
+ tstl ETEMP_LO(%a6) //check words 1 and 0
+ bne not_spec
+ tstl ETEMP(%a6) //test sign of the zero
+ bges pos_zero
+ movel #0x80000000,ETEMP(%a6) //write neg zero to etemp
+ clrl ETEMP_HI(%a6)
+ clrl ETEMP_LO(%a6)
+ bra finish
+pos_zero:
+ clrl ETEMP(%a6)
+ clrl ETEMP_HI(%a6)
+ clrl ETEMP_LO(%a6)
+ bra finish
+
+not_spec:
+ fmovemx %fp0-%fp1,-(%a7) //save fp0 - decbin returns in it
+ bsr decbin
+ fmovex %fp0,ETEMP(%a6) //put the unpacked sop in the fsave stack
+ fmovemx (%a7)+,%fp0-%fp1
+ fmovel #0,%FPSR //clr fpsr from decbin
+ bra finish
+
+//
+// Special handling for packed move in: Same results as all other
+// packed cases, but we must set the FPSR condition codes properly.
+//
+move_unpack:
+ movew ETEMP(%a6),%d0 //get word with inf information
+ bfextu %d0{#20:#12},%d1 //get exponent into d1
+ cmpiw #0x0fff,%d1 //test for inf or NaN
+ bnes mtry_zero //if not equal, it is not special
+ bfextu %d0{#17:#3},%d1 //get SE and y bits into d1
+ cmpiw #7,%d1 //SE and y bits must be on for special
+ bnes mtry_zero //if not on, it is not special
+//input is one of the special cases, inf or NaN
+ tstl ETEMP_HI(%a6) //check ms mantissa
+ bnes mfix_nan //if non-zero, it is a NaN
+ tstl ETEMP_LO(%a6) //check ls mantissa
+ bnes mfix_nan //if non-zero, it is a NaN
+//input is inf
+ orl #inf_mask,USER_FPSR(%a6) //set I bit
+ tstl ETEMP(%a6) //check sign
+ bge finish
+ orl #neg_mask,USER_FPSR(%a6) //set N bit
+ bra finish //special already on stack
+mfix_nan:
+ orl #nan_mask,USER_FPSR(%a6) //set NaN bit
+ moveb #nan_tag,STAG(%a6) //set stag to NaN
+ btstb #signan_bit,ETEMP_HI(%a6) //test for snan
+ bnes mn_snan
+ orl #snaniop_mask,USER_FPSR(%a6) //set snan bit
+ btstb #snan_bit,FPCR_ENABLE(%a6) //test for snan enabled
+ bnes mn_snan
+ bsetb #signan_bit,ETEMP_HI(%a6) //force snans to qnans
+mn_snan:
+ tstl ETEMP(%a6) //check for sign
+ bge finish //if clr, go on
+ orl #neg_mask,USER_FPSR(%a6) //set N bit
+ bra finish
+
+mtry_zero:
+ movew ETEMP_EX+2(%a6),%d0 //get word 4
+ andiw #0x000f,%d0 //clear all but last ni(y)bble
+ tstw %d0 //check for zero.
+ bnes mnot_spec
+ tstl ETEMP_HI(%a6) //check words 3 and 2
+ bnes mnot_spec
+ tstl ETEMP_LO(%a6) //check words 1 and 0
+ bnes mnot_spec
+ tstl ETEMP(%a6) //test sign of the zero
+ bges mpos_zero
+ orl #neg_mask+z_mask,USER_FPSR(%a6) //set N and Z
+ movel #0x80000000,ETEMP(%a6) //write neg zero to etemp
+ clrl ETEMP_HI(%a6)
+ clrl ETEMP_LO(%a6)
+ bras finish
+mpos_zero:
+ orl #z_mask,USER_FPSR(%a6) //set Z
+ clrl ETEMP(%a6)
+ clrl ETEMP_HI(%a6)
+ clrl ETEMP_LO(%a6)
+ bras finish
+
+mnot_spec:
+ fmovemx %fp0-%fp1,-(%a7) //save fp0 ,fp1 - decbin returns in fp0
+ bsr decbin
+ fmovex %fp0,ETEMP(%a6)
+// ;put the unpacked sop in the fsave stack
+ fmovemx (%a7)+,%fp0-%fp1
+
+finish:
+ movew CMDREG1B(%a6),%d0 //get the command word
+ andw #0xfbff,%d0 //change the source specifier field to
+// ;extended (was packed).
+ movew %d0,CMDREG1B(%a6) //write command word back to fsave stack
+// ;we need to do this so the 040 will
+// ;re-execute the inst. without taking
+// ;another packed trap.
+
+fix_stag:
+//Converted result is now in etemp on fsave stack, now set the source
+//tag (stag)
+// if (ete =$7fff) then INF or NAN
+// if (etemp = $x.0----0) then
+// stag = INF
+// else
+// stag = NAN
+// else
+// if (ete = $0000) then
+// stag = ZERO
+// else
+// stag = NORM
+//
+// Note also that the etemp_15 bit (just right of the stag) must
+// be set accordingly.
+//
+ movew ETEMP_EX(%a6),%d1
+ andiw #0x7fff,%d1 //strip sign
+ cmpw #0x7fff,%d1
+ bnes z_or_nrm
+ movel ETEMP_HI(%a6),%d1
+ bnes is_nan
+ movel ETEMP_LO(%a6),%d1
+ bnes is_nan
+is_inf:
+ moveb #0x40,STAG(%a6)
+ movel #0x40,%d0
+ rts
+is_nan:
+ moveb #0x60,STAG(%a6)
+ movel #0x60,%d0
+ rts
+z_or_nrm:
+ tstw %d1
+ bnes is_nrm
+is_zro:
+// For a zero, set etemp_15
+ moveb #0x30,STAG(%a6)
+ movel #0x20,%d0
+ rts
+is_nrm:
+// For a norm, check if the exp <= $3fff; if so, set etemp_15
+ cmpiw #0x3fff,%d1
+ bles set_bit15
+ moveb #0,STAG(%a6)
+ bras end_is_nrm
+set_bit15:
+ moveb #0x10,STAG(%a6)
+end_is_nrm:
+ movel #0,%d0
+end_fix:
+ rts
+
+end_get:
+ rts
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/kernel_ex.s b/c/src/lib/libcpu/m68k/m68040/fpsp/kernel_ex.s
new file mode 100644
index 0000000000..5873f42f13
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/kernel_ex.s
@@ -0,0 +1,494 @@
+//
+// kernel_ex.sa 3.3 12/19/90
+//
+// This file contains routines to force exception status in the
+// fpu for exceptional cases detected or reported within the
+// transcendental functions. Typically, the t_xx routine will
+// set the appropriate bits in the USER_FPSR word on the stack.
+// The bits are tested in gen_except.sa to determine if an exceptional
+// situation needs to be created on return from the FPSP.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+KERNEL_EX: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+mns_inf: .long 0xffff0000,0x00000000,0x00000000
+pls_inf: .long 0x7fff0000,0x00000000,0x00000000
+nan: .long 0x7fff0000,0xffffffff,0xffffffff
+huge: .long 0x7ffe0000,0xffffffff,0xffffffff
+
+ |xref ovf_r_k
+ |xref unf_sub
+ |xref nrm_set
+
+ .global t_dz
+ .global t_dz2
+ .global t_operr
+ .global t_unfl
+ .global t_ovfl
+ .global t_ovfl2
+ .global t_inx2
+ .global t_frcinx
+ .global t_extdnrm
+ .global t_resdnrm
+ .global dst_nan
+ .global src_nan
+//
+// DZ exception
+//
+//
+// if dz trap disabled
+// store properly signed inf (use sign of etemp) into fp0
+// set FPSR exception status dz bit, condition code
+// inf bit, and accrued dz bit
+// return
+// frestore the frame into the machine (done by unimp_hd)
+//
+// else dz trap enabled
+// set exception status bit & accrued bits in FPSR
+// set flag to disable sto_res from corrupting fp register
+// return
+// frestore the frame into the machine (done by unimp_hd)
+//
+// t_dz2 is used by monadic functions such as flogn (from do_func).
+// t_dz is used by monadic functions such as satanh (from the
+// transcendental function).
+//
+t_dz2:
+ bsetb #neg_bit,FPSR_CC(%a6) //set neg bit in FPSR
+ fmovel #0,%FPSR //clr status bits (Z set)
+ btstb #dz_bit,FPCR_ENABLE(%a6) //test FPCR for dz exc enabled
+ bnes dz_ena_end
+ bras m_inf //flogx always returns -inf
+t_dz:
+ fmovel #0,%FPSR //clr status bits (Z set)
+ btstb #dz_bit,FPCR_ENABLE(%a6) //test FPCR for dz exc enabled
+ bnes dz_ena
+//
+// dz disabled
+//
+ btstb #sign_bit,ETEMP_EX(%a6) //check sign for neg or pos
+ beqs p_inf //branch if pos sign
+
+m_inf:
+ fmovemx mns_inf,%fp0-%fp0 //load -inf
+ bsetb #neg_bit,FPSR_CC(%a6) //set neg bit in FPSR
+ bras set_fpsr
+p_inf:
+ fmovemx pls_inf,%fp0-%fp0 //load +inf
+set_fpsr:
+ orl #dzinf_mask,USER_FPSR(%a6) //set I,DZ,ADZ
+ rts
+//
+// dz enabled
+//
+dz_ena:
+ btstb #sign_bit,ETEMP_EX(%a6) //check sign for neg or pos
+ beqs dz_ena_end
+ bsetb #neg_bit,FPSR_CC(%a6) //set neg bit in FPSR
+dz_ena_end:
+ orl #dzinf_mask,USER_FPSR(%a6) //set I,DZ,ADZ
+ st STORE_FLG(%a6)
+ rts
+//
+// OPERR exception
+//
+// if (operr trap disabled)
+// set FPSR exception status operr bit, condition code
+// nan bit; Store default NAN into fp0
+// frestore the frame into the machine (done by unimp_hd)
+//
+// else (operr trap enabled)
+// set FPSR exception status operr bit, accrued operr bit
+// set flag to disable sto_res from corrupting fp register
+// frestore the frame into the machine (done by unimp_hd)
+//
+t_operr:
+ orl #opnan_mask,USER_FPSR(%a6) //set NaN, OPERR, AIOP
+
+ btstb #operr_bit,FPCR_ENABLE(%a6) //test FPCR for operr enabled
+ bnes op_ena
+
+ fmovemx nan,%fp0-%fp0 //load default nan
+ rts
+op_ena:
+ st STORE_FLG(%a6) //do not corrupt destination
+ rts
+
+//
+// t_unfl --- UNFL exception
+//
+// This entry point is used by all routines requiring unfl, inex2,
+// aunfl, and ainex to be set on exit.
+//
+// On entry, a0 points to the exceptional operand. The final exceptional
+// operand is built in FP_SCR1 and only the sign from the original operand
+// is used.
+//
+t_unfl:
+ clrl FP_SCR1(%a6) //set exceptional operand to zero
+ clrl FP_SCR1+4(%a6)
+ clrl FP_SCR1+8(%a6)
+ tstb (%a0) //extract sign from caller's exop
+ bpls unfl_signok
+ bset #sign_bit,FP_SCR1(%a6)
+unfl_signok:
+ leal FP_SCR1(%a6),%a0
+ orl #unfinx_mask,USER_FPSR(%a6)
+// ;set UNFL, INEX2, AUNFL, AINEX
+unfl_con:
+ btstb #unfl_bit,FPCR_ENABLE(%a6)
+ beqs unfl_dis
+
+unfl_ena:
+ bfclr STAG(%a6){#5:#3} //clear wbtm66,wbtm1,wbtm0
+ bsetb #wbtemp15_bit,WB_BYTE(%a6) //set wbtemp15
+ bsetb #sticky_bit,STICKY(%a6) //set sticky bit
+
+ bclrb #E1,E_BYTE(%a6)
+
+unfl_dis:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0 //get round precision
+
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //convert to internal ext format
+
+ bsr unf_sub //returns IEEE result at a0
+// ;and sets FPSR_CC accordingly
+
+ bfclr LOCAL_SGN(%a0){#0:#8} //convert back to IEEE ext format
+ beqs unfl_fin
+
+ bsetb #sign_bit,LOCAL_EX(%a0)
+ bsetb #sign_bit,FP_SCR1(%a6) //set sign bit of exc operand
+
+unfl_fin:
+ fmovemx (%a0),%fp0-%fp0 //store result in fp0
+ rts
+
+
+//
+// t_ovfl2 --- OVFL exception (without inex2 returned)
+//
+// This entry is used by scale to force catastrophic overflow. The
+// ovfl, aovfl, and ainex bits are set, but not the inex2 bit.
+//
+t_ovfl2:
+ orl #ovfl_inx_mask,USER_FPSR(%a6)
+ movel ETEMP(%a6),FP_SCR1(%a6)
+ movel ETEMP_HI(%a6),FP_SCR1+4(%a6)
+ movel ETEMP_LO(%a6),FP_SCR1+8(%a6)
+//
+// Check for single or double round precision. If single, check if
+// the lower 40 bits of ETEMP are zero; if not, set inex2. If double,
+// check if the lower 11 bits are zero; if not, set inex2.
+//
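+// (The extended mantissa is 64 bits; rounding to single keeps 24 of
+// them and rounding to double keeps 53, so 40 and 11 ($7ff) low-order
+// bits respectively fall below the rounded precision.)
+//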
+ moveb FPCR_MODE(%a6),%d0
+ andib #0xc0,%d0
+ beq t_work //if extended, finish ovfl processing
+ cmpib #0x40,%d0 //test for single
+ bnes t_dbl
+t_sgl:
+ tstb ETEMP_LO(%a6)
+ bnes t_setinx2
+ movel ETEMP_HI(%a6),%d0
+ andil #0xff,%d0 //look at only lower 8 bits
+ bnes t_setinx2
+ bra t_work
+t_dbl:
+ movel ETEMP_LO(%a6),%d0
+ andil #0x7ff,%d0 //look at only lower 11 bits
+ beq t_work
+t_setinx2:
+ orl #inex2_mask,USER_FPSR(%a6)
+ bras t_work
+//
+// t_ovfl --- OVFL exception
+//
+//** Note: the exc operand is returned in ETEMP.
+//
+t_ovfl:
+ orl #ovfinx_mask,USER_FPSR(%a6)
+t_work:
+ btstb #ovfl_bit,FPCR_ENABLE(%a6) //test FPCR for ovfl enabled
+ beqs ovf_dis
+
+ovf_ena:
+ clrl FP_SCR1(%a6) //set exceptional operand
+ clrl FP_SCR1+4(%a6)
+ clrl FP_SCR1+8(%a6)
+
+ bfclr STAG(%a6){#5:#3} //clear wbtm66,wbtm1,wbtm0
+ bclrb #wbtemp15_bit,WB_BYTE(%a6) //clear wbtemp15
+ bsetb #sticky_bit,STICKY(%a6) //set sticky bit
+
+ bclrb #E1,E_BYTE(%a6)
+// ;fall through to disabled case
+
+// For disabled overflow call 'ovf_r_k'. This routine loads the
+// correct result based on the rounding precision, destination
+// format, rounding mode and sign.
+//
+ovf_dis:
+ bsr ovf_r_k //returns unsigned ETEMP_EX
+// ;and sets FPSR_CC accordingly.
+ bfclr ETEMP_SGN(%a6){#0:#8} //fix sign
+ beqs ovf_pos
+ bsetb #sign_bit,ETEMP_EX(%a6)
+ bsetb #sign_bit,FP_SCR1(%a6) //set exceptional operand sign
+ovf_pos:
+ fmovemx ETEMP(%a6),%fp0-%fp0 //move the result to fp0
+ rts
+
+
+//
+// INEX2 exception
+//
+// The inex2 and ainex bits are set.
+//
+t_inx2:
+ orl #inx2a_mask,USER_FPSR(%a6) //set INEX2, AINEX
+ rts
+
+//
+// Force Inex2
+//
+// This routine is called by the transcendental routines to force
+// the inex2 exception bits set in the FPSR. If the underflow bit
+// is set, but the underflow trap was not taken, the aunfl bit in
+// the FPSR must be set.
+//
+t_frcinx:
+ orl #inx2a_mask,USER_FPSR(%a6) //set INEX2, AINEX
+ btstb #unfl_bit,FPSR_EXCEPT(%a6) //test for unfl bit set
+ beqs no_uacc1 //if clear, do not set aunfl
+ bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
+no_uacc1:
+ rts
+
+//
+// DST_NAN
+//
+// Determine if the destination nan is signalling or non-signalling,
+// and set the FPSR bits accordingly. See the MC68040 User's Manual
+// section 3.2.2.5 NOT-A-NUMBERS.
+//
+dst_nan:
+ btstb #sign_bit,FPTEMP_EX(%a6) //test sign of nan
+ beqs dst_pos //if clr, it was positive
+ bsetb #neg_bit,FPSR_CC(%a6) //set N bit
+dst_pos:
+ btstb #signan_bit,FPTEMP_HI(%a6) //check if signalling
+ beqs dst_snan //branch if signalling
+
+ fmovel %d1,%fpcr //restore user's rmode/prec
+ fmovex FPTEMP(%a6),%fp0 //return the non-signalling nan
+//
+// Check the source nan. If it is signalling, snan will be reported.
+//
+ moveb STAG(%a6),%d0
+ andib #0xe0,%d0
+ cmpib #0x60,%d0
+ bnes no_snan
+ btstb #signan_bit,ETEMP_HI(%a6) //check if signalling
+ bnes no_snan
+ orl #snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
+no_snan:
+ rts
+
+dst_snan:
+ btstb #snan_bit,FPCR_ENABLE(%a6) //check if trap enabled
+ beqs dst_dis //branch if disabled
+
+ orb #nan_tag,DTAG(%a6) //set up dtag for nan
+ st STORE_FLG(%a6) //do not store a result
+ orl #snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
+ rts
+
+dst_dis:
+ bsetb #signan_bit,FPTEMP_HI(%a6) //set SNAN bit in sop
+ fmovel %d1,%fpcr //restore user's rmode/prec
+ fmovex FPTEMP(%a6),%fp0 //load non-sign. nan
+ orl #snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
+ rts
+
+//
+// SRC_NAN
+//
+// Determine if the source nan is signalling or non-signalling,
+// and set the FPSR bits accordingly. See the MC68040 User's Manual
+// section 3.2.2.5 NOT-A-NUMBERS.
+//
+src_nan:
+ btstb #sign_bit,ETEMP_EX(%a6) //test sign of nan
+ beqs src_pos //if clr, it was positive
+ bsetb #neg_bit,FPSR_CC(%a6) //set N bit
+src_pos:
+ btstb #signan_bit,ETEMP_HI(%a6) //check if signalling
+ beqs src_snan //branch if signalling
+ fmovel %d1,%fpcr //restore user's rmode/prec
+ fmovex ETEMP(%a6),%fp0 //return the non-signalling nan
+ rts
+
+src_snan:
+ btstb #snan_bit,FPCR_ENABLE(%a6) //check if trap enabled
+ beqs src_dis //branch if disabled
+ bsetb #signan_bit,ETEMP_HI(%a6) //set SNAN bit in sop
+ orb #norm_tag,DTAG(%a6) //set up dtag for norm
+ orb #nan_tag,STAG(%a6) //set up stag for nan
+ st STORE_FLG(%a6) //do not store a result
+ orl #snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
+ rts
+
+src_dis:
+ bsetb #signan_bit,ETEMP_HI(%a6) //set SNAN bit in sop
+ fmovel %d1,%fpcr //restore user's rmode/prec
+ fmovex ETEMP(%a6),%fp0 //load non-sign. nan
+ orl #snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
+ rts
+
+//
+// For all functions that have a denormalized input and for which f(x)=x,
+// this is the entry point
+//
+t_extdnrm:
+ orl #unfinx_mask,USER_FPSR(%a6)
+// ;set UNFL, INEX2, AUNFL, AINEX
+ bras xdnrm_con
+//
+// Entry point for scale with extended denorm. The function does
+// not set inex2, aunfl, or ainex.
+//
+t_resdnrm:
+ orl #unfl_mask,USER_FPSR(%a6)
+
+xdnrm_con:
+ btstb #unfl_bit,FPCR_ENABLE(%a6)
+ beqs xdnrm_dis
+
+//
+// If exceptions are enabled, the additional task of setting up WBTEMP
+// is needed so that when the underflow exception handler is entered,
+// the user perceives no difference between what the 040 provides vs.
+// what the FPSP provides.
+//
+xdnrm_ena:
+ movel %a0,-(%a7)
+
+ movel LOCAL_EX(%a0),FP_SCR1(%a6)
+ movel LOCAL_HI(%a0),FP_SCR1+4(%a6)
+ movel LOCAL_LO(%a0),FP_SCR1+8(%a6)
+
+ lea FP_SCR1(%a6),%a0
+
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //convert to internal ext format
+ tstw LOCAL_EX(%a0) //check if input is denorm
+ beqs xdnrm_dn //if so, skip nrm_set
+ bsr nrm_set //normalize the result (exponent
+// ;will be negative
+xdnrm_dn:
+ bclrb #sign_bit,LOCAL_EX(%a0) //take off false sign
+ bfclr LOCAL_SGN(%a0){#0:#8} //change back to IEEE ext format
+ beqs xdep
+ bsetb #sign_bit,LOCAL_EX(%a0)
+xdep:
+ bfclr STAG(%a6){#5:#3} //clear wbtm66,wbtm1,wbtm0
+ bsetb #wbtemp15_bit,WB_BYTE(%a6) //set wbtemp15
+ bclrb #sticky_bit,STICKY(%a6) //clear sticky bit
+ bclrb #E1,E_BYTE(%a6)
+ movel (%a7)+,%a0
+xdnrm_dis:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0 //get round precision
+ bnes not_ext //if not round extended, store
+// ;IEEE defaults
+is_ext:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ beqs xdnrm_store
+
+ bsetb #neg_bit,FPSR_CC(%a6) //set N bit in FPSR_CC
+
+ bras xdnrm_store
+
+not_ext:
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //convert to internal ext format
+ bsr unf_sub //returns IEEE result pointed by
+// ;a0; sets FPSR_CC accordingly
+ bfclr LOCAL_SGN(%a0){#0:#8} //convert back to IEEE ext format
+ beqs xdnrm_store
+ bsetb #sign_bit,LOCAL_EX(%a0)
+xdnrm_store:
+ fmovemx (%a0),%fp0-%fp0 //store result in fp0
+ rts
+
+//
+// This subroutine is used for dyadic operations that use an extended
+// denorm within the kernel. The approach used is to capture the frame
+// with an fsave, fix up the denormalized operand(s) in the frame, and
+// frestore it back into the machine.
+//
+ .global t_avoid_unsupp
+t_avoid_unsupp:
+ link %a2,#-LOCAL_SIZE //so that a2 fpsp.h negative
+// ;offsets may be used
+ fsave -(%a7)
+ tstb 1(%a7) //check if idle, exit if so
+ beq idle_end
+ btstb #E1,E_BYTE(%a2) //check for an E1 exception if
+// ;enabled, there is an unsupp
+ beq end_avun //else, exit
+ btstb #7,DTAG(%a2) //check for denorm destination
+ beqs src_den //else, must be a source denorm
+//
+// handle destination denorm
+//
+ lea FPTEMP(%a2),%a0
+ btstb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //convert to internal ext format
+ bclrb #7,DTAG(%a2) //set DTAG to norm
+ bsr nrm_set //normalize result, exponent
+// ;will become negative
+ bclrb #sign_bit,LOCAL_EX(%a0) //get rid of fake sign
+ bfclr LOCAL_SGN(%a0){#0:#8} //convert back to IEEE ext format
+ beqs ck_src_den //check if source is also denorm
+ bsetb #sign_bit,LOCAL_EX(%a0)
+ck_src_den:
+ btstb #7,STAG(%a2)
+ beqs end_avun
+src_den:
+ lea ETEMP(%a2),%a0
+ btstb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //convert to internal ext format
+ bclrb #7,STAG(%a2) //set STAG to norm
+ bsr nrm_set //normalize result, exponent
+// ;will become negative
+ bclrb #sign_bit,LOCAL_EX(%a0) //get rid of fake sign
+ bfclr LOCAL_SGN(%a0){#0:#8} //convert back to IEEE ext format
+ beqs den_com
+ bsetb #sign_bit,LOCAL_EX(%a0)
+den_com:
+ moveb #0xfe,CU_SAVEPC(%a2) //set continue frame
+ clrw NMNEXC(%a2) //clear NMNEXC
+ bclrb #E1,E_BYTE(%a2)
+// fmove.l %FPSR,FPSR_SHADOW(%a2)
+// bset.b #SFLAG,E_BYTE(%a2)
+// bset.b #XFLAG,T_BYTE(%a2)
+end_avun:
+ frestore (%a7)+
+ unlk %a2
+ rts
+idle_end:
+ addl #4,%a7
+ unlk %a2
+ rts
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/res_func.s b/c/src/lib/libcpu/m68k/m68040/fpsp/res_func.s
new file mode 100644
index 0000000000..df8d0d9313
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/res_func.s
@@ -0,0 +1,2040 @@
+//
+// res_func.sa 3.9 7/29/91
+//
+// Normalizes denormalized numbers if necessary and updates the
+// stack frame. The function is then restored back into the
+// machine and the 040 completes the operation. This routine
+// is only used by the unsupported data type/format handler.
+// (Exception vector 55).
+//
+// For packed move out (fmove.p fpm,<ea>) the operation is
+// completed here; data is packed and moved to user memory.
+// The stack is restored to the 040 only in the case of a
+// reportable exception in the conversion.
+//
+//
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+RES_FUNC: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+sp_bnds: .short 0x3f81,0x407e
+ .short 0x3f6a,0x0000
+dp_bnds: .short 0x3c01,0x43fe
+ .short 0x3bcd,0x0000
+
+ |xref mem_write
+ |xref bindec
+ |xref get_fline
+ |xref round
+ |xref denorm
+ |xref dest_ext
+ |xref dest_dbl
+ |xref dest_sgl
+ |xref unf_sub
+ |xref nrm_set
+ |xref dnrm_lp
+ |xref ovf_res
+ |xref reg_dest
+ |xref t_ovfl
+ |xref t_unfl
+
+ .global res_func
+ .global p_move
+
+res_func:
+ clrb DNRM_FLG(%a6)
+ clrb RES_FLG(%a6)
+ clrb CU_ONLY(%a6)
+ tstb DY_MO_FLG(%a6)
+ beqs monadic
+dyadic:
+ btstb #7,DTAG(%a6) //if dop = norm=000, zero=001,
+// ;inf=010 or nan=011
+ beqs monadic //then branch
+// ;else denorm
+// HANDLE DESTINATION DENORM HERE
+// ;set dtag to norm
+// ;write the tag & fpte15 to the fstack
+ leal FPTEMP(%a6),%a0
+
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+
+ bsr nrm_set //normalize number (exp will go negative)
+ bclrb #sign_bit,LOCAL_EX(%a0) //get rid of false sign
+ bfclr LOCAL_SGN(%a0){#0:#8} //change back to IEEE ext format
+ beqs dpos
+ bsetb #sign_bit,LOCAL_EX(%a0)
+dpos:
+ bfclr DTAG(%a6){#0:#4} //set tag to normalized, FPTE15 = 0
+ bsetb #4,DTAG(%a6) //set FPTE15
+ orb #0x0f,DNRM_FLG(%a6)
+monadic:
+ leal ETEMP(%a6),%a0
+ btstb #direction_bit,CMDREG1B(%a6) //check direction
+ bne opclass3 //it is a mv out
+//
+// At this point, only opclass 0 and 2 possible
+//
+ btstb #7,STAG(%a6) //if sop = norm=000, zero=001,
+// ;inf=010 or nan=011
+ bne mon_dnrm //else denorm
+ tstb DY_MO_FLG(%a6) //all cases of dyadic instructions would
+ bne normal //require normalization of denorm
+
+// At this point:
+// monadic instructions: fabs = $18 fneg = $1a ftst = $3a
+// fmove = $00 fsmove = $40 fdmove = $44
+// fsqrt = $05* fssqrt = $41 fdsqrt = $45
+// (*fsqrt reencoded to $05)
+//
+ movew CMDREG1B(%a6),%d0 //get command register
+ andil #0x7f,%d0 //strip to only command word
+//
+// At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
+// fdsqrt are possible.
+// For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
+// For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
+//
+ btstl #0,%d0
+ bne normal //weed out fsqrt instructions
+//
+// cu_norm handles fmove in instructions with normalized inputs.
+// The routine round is used to correctly round the input for the
+// destination precision and mode.
+//
+cu_norm:
+ st CU_ONLY(%a6) //set cu-only inst flag
+ movew CMDREG1B(%a6),%d0
+ andib #0x3b,%d0 //isolate bits to select inst
+ tstb %d0
+ beql cu_nmove //if zero, it is an fmove
+ cmpib #0x18,%d0
+ beql cu_nabs //if $18, it is fabs
+ cmpib #0x1a,%d0
+ beql cu_nneg //if $1a, it is fneg
+//
+// Inst is ftst. Check the source operand and set the cc's accordingly.
+// No write is done, so simply rts.
+//
+cu_ntst:
+ movew LOCAL_EX(%a0),%d0
+ bclrl #15,%d0
+ sne LOCAL_SGN(%a0)
+ beqs cu_ntpo
+ orl #neg_mask,USER_FPSR(%a6) //set N
+cu_ntpo:
+ cmpiw #0x7fff,%d0 //test for inf/nan
+ bnes cu_ntcz
+ tstl LOCAL_HI(%a0)
+ bnes cu_ntn
+ tstl LOCAL_LO(%a0)
+ bnes cu_ntn
+ orl #inf_mask,USER_FPSR(%a6)
+ rts
+cu_ntn:
+ orl #nan_mask,USER_FPSR(%a6)
+ movel ETEMP_EX(%a6),FPTEMP_EX(%a6) //set up fptemp sign for
+// ;snan handler
+
+ rts
+cu_ntcz:
+ tstl LOCAL_HI(%a0)
+ bnel cu_ntsx
+ tstl LOCAL_LO(%a0)
+ bnel cu_ntsx
+ orl #z_mask,USER_FPSR(%a6)
+cu_ntsx:
+ rts
+//
+// Inst is fabs. Execute the absolute value function on the input.
+// Branch to the fmove code. If the operand is NaN, do nothing.
+//
+cu_nabs:
+ moveb STAG(%a6),%d0
+ btstl #5,%d0 //test for NaN or zero
+ bne wr_etemp //if either, simply write it
+ bclrb #7,LOCAL_EX(%a0) //do abs
+ bras cu_nmove //fmove code will finish
+//
+// Inst is fneg. Execute the negate value function on the input.
+// Fall though to the fmove code. If the operand is NaN, do nothing.
+//
+cu_nneg:
+ moveb STAG(%a6),%d0
+ btstl #5,%d0 //test for NaN or zero
+ bne wr_etemp //if either, simply write it
+ bchgb #7,LOCAL_EX(%a0) //do neg
+//
+// Inst is fmove. This code also handles all result writes.
+// If bit 2 is set, round is forced to double. If it is clear,
+// and bit 6 is set, round is forced to single. If both are clear,
+// the round precision is found in the fpcr. If the rounding precision
+// is double or single, round the result before the write.
+//
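+// For example, with the command encodings listed earlier, fdmove ($44)
+// has bit 2 set and is rounded to double, fsmove ($40) has bit 2 clear
+// and bit 6 set and is rounded to single, and fmove ($00) has both
+// clear and uses the fpcr rounding precision.
+//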
+cu_nmove:
+ moveb STAG(%a6),%d0
+ andib #0xe0,%d0 //isolate stag bits
+ bne wr_etemp //if not norm, simply write it
+ btstb #2,CMDREG1B+1(%a6) //check for rd
+ bne cu_nmrd
+ btstb #6,CMDREG1B+1(%a6) //check for rs
+ bne cu_nmrs
+//
+// The move or operation is not with forced precision. Test for
+// nan or inf as the input; if so, simply write it to FPn. Use the
+// FPCR_MODE byte to get rounding on norms and zeros.
+//
+cu_nmnr:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0
+ tstb %d0 //check for extended
+ beq cu_wrexn //if so, just write result
+ cmpib #1,%d0 //check for single
+ beq cu_nmrs //fall through to double
+//
+// The move is fdmove or round precision is double.
+//
+cu_nmrd:
+ movel #2,%d0 //set up the size for denorm
+ movew LOCAL_EX(%a0),%d1 //compare exponent to double threshold
+ andw #0x7fff,%d1
+ cmpw #0x3c01,%d1
+ bls cu_nunfl
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //get rmode
+ orl #0x00020000,%d1 //or in rprec (double)
+ clrl %d0 //clear g,r,s for round
+ bclrb #sign_bit,LOCAL_EX(%a0) //convert to internal format
+ sne LOCAL_SGN(%a0)
+ bsrl round
+ bfclr LOCAL_SGN(%a0){#0:#8}
+ beqs cu_nmrdc
+ bsetb #sign_bit,LOCAL_EX(%a0)
+cu_nmrdc:
+ movew LOCAL_EX(%a0),%d1 //check for overflow
+ andw #0x7fff,%d1
+ cmpw #0x43ff,%d1
+ bge cu_novfl //take care of overflow case
+ bra cu_wrexn
+//
+// The move is fsmove or round precision is single.
+//
+cu_nmrs:
+ movel #1,%d0
+ movew LOCAL_EX(%a0),%d1
+ andw #0x7fff,%d1
+ cmpw #0x3f81,%d1
+ bls cu_nunfl
+ bfextu FPCR_MODE(%a6){#2:#2},%d1
+ orl #0x00010000,%d1
+ clrl %d0
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+ bsrl round
+ bfclr LOCAL_SGN(%a0){#0:#8}
+ beqs cu_nmrsc
+ bsetb #sign_bit,LOCAL_EX(%a0)
+cu_nmrsc:
+ movew LOCAL_EX(%a0),%d1
+ andw #0x7FFF,%d1
+ cmpw #0x407f,%d1
+ blt cu_wrexn
+//
+// The operand is above precision boundaries. Use t_ovfl to
+// generate the correct value.
+//
+cu_novfl:
+ bsr t_ovfl
+ bra cu_wrexn
+//
+// The operand is below precision boundaries. Use denorm to
+// generate the correct value.
+//
+cu_nunfl:
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+ bsr denorm
+ bfclr LOCAL_SGN(%a0){#0:#8} //change back to IEEE ext format
+ beqs cu_nucont
+ bsetb #sign_bit,LOCAL_EX(%a0)
+cu_nucont:
+ bfextu FPCR_MODE(%a6){#2:#2},%d1
+ btstb #2,CMDREG1B+1(%a6) //check for rd
+ bne inst_d
+ btstb #6,CMDREG1B+1(%a6) //check for rs
+ bne inst_s
+ swap %d1
+ moveb FPCR_MODE(%a6),%d1
+ lsrb #6,%d1
+ swap %d1
+ bra inst_sd
+inst_d:
+ orl #0x00020000,%d1
+ bra inst_sd
+inst_s:
+ orl #0x00010000,%d1
+inst_sd:
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+ bsrl round
+ bfclr LOCAL_SGN(%a0){#0:#8}
+ beqs cu_nuflp
+ bsetb #sign_bit,LOCAL_EX(%a0)
+cu_nuflp:
+ btstb #inex2_bit,FPSR_EXCEPT(%a6)
+ beqs cu_nuninx
+ orl #aunfl_mask,USER_FPSR(%a6) //if the round was inex, set AUNFL
+cu_nuninx:
+ tstl LOCAL_HI(%a0) //test for zero
+ bnes cu_nunzro
+ tstl LOCAL_LO(%a0)
+ bnes cu_nunzro
+//
+// The mantissa is zero from the denorm loop. Check sign and rmode
+// to see if rounding should have occurred which would leave the lsb.
+//
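+// (For example, a positive result in rp mode, or a negative result in
+// rm mode, must round away from zero to the smallest denorm of the
+// rounded precision rather than to zero, so the lsb of that precision
+// is set below.)
+//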
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0 //isolate rmode
+ cmpil #0x20,%d0
+ blts cu_nzro
+ bnes cu_nrp
+cu_nrm:
+	tstw	LOCAL_EX(%a0)	//if negative, set lsb
+ bges cu_nzro
+ btstb #7,FPCR_MODE(%a6) //check for double
+ beqs cu_nincs
+ bras cu_nincd
+cu_nrp:
+ tstw LOCAL_EX(%a0) //if positive, set lsb
+ blts cu_nzro
+ btstb #7,FPCR_MODE(%a6) //check for double
+ beqs cu_nincs
+cu_nincd:
+ orl #0x800,LOCAL_LO(%a0) //inc for double
+ bra cu_nunzro
+cu_nincs:
+ orl #0x100,LOCAL_HI(%a0) //inc for single
+ bra cu_nunzro
+cu_nzro:
+ orl #z_mask,USER_FPSR(%a6)
+ moveb STAG(%a6),%d0
+ andib #0xe0,%d0
+ cmpib #0x40,%d0 //check if input was tagged zero
+ beqs cu_numv
+cu_nunzro:
+ orl #unfl_mask,USER_FPSR(%a6) //set unfl
+cu_numv:
+ movel (%a0),ETEMP(%a6)
+ movel 4(%a0),ETEMP_HI(%a6)
+ movel 8(%a0),ETEMP_LO(%a6)
+//
+// Write the result to memory, setting the fpsr cc bits. NaN and Inf
+// bypass cu_wrexn.
+//
+cu_wrexn:
+ tstw LOCAL_EX(%a0) //test for zero
+ beqs cu_wrzero
+	cmpw	#0x8000,LOCAL_EX(%a0)	//test for negative zero
+ bnes cu_wreon
+cu_wrzero:
+ orl #z_mask,USER_FPSR(%a6) //set Z bit
+cu_wreon:
+ tstw LOCAL_EX(%a0)
+ bpl wr_etemp
+ orl #neg_mask,USER_FPSR(%a6)
+ bra wr_etemp
+
+//
+// HANDLE SOURCE DENORM HERE
+//
+// ;clear denorm stag to norm
+// ;write the new tag & ete15 to the fstack
+mon_dnrm:
+//
+// At this point, check for the cases in which normalizing the
+// denorm produces incorrect results.
+//
+ tstb DY_MO_FLG(%a6) //all cases of dyadic instructions would
+ bnes nrm_src //require normalization of denorm
+
+// At this point:
+// monadic instructions: fabs = $18 fneg = $1a ftst = $3a
+// fmove = $00 fsmove = $40 fdmove = $44
+// fsqrt = $05* fssqrt = $41 fdsqrt = $45
+// (*fsqrt reencoded to $05)
+//
+ movew CMDREG1B(%a6),%d0 //get command register
+ andil #0x7f,%d0 //strip to only command word
+//
+// At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
+// fdsqrt are possible.
+// For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
+// For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
+//
+ btstl #0,%d0
+ bnes nrm_src //weed out fsqrt instructions
+ st CU_ONLY(%a6) //set cu-only inst flag
+ bra cu_dnrm //fmove, fabs, fneg, ftst
+// ;cases go to cu_dnrm
+nrm_src:
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+ bsr nrm_set //normalize number (exponent will go
+// ; negative)
+ bclrb #sign_bit,LOCAL_EX(%a0) //get rid of false sign
+
+ bfclr LOCAL_SGN(%a0){#0:#8} //change back to IEEE ext format
+ beqs spos
+ bsetb #sign_bit,LOCAL_EX(%a0)
+spos:
+ bfclr STAG(%a6){#0:#4} //set tag to normalized, FPTE15 = 0
+ bsetb #4,STAG(%a6) //set ETE15
+ orb #0xf0,DNRM_FLG(%a6)
+normal:
+ tstb DNRM_FLG(%a6) //check if any of the ops were denorms
+ bne ck_wrap //if so, check if it is a potential
+// ;wrap-around case
+fix_stk:
+ moveb #0xfe,CU_SAVEPC(%a6)
+ bclrb #E1,E_BYTE(%a6)
+
+ clrw NMNEXC(%a6)
+
+ st RES_FLG(%a6) //indicate that a restore is needed
+ rts
+
+//
+// cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
+// ftst) completely in software without an frestore to the 040.
+//
+cu_dnrm:
+ st CU_ONLY(%a6)
+ movew CMDREG1B(%a6),%d0
+ andib #0x3b,%d0 //isolate bits to select inst
+ tstb %d0
+ beql cu_dmove //if zero, it is an fmove
+ cmpib #0x18,%d0
+ beql cu_dabs //if $18, it is fabs
+ cmpib #0x1a,%d0
+ beql cu_dneg //if $1a, it is fneg
+//
+// Inst is ftst. Check the source operand and set the cc's accordingly.
+// No write is done, so simply rts.
+//
+cu_dtst:
+ movew LOCAL_EX(%a0),%d0
+ bclrl #15,%d0
+ sne LOCAL_SGN(%a0)
+ beqs cu_dtpo
+ orl #neg_mask,USER_FPSR(%a6) //set N
+cu_dtpo:
+ cmpiw #0x7fff,%d0 //test for inf/nan
+ bnes cu_dtcz
+ tstl LOCAL_HI(%a0)
+ bnes cu_dtn
+ tstl LOCAL_LO(%a0)
+ bnes cu_dtn
+ orl #inf_mask,USER_FPSR(%a6)
+ rts
+cu_dtn:
+ orl #nan_mask,USER_FPSR(%a6)
+ movel ETEMP_EX(%a6),FPTEMP_EX(%a6) //set up fptemp sign for
+// ;snan handler
+ rts
+cu_dtcz:
+ tstl LOCAL_HI(%a0)
+ bnel cu_dtsx
+ tstl LOCAL_LO(%a0)
+ bnel cu_dtsx
+ orl #z_mask,USER_FPSR(%a6)
+cu_dtsx:
+ rts
+//
+// Inst is fabs. Execute the absolute value function on the input.
+// Branch to the fmove code.
+//
+cu_dabs:
+ bclrb #7,LOCAL_EX(%a0) //do abs
+ bras cu_dmove //fmove code will finish
+//
+// Inst is fneg. Execute the negate value function on the input.
+// Fall though to the fmove code.
+//
+cu_dneg:
+ bchgb #7,LOCAL_EX(%a0) //do neg
+//
+// Inst is fmove. This code also handles all result writes.
+// If bit 2 is set, round is forced to double. If it is clear,
+// and bit 6 is set, round is forced to single. If both are clear,
+// the round precision is found in the fpcr. If the rounding precision
+// is double or single, the result is zero, and the mode is checked
+// to determine if the lsb of the result should be set.
+//
+cu_dmove:
+ btstb #2,CMDREG1B+1(%a6) //check for rd
+ bne cu_dmrd
+ btstb #6,CMDREG1B+1(%a6) //check for rs
+ bne cu_dmrs
+//
+// The move or operation is not with forced precision. Use the
+// FPCR_MODE byte to get rounding.
+//
+cu_dmnr:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0
+ tstb %d0 //check for extended
+ beq cu_wrexd //if so, just write result
+ cmpib #1,%d0 //check for single
+ beq cu_dmrs //fall through to double
+//
+// The move is fdmove or round precision is double. Result is zero.
+// Check rmode for rp or rm and set lsb accordingly.
+//
+cu_dmrd:
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //get rmode
+ tstw LOCAL_EX(%a0) //check sign
+ blts cu_dmdn
+ cmpib #3,%d1 //check for rp
+ bne cu_dpd //load double pos zero
+ bra cu_dpdr //load double pos zero w/lsb
+cu_dmdn:
+ cmpib #2,%d1 //check for rm
+ bne cu_dnd //load double neg zero
+ bra cu_dndr //load double neg zero w/lsb
+//
+// The move is fsmove or round precision is single. Result is zero.
+// Check for rp or rm and set lsb accordingly.
+//
+cu_dmrs:
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //get rmode
+ tstw LOCAL_EX(%a0) //check sign
+ blts cu_dmsn
+ cmpib #3,%d1 //check for rp
+ bne cu_spd //load single pos zero
+ bra cu_spdr //load single pos zero w/lsb
+cu_dmsn:
+ cmpib #2,%d1 //check for rm
+ bne cu_snd //load single neg zero
+ bra cu_sndr //load single neg zero w/lsb
+//
+// The precision is extended, so the result in etemp is correct.
+// Simply set unfl (not inex2 or aunfl) and write the result to
+// the correct fp register.
+cu_wrexd:
+ orl #unfl_mask,USER_FPSR(%a6)
+ tstw LOCAL_EX(%a0)
+ beq wr_etemp
+ orl #neg_mask,USER_FPSR(%a6)
+ bra wr_etemp
+//
+// These routines write +/- zero in double format. The routines
+// cu_dpdr and cu_dndr set the double lsb.
+//
+cu_dpd:
+ movel #0x3c010000,LOCAL_EX(%a0) //force pos double zero
+ clrl LOCAL_HI(%a0)
+ clrl LOCAL_LO(%a0)
+ orl #z_mask,USER_FPSR(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+cu_dpdr:
+ movel #0x3c010000,LOCAL_EX(%a0) //force pos double zero
+ clrl LOCAL_HI(%a0)
+ movel #0x800,LOCAL_LO(%a0) //with lsb set
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+cu_dnd:
+	movel	#0xbc010000,LOCAL_EX(%a0)	//force neg double zero
+ clrl LOCAL_HI(%a0)
+ clrl LOCAL_LO(%a0)
+ orl #z_mask,USER_FPSR(%a6)
+ orl #neg_mask,USER_FPSR(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+cu_dndr:
+	movel	#0xbc010000,LOCAL_EX(%a0)	//force neg double zero
+ clrl LOCAL_HI(%a0)
+ movel #0x800,LOCAL_LO(%a0) //with lsb set
+ orl #neg_mask,USER_FPSR(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+//
+// These routines write +/- zero in single format. The routines
+// cu_spdr and cu_sndr set the single lsb.
+//
+cu_spd:
+ movel #0x3f810000,LOCAL_EX(%a0) //force pos single zero
+ clrl LOCAL_HI(%a0)
+ clrl LOCAL_LO(%a0)
+ orl #z_mask,USER_FPSR(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+cu_spdr:
+ movel #0x3f810000,LOCAL_EX(%a0) //force pos single zero
+ movel #0x100,LOCAL_HI(%a0) //with lsb set
+ clrl LOCAL_LO(%a0)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+cu_snd:
+	movel	#0xbf810000,LOCAL_EX(%a0)	//force neg single zero
+ clrl LOCAL_HI(%a0)
+ clrl LOCAL_LO(%a0)
+ orl #z_mask,USER_FPSR(%a6)
+ orl #neg_mask,USER_FPSR(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+cu_sndr:
+	movel	#0xbf810000,LOCAL_EX(%a0)	//force neg single zero
+ movel #0x100,LOCAL_HI(%a0) //with lsb set
+ clrl LOCAL_LO(%a0)
+ orl #neg_mask,USER_FPSR(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ bra wr_etemp
+
+//
+// This code checks for 16-bit overflow conditions on dyadic
+// operations which are not restorable into the floating-point
+// unit and must be completed in software. Basically, this
+// condition exists with a very large norm and a denorm. One
+// of the operands must be denormalized to enter this code.
+//
+// Flags used:
+// DY_MO_FLG contains 0 for monadic op, $ff for dyadic
+// DNRM_FLG contains $00 for neither op denormalized
+// $0f for the destination op denormalized
+// $f0 for the source op denormalized
+// $ff for both ops denormalized
+//
+// The wrap-around condition occurs for add, sub, div, and cmp
+// when
+//
+// abs(dest_exp - src_exp) >= $8000
+//
+// and for mul when
+//
+// (dest_exp + src_exp) < $0
+//
+// we must process the operation here if this case is true.
+//
+// The rts following the frcfpn routine is the exit from res_func
+// for this condition. The restore flag (RES_FLG) is left clear.
+// No frestore is done unless an exception is to be reported.
+//
+// For fadd:
+// if(sign_of(dest) != sign_of(src))
+// replace exponent of src with $3fff (keep sign)
+// use fpu to perform dest+new_src (user's rmode and X)
+// clr sticky
+// else
+// set sticky
+// call round with user's precision and mode
+// move result to fpn and wbtemp
+//
+// For fsub:
+// if(sign_of(dest) == sign_of(src))
+// replace exponent of src with $3fff (keep sign)
+// use fpu to perform dest+new_src (user's rmode and X)
+// clr sticky
+// else
+// set sticky
+// call round with user's precision and mode
+// move result to fpn and wbtemp
+//
+// For fdiv/fsgldiv:
+// if(both operands are denorm)
+// restore_to_fpu;
+// if(dest is norm)
+// force_ovf;
+// else(dest is denorm)
+// force_unf:
+//
+// For fcmp:
+// if(dest is norm)
+// N = sign_of(dest);
+// else(dest is denorm)
+// N = sign_of(src);
+//
+// For fmul:
+// if(both operands are denorm)
+// force_unf;
+// if((dest_exp + src_exp) < 0)
+// force_unf:
+// else
+// restore_to_fpu;
+//
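+// As a rough illustration (values chosen only for this example): if
+// the destination denorm normalizes to an exponent of -$3f and the
+// source is a norm with exponent $7ffe, then
+//
+//	abs(dest_exp - src_exp) = abs(-$3f - $7ffe) = $803d >= $8000
+//
+// so the 16-bit exponent arithmetic in the '040 would wrap and the
+// operation is completed here instead of being restored.
+//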
+// local equates:
+ .set addcode,0x22
+ .set subcode,0x28
+ .set mulcode,0x23
+ .set divcode,0x20
+ .set cmpcode,0x38
+ck_wrap:
+ | tstb DY_MO_FLG(%a6) ;check for fsqrt
+ beq fix_stk //if zero, it is fsqrt
+ movew CMDREG1B(%a6),%d0
+ andiw #0x3b,%d0 //strip to command bits
+ cmpiw #addcode,%d0
+ beq wrap_add
+ cmpiw #subcode,%d0
+ beq wrap_sub
+ cmpiw #mulcode,%d0
+ beq wrap_mul
+ cmpiw #cmpcode,%d0
+ beq wrap_cmp
+//
+// Inst is fdiv.
+//
+wrap_div:
+ cmpb #0xff,DNRM_FLG(%a6) //if both ops denorm,
+ beq fix_stk //restore to fpu
+//
+// One of the ops is denormalized. Test for wrap condition
+// and force the result.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //check for dest denorm
+ bnes div_srcd
+div_destd:
+ bsrl ckinf_ns
+ bne fix_stk
+ bfextu ETEMP_EX(%a6){#1:#15},%d0 //get src exp (always pos)
+ bfexts FPTEMP_EX(%a6){#1:#15},%d1 //get dest exp (always neg)
+ subl %d1,%d0 //subtract dest from src
+ cmpl #0x7fff,%d0
+ blt fix_stk //if less, not wrap case
+ clrb WBTEMP_SGN(%a6)
+ movew ETEMP_EX(%a6),%d0 //find the sign of the result
+ movew FPTEMP_EX(%a6),%d1
+ eorw %d1,%d0
+ andiw #0x8000,%d0
+ beq force_unf
+ st WBTEMP_SGN(%a6)
+ bra force_unf
+
+ckinf_ns:
+ moveb STAG(%a6),%d0 //check source tag for inf or nan
+ bra ck_in_com
+ckinf_nd:
+ moveb DTAG(%a6),%d0 //check destination tag for inf or nan
+ck_in_com:
+ andib #0x60,%d0 //isolate tag bits
+ cmpb #0x40,%d0 //is it inf?
+ beq nan_or_inf //not wrap case
+ cmpb #0x60,%d0 //is it nan?
+ beq nan_or_inf //yes, not wrap case?
+ cmpb #0x20,%d0 //is it a zero?
+ beq nan_or_inf //yes
+ clrl %d0
+	rts	//it is either a zero or norm,
+// ;check wrap case
+nan_or_inf:
+ moveql #-1,%d0
+ rts
+
+
+
+div_srcd:
+ bsrl ckinf_nd
+ bne fix_stk
+ bfextu FPTEMP_EX(%a6){#1:#15},%d0 //get dest exp (always pos)
+ bfexts ETEMP_EX(%a6){#1:#15},%d1 //get src exp (always neg)
+ subl %d1,%d0 //subtract src from dest
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+ clrb WBTEMP_SGN(%a6)
+ movew ETEMP_EX(%a6),%d0 //find the sign of the result
+ movew FPTEMP_EX(%a6),%d1
+ eorw %d1,%d0
+ andiw #0x8000,%d0
+ beqs force_ovf
+ st WBTEMP_SGN(%a6)
+//
+// This code handles the case of the instruction resulting in
+// an overflow condition.
+//
+force_ovf:
+ bclrb #E1,E_BYTE(%a6)
+ orl #ovfl_inx_mask,USER_FPSR(%a6)
+ clrw NMNEXC(%a6)
+ leal WBTEMP(%a6),%a0 //point a0 to memory location
+ movew CMDREG1B(%a6),%d0
+ btstl #6,%d0 //test for forced precision
+ beqs frcovf_fpcr
+ btstl #2,%d0 //check for double
+ bnes frcovf_dbl
+ movel #0x1,%d0 //inst is forced single
+ bras frcovf_rnd
+frcovf_dbl:
+ movel #0x2,%d0 //inst is forced double
+ bras frcovf_rnd
+frcovf_fpcr:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0 //inst not forced - use fpcr prec
+frcovf_rnd:
+
+// The 881/882 does not set inex2 for the following case, so the
+// line is commented out to be compatible with 881/882
+// tst.b %d0
+// beq.b frcovf_x
+// or.l #inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2
+
+//frcovf_x:
+ bsrl ovf_res //get correct result based on
+// ;round precision/mode. This
+// ;sets FPSR_CC correctly
+// ;returns in external format
+ bfclr WBTEMP_SGN(%a6){#0:#8}
+ beq frcfpn
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+ bra frcfpn
+//
+// Inst is fadd.
+//
+wrap_add:
+ cmpb #0xff,DNRM_FLG(%a6) //if both ops denorm,
+ beq fix_stk //restore to fpu
+//
+// One of the ops is denormalized. Test for wrap condition
+// and complete the instruction.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //check for dest denorm
+ bnes add_srcd
+add_destd:
+ bsrl ckinf_ns
+ bne fix_stk
+ bfextu ETEMP_EX(%a6){#1:#15},%d0 //get src exp (always pos)
+ bfexts FPTEMP_EX(%a6){#1:#15},%d1 //get dest exp (always neg)
+ subl %d1,%d0 //subtract dest from src
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+ bra add_wrap
+add_srcd:
+ bsrl ckinf_nd
+ bne fix_stk
+ bfextu FPTEMP_EX(%a6){#1:#15},%d0 //get dest exp (always pos)
+ bfexts ETEMP_EX(%a6){#1:#15},%d1 //get src exp (always neg)
+ subl %d1,%d0 //subtract src from dest
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+//
+// Check the signs of the operands. If they are unlike, the fpu
+// can be used to add the norm and 1.0 with the sign of the
+// denorm and it will correctly generate the result in extended
+// precision. We can then call round with no sticky and the result
+// will be correct for the user's rounding mode and precision. If
+// the signs are the same, we call round with the sticky bit set
+// and the result will be correct for the user's rounding mode and
+// precision.
+//
+add_wrap:
+ movew ETEMP_EX(%a6),%d0
+ movew FPTEMP_EX(%a6),%d1
+ eorw %d1,%d0
+ andiw #0x8000,%d0
+ beq add_same
+//
+// The signs are unlike.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //is dest the denorm?
+ bnes add_u_srcd
+ movew FPTEMP_EX(%a6),%d0
+ andiw #0x8000,%d0
+ orw #0x3fff,%d0 //force the exponent to +/- 1
+ movew %d0,FPTEMP_EX(%a6) //in the denorm
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ fmovel %d0,%fpcr //set up users rmode and X
+ fmovex ETEMP(%a6),%fp0
+ faddx FPTEMP(%a6),%fp0
+ leal WBTEMP(%a6),%a0 //point a0 to wbtemp in frame
+ fmovel %fpsr,%d1
+ orl %d1,USER_FPSR(%a6) //capture cc's and inex from fadd
+ fmovex %fp0,WBTEMP(%a6) //write result to memory
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ clrl %d0 //force sticky to zero
+ bclrb #sign_bit,WBTEMP_EX(%a6)
+ sne WBTEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr WBTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beq frcfpnr
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+ bra frcfpnr
+add_u_srcd:
+ movew ETEMP_EX(%a6),%d0
+ andiw #0x8000,%d0
+ orw #0x3fff,%d0 //force the exponent to +/- 1
+ movew %d0,ETEMP_EX(%a6) //in the denorm
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ fmovel %d0,%fpcr //set up users rmode and X
+ fmovex ETEMP(%a6),%fp0
+ faddx FPTEMP(%a6),%fp0
+ fmovel %fpsr,%d1
+ orl %d1,USER_FPSR(%a6) //capture cc's and inex from fadd
+ leal WBTEMP(%a6),%a0 //point a0 to wbtemp in frame
+ fmovex %fp0,WBTEMP(%a6) //write result to memory
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ clrl %d0 //force sticky to zero
+ bclrb #sign_bit,WBTEMP_EX(%a6)
+ sne WBTEMP_SGN(%a6) //use internal format for round
+ bsrl round //round result to users rmode & prec
+ bfclr WBTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beq frcfpnr
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+ bra frcfpnr
+//
+// Signs are alike:
+//
+add_same:
+ cmpb #0x0f,DNRM_FLG(%a6) //is dest the denorm?
+ bnes add_s_srcd
+add_s_destd:
+ leal ETEMP(%a6),%a0
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ movel #0x20000000,%d0 //set sticky for round
+ bclrb #sign_bit,ETEMP_EX(%a6)
+ sne ETEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr ETEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beqs add_s_dclr
+ bsetb #sign_bit,ETEMP_EX(%a6)
+add_s_dclr:
+ leal WBTEMP(%a6),%a0
+ movel ETEMP(%a6),(%a0) //write result to wbtemp
+ movel ETEMP_HI(%a6),4(%a0)
+ movel ETEMP_LO(%a6),8(%a0)
+ tstw ETEMP_EX(%a6)
+ bgt add_ckovf
+ orl #neg_mask,USER_FPSR(%a6)
+ bra add_ckovf
+add_s_srcd:
+ leal FPTEMP(%a6),%a0
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ movel #0x20000000,%d0 //set sticky for round
+ bclrb #sign_bit,FPTEMP_EX(%a6)
+ sne FPTEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr FPTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beqs add_s_sclr
+ bsetb #sign_bit,FPTEMP_EX(%a6)
+add_s_sclr:
+ leal WBTEMP(%a6),%a0
+ movel FPTEMP(%a6),(%a0) //write result to wbtemp
+ movel FPTEMP_HI(%a6),4(%a0)
+ movel FPTEMP_LO(%a6),8(%a0)
+ tstw FPTEMP_EX(%a6)
+ bgt add_ckovf
+ orl #neg_mask,USER_FPSR(%a6)
+add_ckovf:
+ movew WBTEMP_EX(%a6),%d0
+ andiw #0x7fff,%d0
+ cmpiw #0x7fff,%d0
+ bne frcfpnr
+//
+// The result has overflowed to $7fff exponent. Set I, ovfl,
+// and aovfl, and clr the mantissa (incorrectly set by the
+// round routine.)
+//
+ orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
+ clrl 4(%a0)
+ bra frcfpnr
+//
+// Inst is fsub.
+//
+wrap_sub:
+ cmpb #0xff,DNRM_FLG(%a6) //if both ops denorm,
+ beq fix_stk //restore to fpu
+//
+// One of the ops is denormalized. Test for wrap condition
+// and complete the instruction.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //check for dest denorm
+ bnes sub_srcd
+sub_destd:
+ bsrl ckinf_ns
+ bne fix_stk
+ bfextu ETEMP_EX(%a6){#1:#15},%d0 //get src exp (always pos)
+ bfexts FPTEMP_EX(%a6){#1:#15},%d1 //get dest exp (always neg)
+	subl	%d1,%d0		//subtract dest from src
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+ bra sub_wrap
+sub_srcd:
+ bsrl ckinf_nd
+ bne fix_stk
+ bfextu FPTEMP_EX(%a6){#1:#15},%d0 //get dest exp (always pos)
+ bfexts ETEMP_EX(%a6){#1:#15},%d1 //get src exp (always neg)
+	subl	%d1,%d0		//subtract src from dest
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+//
+// Check the signs of the operands. If they are alike, the fpu
+// can be used to subtract 1.0 (with the sign of the denorm) from the
+// norm, and it will correctly generate the result in extended
+// precision. We can then call round with no sticky and the result
+// will be correct for the user's rounding mode and precision. If
+// the signs are unlike, we call round with the sticky bit set
+// and the result will be correct for the user's rounding mode and
+// precision.
+//
+sub_wrap:
+ movew ETEMP_EX(%a6),%d0
+ movew FPTEMP_EX(%a6),%d1
+ eorw %d1,%d0
+ andiw #0x8000,%d0
+ bne sub_diff
+//
+// The signs are alike.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //is dest the denorm?
+ bnes sub_u_srcd
+ movew FPTEMP_EX(%a6),%d0
+ andiw #0x8000,%d0
+ orw #0x3fff,%d0 //force the exponent to +/- 1
+ movew %d0,FPTEMP_EX(%a6) //in the denorm
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ fmovel %d0,%fpcr //set up users rmode and X
+ fmovex FPTEMP(%a6),%fp0
+ fsubx ETEMP(%a6),%fp0
+ fmovel %fpsr,%d1
+	orl	%d1,USER_FPSR(%a6)	//capture cc's and inex from fsub
+ leal WBTEMP(%a6),%a0 //point a0 to wbtemp in frame
+ fmovex %fp0,WBTEMP(%a6) //write result to memory
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ clrl %d0 //force sticky to zero
+ bclrb #sign_bit,WBTEMP_EX(%a6)
+ sne WBTEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr WBTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beq frcfpnr
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+ bra frcfpnr
+sub_u_srcd:
+ movew ETEMP_EX(%a6),%d0
+ andiw #0x8000,%d0
+ orw #0x3fff,%d0 //force the exponent to +/- 1
+ movew %d0,ETEMP_EX(%a6) //in the denorm
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ fmovel %d0,%fpcr //set up users rmode and X
+ fmovex FPTEMP(%a6),%fp0
+ fsubx ETEMP(%a6),%fp0
+ fmovel %fpsr,%d1
+	orl	%d1,USER_FPSR(%a6)	//capture cc's and inex from fsub
+ leal WBTEMP(%a6),%a0 //point a0 to wbtemp in frame
+ fmovex %fp0,WBTEMP(%a6) //write result to memory
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ clrl %d0 //force sticky to zero
+ bclrb #sign_bit,WBTEMP_EX(%a6)
+ sne WBTEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr WBTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beq frcfpnr
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+ bra frcfpnr
+//
+// Signs are unlike:
+//
+sub_diff:
+ cmpb #0x0f,DNRM_FLG(%a6) //is dest the denorm?
+ bnes sub_s_srcd
+sub_s_destd:
+ leal ETEMP(%a6),%a0
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ movel #0x20000000,%d0 //set sticky for round
+//
+// Since the dest is the denorm, the sign is the opposite of the
+// norm sign.
+//
+ eoriw #0x8000,ETEMP_EX(%a6) //flip sign on result
+ tstw ETEMP_EX(%a6)
+ bgts sub_s_dwr
+ orl #neg_mask,USER_FPSR(%a6)
+sub_s_dwr:
+ bclrb #sign_bit,ETEMP_EX(%a6)
+ sne ETEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr ETEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beqs sub_s_dclr
+ bsetb #sign_bit,ETEMP_EX(%a6)
+sub_s_dclr:
+ leal WBTEMP(%a6),%a0
+ movel ETEMP(%a6),(%a0) //write result to wbtemp
+ movel ETEMP_HI(%a6),4(%a0)
+ movel ETEMP_LO(%a6),8(%a0)
+ bra sub_ckovf
+sub_s_srcd:
+ leal FPTEMP(%a6),%a0
+ movel USER_FPCR(%a6),%d0
+ andil #0x30,%d0
+ lsrl #4,%d0 //put rmode in lower 2 bits
+ movel USER_FPCR(%a6),%d1
+ andil #0xc0,%d1
+ lsrl #6,%d1 //put precision in upper word
+ swap %d1
+ orl %d0,%d1 //set up for round call
+ movel #0x20000000,%d0 //set sticky for round
+ bclrb #sign_bit,FPTEMP_EX(%a6)
+ sne FPTEMP_SGN(%a6)
+ bsrl round //round result to users rmode & prec
+ bfclr FPTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beqs sub_s_sclr
+ bsetb #sign_bit,FPTEMP_EX(%a6)
+sub_s_sclr:
+ leal WBTEMP(%a6),%a0
+ movel FPTEMP(%a6),(%a0) //write result to wbtemp
+ movel FPTEMP_HI(%a6),4(%a0)
+ movel FPTEMP_LO(%a6),8(%a0)
+ tstw FPTEMP_EX(%a6)
+ bgt sub_ckovf
+ orl #neg_mask,USER_FPSR(%a6)
+sub_ckovf:
+ movew WBTEMP_EX(%a6),%d0
+ andiw #0x7fff,%d0
+ cmpiw #0x7fff,%d0
+ bne frcfpnr
+//
+// The result has overflowed to $7fff exponent. Set I, ovfl,
+// and aovfl, and clr the mantissa (incorrectly set by the
+// round routine.)
+//
+ orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
+ clrl 4(%a0)
+ bra frcfpnr
+//
+// Inst is fcmp.
+//
+wrap_cmp:
+ cmpb #0xff,DNRM_FLG(%a6) //if both ops denorm,
+ beq fix_stk //restore to fpu
+//
+// One of the ops is denormalized. Test for wrap condition
+// and complete the instruction.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //check for dest denorm
+ bnes cmp_srcd
+cmp_destd:
+ bsrl ckinf_ns
+ bne fix_stk
+ bfextu ETEMP_EX(%a6){#1:#15},%d0 //get src exp (always pos)
+ bfexts FPTEMP_EX(%a6){#1:#15},%d1 //get dest exp (always neg)
+ subl %d1,%d0 //subtract dest from src
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+ tstw ETEMP_EX(%a6) //set N to ~sign_of(src)
+ bge cmp_setn
+ rts
+cmp_srcd:
+ bsrl ckinf_nd
+ bne fix_stk
+ bfextu FPTEMP_EX(%a6){#1:#15},%d0 //get dest exp (always pos)
+ bfexts ETEMP_EX(%a6){#1:#15},%d1 //get src exp (always neg)
+ subl %d1,%d0 //subtract src from dest
+ cmpl #0x8000,%d0
+ blt fix_stk //if less, not wrap case
+ tstw FPTEMP_EX(%a6) //set N to sign_of(dest)
+ blt cmp_setn
+ rts
+cmp_setn:
+ orl #neg_mask,USER_FPSR(%a6)
+ rts
+
+//
+// Inst is fmul.
+//
+wrap_mul:
+ cmpb #0xff,DNRM_FLG(%a6) //if both ops denorm,
+ beq force_unf //force an underflow (really!)
+//
+// One of the ops is denormalized. Test for wrap condition
+// and complete the instruction.
+//
+ cmpb #0x0f,DNRM_FLG(%a6) //check for dest denorm
+ bnes mul_srcd
+mul_destd:
+ bsrl ckinf_ns
+ bne fix_stk
+ bfextu ETEMP_EX(%a6){#1:#15},%d0 //get src exp (always pos)
+ bfexts FPTEMP_EX(%a6){#1:#15},%d1 //get dest exp (always neg)
+	addl	%d1,%d0		//add the exponents (dest exp is negative)
+ bgt fix_stk
+ bra force_unf
+mul_srcd:
+ bsrl ckinf_nd
+ bne fix_stk
+ bfextu FPTEMP_EX(%a6){#1:#15},%d0 //get dest exp (always pos)
+ bfexts ETEMP_EX(%a6){#1:#15},%d1 //get src exp (always neg)
+	addl	%d1,%d0		//add the exponents (src exp is negative)
+ bgt fix_stk
+
+//
+// This code handles the case of the instruction resulting in
+// an underflow condition.
+//
+force_unf:
+ bclrb #E1,E_BYTE(%a6)
+ orl #unfinx_mask,USER_FPSR(%a6)
+ clrw NMNEXC(%a6)
+ clrb WBTEMP_SGN(%a6)
+ movew ETEMP_EX(%a6),%d0 //find the sign of the result
+ movew FPTEMP_EX(%a6),%d1
+ eorw %d1,%d0
+ andiw #0x8000,%d0
+ beqs frcunfcont
+ st WBTEMP_SGN(%a6)
+frcunfcont:
+ lea WBTEMP(%a6),%a0 //point a0 to memory location
+ movew CMDREG1B(%a6),%d0
+ btstl #6,%d0 //test for forced precision
+ beqs frcunf_fpcr
+ btstl #2,%d0 //check for double
+ bnes frcunf_dbl
+ movel #0x1,%d0 //inst is forced single
+ bras frcunf_rnd
+frcunf_dbl:
+ movel #0x2,%d0 //inst is forced double
+ bras frcunf_rnd
+frcunf_fpcr:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0 //inst not forced - use fpcr prec
+frcunf_rnd:
+ bsrl unf_sub //get correct result based on
+// ;round precision/mode. This
+// ;sets FPSR_CC correctly
+ bfclr WBTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beqs frcfpn
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+ bra frcfpn
+
+//
+// Write the result to the user's fpn. All results must be HUGE to be
+// written; otherwise the results would have overflowed or underflowed.
+// If the rounding precision is single or double, the ovf_res routine
+// is needed to correctly supply the max value.
+//
+frcfpnr:
+ movew CMDREG1B(%a6),%d0
+ btstl #6,%d0 //test for forced precision
+ beqs frcfpn_fpcr
+ btstl #2,%d0 //check for double
+ bnes frcfpn_dbl
+ movel #0x1,%d0 //inst is forced single
+ bras frcfpn_rnd
+frcfpn_dbl:
+ movel #0x2,%d0 //inst is forced double
+ bras frcfpn_rnd
+frcfpn_fpcr:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0 //inst not forced - use fpcr prec
+ tstb %d0
+ beqs frcfpn //if extended, write what you got
+frcfpn_rnd:
+ bclrb #sign_bit,WBTEMP_EX(%a6)
+ sne WBTEMP_SGN(%a6)
+ bsrl ovf_res //get correct result based on
+// ;round precision/mode. This
+// ;sets FPSR_CC correctly
+ bfclr WBTEMP_SGN(%a6){#0:#8} //convert back to IEEE ext format
+ beqs frcfpn_clr
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+frcfpn_clr:
+ orl #ovfinx_mask,USER_FPSR(%a6)
+//
+// Perform the write.
+//
+frcfpn:
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //extract fp destination register
+ cmpib #3,%d0
+ bles frc0123 //check if dest is fp0-fp3
+ movel #7,%d1
+ subl %d0,%d1
+ clrl %d0
+ bsetl %d1,%d0
+ fmovemx WBTEMP(%a6),%d0
+ rts
+frc0123:
+ cmpib #0,%d0
+ beqs frc0_dst
+ cmpib #1,%d0
+ beqs frc1_dst
+ cmpib #2,%d0
+ beqs frc2_dst
+frc3_dst:
+ movel WBTEMP_EX(%a6),USER_FP3(%a6)
+ movel WBTEMP_HI(%a6),USER_FP3+4(%a6)
+ movel WBTEMP_LO(%a6),USER_FP3+8(%a6)
+ rts
+frc2_dst:
+ movel WBTEMP_EX(%a6),USER_FP2(%a6)
+ movel WBTEMP_HI(%a6),USER_FP2+4(%a6)
+ movel WBTEMP_LO(%a6),USER_FP2+8(%a6)
+ rts
+frc1_dst:
+ movel WBTEMP_EX(%a6),USER_FP1(%a6)
+ movel WBTEMP_HI(%a6),USER_FP1+4(%a6)
+ movel WBTEMP_LO(%a6),USER_FP1+8(%a6)
+ rts
+frc0_dst:
+ movel WBTEMP_EX(%a6),USER_FP0(%a6)
+ movel WBTEMP_HI(%a6),USER_FP0+4(%a6)
+ movel WBTEMP_LO(%a6),USER_FP0+8(%a6)
+ rts
+
+//
+// Write etemp to fpn.
+// A check is made on enabled and signalled snan exceptions,
+// and the destination is not overwritten if this condition exists.
+// This code is designed to make fmove-ins of unsupported data types
+// faster.
+//
+wr_etemp:
+ btstb #snan_bit,FPSR_EXCEPT(%a6) //if snan is set, and
+ beqs fmoveinc //enabled, force restore
+ btstb #snan_bit,FPCR_ENABLE(%a6) //and don't overwrite
+ beqs fmoveinc //the dest
+ movel ETEMP_EX(%a6),FPTEMP_EX(%a6) //set up fptemp sign for
+// ;snan handler
+ tstb ETEMP(%a6) //check for negative
+ blts snan_neg
+ rts
+snan_neg:
+ orl #neg_bit,USER_FPSR(%a6) //snan is negative; set N
+ rts
+fmoveinc:
+ clrw NMNEXC(%a6)
+ bclrb #E1,E_BYTE(%a6)
+ moveb STAG(%a6),%d0 //check if stag is inf
+ andib #0xe0,%d0
+ cmpib #0x40,%d0
+ bnes fminc_cnan
+ orl #inf_mask,USER_FPSR(%a6) //if inf, nothing yet has set I
+ tstw LOCAL_EX(%a0) //check sign
+ bges fminc_con
+ orl #neg_mask,USER_FPSR(%a6)
+ bra fminc_con
+fminc_cnan:
+ cmpib #0x60,%d0 //check if stag is NaN
+ bnes fminc_czero
+ orl #nan_mask,USER_FPSR(%a6) //if nan, nothing yet has set NaN
+ movel ETEMP_EX(%a6),FPTEMP_EX(%a6) //set up fptemp sign for
+// ;snan handler
+ tstw LOCAL_EX(%a0) //check sign
+ bges fminc_con
+ orl #neg_mask,USER_FPSR(%a6)
+ bra fminc_con
+fminc_czero:
+ cmpib #0x20,%d0 //check if zero
+ bnes fminc_con
+ orl #z_mask,USER_FPSR(%a6) //if zero, set Z
+ tstw LOCAL_EX(%a0) //check sign
+ bges fminc_con
+ orl #neg_mask,USER_FPSR(%a6)
+fminc_con:
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //extract fp destination register
+ cmpib #3,%d0
+ bles fp0123 //check if dest is fp0-fp3
+ movel #7,%d1
+ subl %d0,%d1
+ clrl %d0
+ bsetl %d1,%d0
+ fmovemx ETEMP(%a6),%d0
+ rts
+
+fp0123:
+ cmpib #0,%d0
+ beqs fp0_dst
+ cmpib #1,%d0
+ beqs fp1_dst
+ cmpib #2,%d0
+ beqs fp2_dst
+fp3_dst:
+ movel ETEMP_EX(%a6),USER_FP3(%a6)
+ movel ETEMP_HI(%a6),USER_FP3+4(%a6)
+ movel ETEMP_LO(%a6),USER_FP3+8(%a6)
+ rts
+fp2_dst:
+ movel ETEMP_EX(%a6),USER_FP2(%a6)
+ movel ETEMP_HI(%a6),USER_FP2+4(%a6)
+ movel ETEMP_LO(%a6),USER_FP2+8(%a6)
+ rts
+fp1_dst:
+ movel ETEMP_EX(%a6),USER_FP1(%a6)
+ movel ETEMP_HI(%a6),USER_FP1+4(%a6)
+ movel ETEMP_LO(%a6),USER_FP1+8(%a6)
+ rts
+fp0_dst:
+ movel ETEMP_EX(%a6),USER_FP0(%a6)
+ movel ETEMP_HI(%a6),USER_FP0+4(%a6)
+ movel ETEMP_LO(%a6),USER_FP0+8(%a6)
+ rts
+
+opclass3:
+ st CU_ONLY(%a6)
+ movew CMDREG1B(%a6),%d0 //check if packed moveout
+ andiw #0x0c00,%d0 //isolate last 2 bits of size field
+ cmpiw #0x0c00,%d0 //if size is 011 or 111, it is packed
+ beq pack_out //else it is norm or denorm
+ bra mv_out
+
+
+//
+// MOVE OUT
+//
+
+mv_tbl:
+ .long li
+ .long sgp
+ .long xp
+ .long mvout_end //should never be taken
+ .long wi
+ .long dp
+ .long bi
+ .long mvout_end //should never be taken
+mv_out:
+ bfextu CMDREG1B(%a6){#3:#3},%d1 //put source specifier in d1
+ leal mv_tbl,%a0
+ movel %a0@(%d1:l:4),%a0
+ jmp (%a0)
+
+//
+// This exit is for move-out to memory. The aunfl bit is
+// set if the result is inex and unfl is signalled.
+//
+mvout_end:
+ btstb #inex2_bit,FPSR_EXCEPT(%a6)
+ beqs no_aufl
+ btstb #unfl_bit,FPSR_EXCEPT(%a6)
+ beqs no_aufl
+ bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
+no_aufl:
+ clrw NMNEXC(%a6)
+ bclrb #E1,E_BYTE(%a6)
+ fmovel #0,%FPSR //clear any cc bits from res_func
+//
+// Return ETEMP to extended format from internal extended format so
+// that gen_except will have a correctly signed value for ovfl/unfl
+// handlers.
+//
+ bfclr ETEMP_SGN(%a6){#0:#8}
+ beqs mvout_con
+ bsetb #sign_bit,ETEMP_EX(%a6)
+mvout_con:
+ rts
+//
+// This exit is for move-out to int register. The aunfl bit is
+// not set in any case for this move.
+//
+mvouti_end:
+ clrw NMNEXC(%a6)
+ bclrb #E1,E_BYTE(%a6)
+ fmovel #0,%FPSR //clear any cc bits from res_func
+//
+// Return ETEMP to extended format from internal extended format so
+// that gen_except will have a correctly signed value for ovfl/unfl
+// handlers.
+//
+ bfclr ETEMP_SGN(%a6){#0:#8}
+ beqs mvouti_con
+ bsetb #sign_bit,ETEMP_EX(%a6)
+mvouti_con:
+ rts
+//
+// li is used to handle a long integer source specifier
+//
+
+li:
+ moveql #4,%d0 //set byte count
+
+ btstb #7,STAG(%a6) //check for extended denorm
+ bne int_dnrm //if so, branch
+
+ fmovemx ETEMP(%a6),%fp0-%fp0
+ fcmpd #0x41dfffffffc00000,%fp0
+// 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
+ fbge lo_plrg
+ fcmpd #0xc1e0000000000000,%fp0
+// c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
+ fble lo_nlrg
+//
+// at this point, the answer is between the largest pos and neg values
+//
+ movel USER_FPCR(%a6),%d1 //use user's rounding mode
+ andil #0x30,%d1
+ fmovel %d1,%fpcr
+ fmovel %fp0,L_SCR1(%a6) //let the 040 perform conversion
+ fmovel %fpsr,%d1
+ orl %d1,USER_FPSR(%a6) //capture inex2/ainex if set
+ bra int_wrt
+
+
+lo_plrg:
+ movel #0x7fffffff,L_SCR1(%a6) //answer is largest positive int
+ fbeq int_wrt //exact answer
+ fcmpd #0x41dfffffffe00000,%fp0
+// 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
+ fbge int_operr //set operr
+ bra int_inx //set inexact
+
+lo_nlrg:
+ movel #0x80000000,L_SCR1(%a6)
+ fbeq int_wrt //exact answer
+ fcmpd #0xc1e0000000100000,%fp0
+// c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
+ fblt int_operr //set operr
+ bra int_inx //set inexact
+
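For reference, the double-precision constants tested above decode to 2147483647.0 (0x41dfffffffc00000), -2147483648.0 (0xc1e0000000000000), 2147483647.5 (0x41dfffffffe00000) and -2147483648.5 (0xc1e0000000100000), so the long-integer path amounts to the saturation sketch below. This is only an outline: it ignores the user's rounding mode for the in-range case, and the wi/bi paths that follow use the same pattern with 16-bit and 8-bit limits.

    #include <stdint.h>

    /* Sketch of the 'li' saturation tests above. */
    int32_t fmove_long_sketch(double x, int *operr, int *inexact)
    {
        *operr = *inexact = 0;
        if (x >= 2147483647.0) {                        /* lo_plrg */
            if (x != 2147483647.0) {
                *operr   = (x >= 2147483647.5);         /* would round past INT32_MAX */
                *inexact = !*operr;                     /* no inex2 on integer overflow */
            }
            return INT32_MAX;
        }
        if (x <= -2147483648.0) {                       /* lo_nlrg */
            if (x != -2147483648.0) {
                *operr   = (x < -2147483648.5);
                *inexact = !*operr;
            }
            return INT32_MIN;
        }
        /* in range: the real code lets the 040's fmove.l convert in the user's
           rounding mode and captures inex2/ainex from the FPSR */
        return (int32_t)x;
    }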
+//
+// wi is used to handle a word integer source specifier
+//
+
+wi:
+ moveql #2,%d0 //set byte count
+
+ btstb #7,STAG(%a6) //check for extended denorm
+ bne int_dnrm //branch if so
+
+ fmovemx ETEMP(%a6),%fp0-%fp0
+ fcmps #0x46fffe00,%fp0
+// 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
+ fbge wo_plrg
+ fcmps #0xc7000000,%fp0
+// c7000000 in sgl prec = c00e00008000000000000000 in ext prec
+ fble wo_nlrg
+
+//
+// at this point, the answer is between the largest pos and neg values
+//
+ movel USER_FPCR(%a6),%d1 //use user's rounding mode
+ andil #0x30,%d1
+ fmovel %d1,%fpcr
+ fmovew %fp0,L_SCR1(%a6) //let the 040 perform conversion
+ fmovel %fpsr,%d1
+ orl %d1,USER_FPSR(%a6) //capture inex2/ainex if set
+ bra int_wrt
+
+wo_plrg:
+ movew #0x7fff,L_SCR1(%a6) //answer is largest positive int
+ fbeq int_wrt //exact answer
+ fcmps #0x46ffff00,%fp0
+// 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
+ fbge int_operr //set operr
+ bra int_inx //set inexact
+
+wo_nlrg:
+ movew #0x8000,L_SCR1(%a6)
+ fbeq int_wrt //exact answer
+ fcmps #0xc7000080,%fp0
+// c7000080 in sgl prec = c00e00008000800000000000 in ext prec
+ fblt int_operr //set operr
+ bra int_inx //set inexact
+
+//
+// bi is used to handle a byte integer source specifier
+//
+
+bi:
+ moveql #1,%d0 //set byte count
+
+ btstb #7,STAG(%a6) //check for extended denorm
+ bne int_dnrm //branch if so
+
+ fmovemx ETEMP(%a6),%fp0-%fp0
+ fcmps #0x42fe0000,%fp0
+// 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
+ fbge by_plrg
+ fcmps #0xc3000000,%fp0
+// c3000000 in sgl prec = c00600008000000000000000 in ext prec
+ fble by_nlrg
+
+//
+// at this point, the answer is between the largest pos and neg values
+//
+ movel USER_FPCR(%a6),%d1 //use user's rounding mode
+ andil #0x30,%d1
+ fmovel %d1,%fpcr
+ fmoveb %fp0,L_SCR1(%a6) //let the 040 perform conversion
+ fmovel %fpsr,%d1
+ orl %d1,USER_FPSR(%a6) //capture inex2/ainex if set
+ bra int_wrt
+
+by_plrg:
+ moveb #0x7f,L_SCR1(%a6) //answer is largest positive int
+ fbeq int_wrt //exact answer
+ fcmps #0x42ff0000,%fp0
+// 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
+ fbge int_operr //set operr
+ bra int_inx //set inexact
+
+by_nlrg:
+ moveb #0x80,L_SCR1(%a6)
+ fbeq int_wrt //exact answer
+ fcmps #0xc3008000,%fp0
+// c3008000 in sgl prec = c00600008080000000000000 in ext prec
+ fblt int_operr //set operr
+ bra int_inx //set inexact
+
+//
+// Common integer routines
+//
+// int_dnrm---account for possible nonzero result for round up with positive
+// operand and round down for negative answer. In the first case (result = 1)
+// the byte-width of the result (stored in d0) must be honored. In the second case,
+// -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
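In other words, a nonzero extended denorm converts to 0 except in the two directed-rounding cases called out above, and the result is flagged inexact in every case (int_inx). A small C sketch, with the byte-width handling for the +1 case omitted:

    /* Sketch of the int_dnrm policy: rounding modes 0=RN 1=RZ 2=RM 3=RP. */
    long int_dnrm_sketch(int src_is_negative, int rmode)
    {
        if (rmode == 2 && src_is_negative)   /* RM: round toward minus infinity */
            return -1;
        if (rmode == 3 && !src_is_negative)  /* RP: round toward plus infinity  */
            return 1;
        return 0;                            /* RN, RZ, or rounding toward zero */
    }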
+
+int_dnrm:
+ movel #0,L_SCR1(%a6) // initialize result to 0
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 // d1 is the rounding mode
+ cmpb #2,%d1
+ bmis int_inx // if RN or RZ, done
+ bnes int_rp // if RP, continue below
+ tstw ETEMP(%a6) // RM: store -1 in L_SCR1 if src is negative
+ bpls int_inx // otherwise result is 0
+ movel #-1,L_SCR1(%a6)
+ bras int_inx
+int_rp:
+ tstw ETEMP(%a6) // RP: store +1 of proper width in L_SCR1 if
+// ; source is greater than 0
+ bmis int_inx // otherwise, result is 0
+ lea L_SCR1(%a6),%a1 // a1 is address of L_SCR1
+ addal %d0,%a1 // offset by destination width -1
+ subal #1,%a1
+ bsetb #0,(%a1) // set low bit at a1 address
+int_inx:
+ oril #inx2a_mask,USER_FPSR(%a6)
+ bras int_wrt
+int_operr:
+ fmovemx %fp0-%fp0,FPTEMP(%a6) //FPTEMP must contain the extended
+// ;precision source that needs to be
+// ;converted to integer this is required
+// ;if the operr exception is enabled.
+// ;set operr/aiop (no inex2 on int ovfl)
+
+ oril #opaop_mask,USER_FPSR(%a6)
+// ;fall through to perform int_wrt
+int_wrt:
+ movel EXC_EA(%a6),%a1 //load destination address
+ tstl %a1 //check to see if it is a dest register
+ beqs wrt_dn //write data register
+ lea L_SCR1(%a6),%a0 //point to supervisor source address
+ bsrl mem_write
+ bra mvouti_end
+
+wrt_dn:
+ movel %d0,-(%sp) //d0 currently contains the size to write
+ bsrl get_fline //get_fline returns Dn in d0
+ andiw #0x7,%d0 //isolate register
+ movel (%sp)+,%d1 //get size
+ cmpil #4,%d1 //most frequent case
+ beqs sz_long
+ cmpil #2,%d1
+ bnes sz_con
+ orl #8,%d0 //add 'word' size to register#
+ bras sz_con
+sz_long:
+ orl #0x10,%d0 //add 'long' size to register#
+sz_con:
+ movel %d0,%d1 //reg_dest expects size:reg in d1
+ bsrl reg_dest //load proper data register
+ bra mvouti_end
+xp:
+ lea ETEMP(%a6),%a0
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+ btstb #7,STAG(%a6) //check for extended denorm
+ bne xdnrm
+ clrl %d0
+ bras do_fp //do normal case
+sgp:
+ lea ETEMP(%a6),%a0
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+ btstb #7,STAG(%a6) //check for extended denorm
+ bne sp_catas //branch if so
+ movew LOCAL_EX(%a0),%d0
+ lea sp_bnds,%a1
+ cmpw (%a1),%d0
+ blt sp_under
+ cmpw 2(%a1),%d0
+ bgt sp_over
+ movel #1,%d0 //set destination format to single
+ bras do_fp //do normal case
+dp:
+ lea ETEMP(%a6),%a0
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+
+ btstb #7,STAG(%a6) //check for extended denorm
+ bne dp_catas //branch if so
+
+ movew LOCAL_EX(%a0),%d0
+ lea dp_bnds,%a1
+
+ cmpw (%a1),%d0
+ blt dp_under
+ cmpw 2(%a1),%d0
+ bgt dp_over
+
+ movel #2,%d0 //set destination format to double
+// ;fall through to do_fp
+//
+do_fp:
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //rnd mode in d1
+ swap %d0 //rnd prec in upper word
+ addl %d0,%d1 //d1 has PREC/MODE info
+
+ clrl %d0 //clear g,r,s
+
+ bsrl round //round
+
+ movel %a0,%a1
+ movel EXC_EA(%a6),%a0
+
+ bfextu CMDREG1B(%a6){#3:#3},%d1 //extract destination format
+// ;at this point only the dest
+// ;formats sgl, dbl, ext are
+// ;possible
+ cmpb #2,%d1
+ bgts ddbl //double=5, extended=2, single=1
+ bnes dsgl
+// ;fall through to dext
+dext:
+ bsrl dest_ext
+ bra mvout_end
+dsgl:
+ bsrl dest_sgl
+ bra mvout_end
+ddbl:
+ bsrl dest_dbl
+ bra mvout_end
+
+//
+// Handle possible denorm or catastrophic underflow cases here
+//
+xdnrm:
+ bsr set_xop //initialize WBTEMP
+ bsetb #wbtemp15_bit,WB_BYTE(%a6) //set wbtemp15
+
+ movel %a0,%a1
+ movel EXC_EA(%a6),%a0 //a0 has the destination pointer
+ bsrl dest_ext //store to memory
+ bsetb #unfl_bit,FPSR_EXCEPT(%a6)
+ bra mvout_end
+
+sp_under:
+ bsetb #etemp15_bit,STAG(%a6)
+
+ cmpw 4(%a1),%d0
+ blts sp_catas //catastrophic underflow case
+
+ movel #1,%d0 //load in round precision
+ movel #sgl_thresh,%d1 //load in single denorm threshold
+ bsrl dpspdnrm //expects d1 to have the proper
+// ;denorm threshold
+ bsrl dest_sgl //stores value to destination
+ bsetb #unfl_bit,FPSR_EXCEPT(%a6)
+ bra mvout_end //exit
+
+dp_under:
+ bsetb #etemp15_bit,STAG(%a6)
+
+ cmpw 4(%a1),%d0
+ blts dp_catas //catastrophic underflow case
+
+ movel #dbl_thresh,%d1 //load in double precision threshold
+ movel #2,%d0
+ bsrl dpspdnrm //expects d1 to have proper
+// ;denorm threshold
+// ;expects d0 to have round precision
+ bsrl dest_dbl //store value to destination
+ bsetb #unfl_bit,FPSR_EXCEPT(%a6)
+ bra mvout_end //exit
+
+//
+// Handle catastrophic underflow cases here
+//
+sp_catas:
+// Temp fix for z bit set in unf_sub
+ movel USER_FPSR(%a6),-(%a7)
+
+ movel #1,%d0 //set round precision to sgl
+
+ bsrl unf_sub //a0 points to result
+
+ movel (%a7)+,USER_FPSR(%a6)
+
+ movel #1,%d0
+ subw %d0,LOCAL_EX(%a0) //account for difference between
+// ;denorm/norm bias
+
+ movel %a0,%a1 //a1 has the operand input
+ movel EXC_EA(%a6),%a0 //a0 has the destination pointer
+
+ bsrl dest_sgl //store the result
+ oril #unfinx_mask,USER_FPSR(%a6)
+ bra mvout_end
+
+dp_catas:
+// Temp fix for z bit set in unf_sub
+ movel USER_FPSR(%a6),-(%a7)
+
+ movel #2,%d0 //set round precision to dbl
+ bsrl unf_sub //a0 points to result
+
+ movel (%a7)+,USER_FPSR(%a6)
+
+ movel #1,%d0
+ subw %d0,LOCAL_EX(%a0) //account for difference between
+// ;denorm/norm bias
+
+ movel %a0,%a1 //a1 has the operand input
+ movel EXC_EA(%a6),%a0 //a0 has the destination pointer
+
+ bsrl dest_dbl //store the result
+ oril #unfinx_mask,USER_FPSR(%a6)
+ bra mvout_end
+
+//
+// Handle catastrophic overflow cases here
+//
+sp_over:
+// Temp fix for z bit set in ovf_res
+ movel USER_FPSR(%a6),-(%a7)
+
+ movel #1,%d0
+ leal FP_SCR1(%a6),%a0 //use FP_SCR1 for creating result
+ movel ETEMP_EX(%a6),(%a0)
+ movel ETEMP_HI(%a6),4(%a0)
+ movel ETEMP_LO(%a6),8(%a0)
+ bsrl ovf_res
+
+ movel (%a7)+,USER_FPSR(%a6)
+
+ movel %a0,%a1
+ movel EXC_EA(%a6),%a0
+ bsrl dest_sgl
+ orl #ovfinx_mask,USER_FPSR(%a6)
+ bra mvout_end
+
+dp_over:
+// Temp fix for z bit set in ovf_res
+ movel USER_FPSR(%a6),-(%a7)
+
+ movel #2,%d0
+ leal FP_SCR1(%a6),%a0 //use FP_SCR1 for creating result
+ movel ETEMP_EX(%a6),(%a0)
+ movel ETEMP_HI(%a6),4(%a0)
+ movel ETEMP_LO(%a6),8(%a0)
+ bsrl ovf_res
+
+ movel (%a7)+,USER_FPSR(%a6)
+
+ movel %a0,%a1
+ movel EXC_EA(%a6),%a0
+ bsrl dest_dbl
+ orl #ovfinx_mask,USER_FPSR(%a6)
+ bra mvout_end
+
+//
+// DPSPDNRM
+//
+// This subroutine takes an extended normalized number and denormalizes
+// it to the given round precision. This subroutine also decrements
+// the input operand's exponent by 1 to account for the fact that
+// dest_sgl or dest_dbl expects a normalized number's bias.
+//
+// Input: a0 points to a normalized number in internal extended format
+// d0 is the round precision (=1 for sgl; =2 for dbl)
+//	d1 is the single precision or double precision
+// denorm threshold
+//
+// Output: (In the format for dest_sgl or dest_dbl)
+// a0 points to the destination
+// a1 points to the operand
+//
+// Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
+//
+dpspdnrm:
+ movel %d0,-(%a7) //save round precision
+ clrl %d0 //clear initial g,r,s
+ bsrl dnrm_lp //careful with d0, it's needed by round
+
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //get rounding mode
+ swap %d1
+ movew 2(%a7),%d1 //set rounding precision
+ swap %d1 //at this point d1 has PREC/MODE info
+ bsrl round //round result, sets the inex bit in
+// ;USER_FPSR if needed
+
+ movew #1,%d0
+ subw %d0,LOCAL_EX(%a0) //account for difference in denorm
+// ;vs norm bias
+
+ movel %a0,%a1 //a1 has the operand input
+ movel EXC_EA(%a6),%a0 //a0 has the destination pointer
+ addw #4,%a7 //pop stack
+ rts
+//
+// SET_XOP initializes WBTEMP with the value pointed to by a0
+// input: a0 points to input operand in the internal extended format
+//
+set_xop:
+ movel LOCAL_EX(%a0),WBTEMP_EX(%a6)
+ movel LOCAL_HI(%a0),WBTEMP_HI(%a6)
+ movel LOCAL_LO(%a0),WBTEMP_LO(%a6)
+ bfclr WBTEMP_SGN(%a6){#0:#8}
+ beqs sxop
+ bsetb #sign_bit,WBTEMP_EX(%a6)
+sxop:
+ bfclr STAG(%a6){#5:#4} //clear wbtm66,wbtm1,wbtm0,sbit
+ rts
+//
+// P_MOVE
+//
+p_movet:
+ .long p_move
+ .long p_movez
+ .long p_movei
+ .long p_moven
+ .long p_move
+p_regd:
+ .long p_dyd0
+ .long p_dyd1
+ .long p_dyd2
+ .long p_dyd3
+ .long p_dyd4
+ .long p_dyd5
+ .long p_dyd6
+ .long p_dyd7
+
+pack_out:
+ leal p_movet,%a0 //load jmp table address
+ movew STAG(%a6),%d0 //get source tag
+ bfextu %d0{#16:#3},%d0 //isolate source bits
+ movel (%a0,%d0.w*4),%a0 //load a0 with routine label for tag
+ jmp (%a0) //go to the routine
+
+p_write:
+ movel #0x0c,%d0 //get byte count
+ movel EXC_EA(%a6),%a1 //get the destination address
+ bsr mem_write //write the user's destination
+ moveb #0,CU_SAVEPC(%a6) //set the cu save pc to all 0's
+
+//
+// Also note that the dtag must be set to norm here - this is because
+// the 040 uses the dtag to execute the correct microcode.
+//
+ bfclr DTAG(%a6){#0:#3} //set dtag to norm
+
+ rts
+
+// Notes on handling of special case (zero, inf, and nan) inputs:
+// 1. Operr is not signalled if the k-factor is greater than 18.
+// 2. Per the manual, status bits are not set.
+//
+
+p_move:
+ movew CMDREG1B(%a6),%d0
+ btstl #kfact_bit,%d0 //test for dynamic k-factor
+ beqs statick //if clear, k-factor is static
+dynamick:
+ bfextu %d0{#25:#3},%d0 //isolate register for dynamic k-factor
+ lea p_regd,%a0
+ movel %a0@(%d0:l:4),%a0
+ jmp (%a0)
+statick:
+ andiw #0x007f,%d0 //get k-factor
+ bfexts %d0{#25:#7},%d0 //sign extend d0 for bindec
+ leal ETEMP(%a6),%a0 //a0 will point to the packed decimal
+ bsrl bindec //perform the convert; data at a6
+ leal FP_SCR1(%a6),%a0 //load a0 with result address
+ bral p_write
+p_movez:
+ leal ETEMP(%a6),%a0 //a0 will point to the packed decimal
+ clrw 2(%a0) //clear lower word of exp
+ clrl 4(%a0) //load second lword of ZERO
+ clrl 8(%a0) //load third lword of ZERO
+ bra p_write //go write results
+p_movei:
+ fmovel #0,%FPSR //clear aiop
+ leal ETEMP(%a6),%a0 //a0 will point to the packed decimal
+ clrw 2(%a0) //clear lower word of exp
+ bra p_write //go write the result
+p_moven:
+ leal ETEMP(%a6),%a0 //a0 will point to the packed decimal
+ clrw 2(%a0) //clear lower word of exp
+ bra p_write //go write the result
+
+//
+// Routines to read the dynamic k-factor from Dn.
+//
+p_dyd0:
+ movel USER_D0(%a6),%d0
+ bras statick
+p_dyd1:
+ movel USER_D1(%a6),%d0
+ bras statick
+p_dyd2:
+ movel %d2,%d0
+ bras statick
+p_dyd3:
+ movel %d3,%d0
+ bras statick
+p_dyd4:
+ movel %d4,%d0
+ bras statick
+p_dyd5:
+ movel %d5,%d0
+ bras statick
+p_dyd6:
+ movel %d6,%d0
+ bra statick
+p_dyd7:
+ movel %d7,%d0
+ bra statick
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/round.s b/c/src/lib/libcpu/m68k/m68040/fpsp/round.s
new file mode 100644
index 0000000000..afccaa7d96
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/round.s
@@ -0,0 +1,649 @@
+//
+// round.sa 3.4 7/29/91
+//
+// handle rounding and normalization tasks
+//
+//
+//
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//ROUND idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+//
+// round --- round result according to precision/mode
+//
+// a0 points to the input operand in the internal extended format
+// d1(high word) contains rounding precision:
+// ext = $0000xxxx
+// sgl = $0001xxxx
+// dbl = $0002xxxx
+// d1(low word) contains rounding mode:
+// RN = $xxxx0000
+// RZ = $xxxx0001
+// RM = $xxxx0010
+// RP = $xxxx0011
+// d0{31:29} contains the g,r,s bits (extended)
+//
+// On return the value pointed to by a0 is correctly rounded,
+// a0 is preserved and the g-r-s bits in d0 are cleared.
+// The result is not typed - the tag field is invalid. The
+// result is still in the internal extended format.
+//
+// The INEX bit of USER_FPSR will be set if the rounded result was
+// inexact (i.e. if any of the g-r-s bits were set).
+//
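Callers build the d1 argument from the user's FPCR with the andi/lsr/swap/or sequences seen in res_func.s above; in C the packing looks like this (sketch only):

    /* Pack rounding precision (FPCR bits 7:6) into the high word and rounding
       mode (FPCR bits 5:4) into the low word, as round expects. */
    unsigned long round_arg_sketch(unsigned long user_fpcr)
    {
        unsigned long mode = (user_fpcr & 0x30UL) >> 4;   /* 0=RN 1=RZ 2=RM 3=RP */
        unsigned long prec = (user_fpcr & 0xc0UL) >> 6;   /* 0=ext 1=sgl 2=dbl   */
        return (prec << 16) | mode;
    }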
+
+ .global round
+round:
+// If g=r=s=0 then result is exact and round is done, else set
+// the inex flag in status reg and continue.
+//
+ bsrs ext_grs //this subroutine looks at the
+//		;rounding precision and sets
+// ;the appropriate g-r-s bits.
+ tstl %d0 //if grs are zero, go force
+ bne rnd_cont //lower bits to zero for size
+
+ swap %d1 //set up d1.w for round prec.
+ bra truncate
+
+rnd_cont:
+//
+// Use rounding mode as an index into a jump table for these modes.
+//
+ orl #inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
+ lea mode_tab,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+//
+// Jump table indexed by rounding mode in d1.w. All following assumes
+// grs != 0.
+//
+mode_tab:
+ .long rnd_near
+ .long rnd_zero
+ .long rnd_mnus
+ .long rnd_plus
+//
+// ROUND PLUS INFINITY
+//
+// If sign of fp number = 0 (positive), then add 1 to l.
+//
+rnd_plus:
+ swap %d1 //set up d1 for round prec.
+ tstb LOCAL_SGN(%a0) //check for sign
+ bmi truncate //if positive then truncate
+ movel #0xffffffff,%d0 //force g,r,s to be all f's
+ lea add_to_l,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+//
+// ROUND MINUS INFINITY
+//
+// If sign of fp number = 1 (negative), then add 1 to l.
+//
+rnd_mnus:
+ swap %d1 //set up d1 for round prec.
+ tstb LOCAL_SGN(%a0) //check for sign
+ bpl truncate //if negative then truncate
+ movel #0xffffffff,%d0 //force g,r,s to be all f's
+ lea add_to_l,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+//
+// ROUND ZERO
+//
+// Always truncate.
+rnd_zero:
+ swap %d1 //set up d1 for round prec.
+ bra truncate
+//
+//
+// ROUND NEAREST
+//
+// If (g=1), then add 1 to l and if (r=s=0), then clear l
+// Note that this will round to even in case of a tie.
+//
+rnd_near:
+ swap %d1 //set up d1 for round prec.
+ asll #1,%d0 //shift g-bit to c-bit
+	bcc	truncate	//if g=0, truncate; else add 1 to l
+ lea add_to_l,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+
+//
+// ext_grs --- extract guard, round and sticky bits
+//
+// Input: d1 = PREC:ROUND
+// Output: d0{31:29}= guard, round, sticky
+//
+// The ext_grs extracts the guard/round/sticky bits according to the
+// selected rounding precision. It is called by the round subroutine
+// only. All registers except d0 are kept intact. d0 becomes an
+// updated guard,round,sticky in d0{31:29}
+//
+// Notes: the ext_grs uses the round PREC, and therefore has to swap d1
+// prior to usage, and needs to restore d1 to original.
+//
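For the single-precision case (grs_sgl below) the extraction amounts to the following sketch: guard and round are the two bits just past the 24-bit single mantissa in the upper long word, and sticky is the OR of everything below them plus the incoming g,r,s.

    /* hi/lo are LOCAL_HI/LOCAL_LO; grs_in is the caller's original d0. */
    unsigned int grs_sgl_sketch(unsigned int hi, unsigned int lo, unsigned int grs_in)
    {
        unsigned int out = ((hi >> 6) & 0x3u) << 30;     /* g,r into bits 31:30 */
        if ((hi & 0x3fu) != 0 || lo != 0 || grs_in != 0)
            out |= 0x20000000u;                          /* set the sticky bit  */
        return out;
    }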
+ext_grs:
+ swap %d1 //have d1.w point to round precision
+ cmpiw #0,%d1
+ bnes sgl_or_dbl
+ bras end_ext_grs
+
+sgl_or_dbl:
+ moveml %d2/%d3,-(%a7) //make some temp registers
+ cmpiw #1,%d1
+ bnes grs_dbl
+grs_sgl:
+ bfextu LOCAL_HI(%a0){#24:#2},%d3 //sgl prec. g-r are 2 bits right
+ movel #30,%d2 //of the sgl prec. limits
+ lsll %d2,%d3 //shift g-r bits to MSB of d3
+ movel LOCAL_HI(%a0),%d2 //get word 2 for s-bit test
+ andil #0x0000003f,%d2 //s bit is the or of all other
+ bnes st_stky //bits to the right of g-r
+ tstl LOCAL_LO(%a0) //test lower mantissa
+ bnes st_stky //if any are set, set sticky
+ tstl %d0 //test original g,r,s
+ bnes st_stky //if any are set, set sticky
+ bras end_sd //if words 3 and 4 are clr, exit
+grs_dbl:
+ bfextu LOCAL_LO(%a0){#21:#2},%d3 //dbl-prec. g-r are 2 bits right
+ movel #30,%d2 //of the dbl prec. limits
+ lsll %d2,%d3 //shift g-r bits to the MSB of d3
+ movel LOCAL_LO(%a0),%d2 //get lower mantissa for s-bit test
+ andil #0x000001ff,%d2 //s bit is the or-ing of all
+ bnes st_stky //other bits to the right of g-r
+ tstl %d0 //test word original g,r,s
+ bnes st_stky //if any are set, set sticky
+ bras end_sd //if clear, exit
+st_stky:
+ bset #rnd_stky_bit,%d3
+end_sd:
+ movel %d3,%d0 //return grs to d0
+ moveml (%a7)+,%d2/%d3 //restore scratch registers
+end_ext_grs:
+ swap %d1 //restore d1 to original
+ rts
+
+//******************* Local Equates
+ .set ad_1_sgl,0x00000100 // constant to add 1 to l-bit in sgl prec
+ .set ad_1_dbl,0x00000800 // constant to add 1 to l-bit in dbl prec
+
+
+//Jump table for adding 1 to the l-bit indexed by rnd prec
+
+add_to_l:
+ .long add_ext
+ .long add_sgl
+ .long add_dbl
+ .long add_dbl
+//
+// ADD SINGLE
+//
+add_sgl:
+ addl #ad_1_sgl,LOCAL_HI(%a0)
+ bccs scc_clr //no mantissa overflow
+ roxrw LOCAL_HI(%a0) //shift v-bit back in
+ roxrw LOCAL_HI+2(%a0) //shift v-bit back in
+ addw #0x1,LOCAL_EX(%a0) //and incr exponent
+scc_clr:
+ tstl %d0 //test for rs = 0
+ bnes sgl_done
+ andiw #0xfe00,LOCAL_HI+2(%a0) //clear the l-bit
+sgl_done:
+ andil #0xffffff00,LOCAL_HI(%a0) //truncate bits beyond sgl limit
+	clrl	LOCAL_LO(%a0)		//clear the ls mantissa
+ rts
+
+//
+// ADD EXTENDED
+//
+add_ext:
+ addql #1,LOCAL_LO(%a0) //add 1 to l-bit
+ bccs xcc_clr //test for carry out
+ addql #1,LOCAL_HI(%a0) //propagate carry
+ bccs xcc_clr
+ roxrw LOCAL_HI(%a0) //mant is 0 so restore v-bit
+ roxrw LOCAL_HI+2(%a0) //mant is 0 so restore v-bit
+ roxrw LOCAL_LO(%a0)
+ roxrw LOCAL_LO+2(%a0)
+ addw #0x1,LOCAL_EX(%a0) //and inc exp
+xcc_clr:
+ tstl %d0 //test rs = 0
+ bnes add_ext_done
+ andib #0xfe,LOCAL_LO+3(%a0) //clear the l bit
+add_ext_done:
+ rts
+//
+// ADD DOUBLE
+//
+add_dbl:
+ addl #ad_1_dbl,LOCAL_LO(%a0)
+ bccs dcc_clr
+ addql #1,LOCAL_HI(%a0) //propagate carry
+ bccs dcc_clr
+ roxrw LOCAL_HI(%a0) //mant is 0 so restore v-bit
+ roxrw LOCAL_HI+2(%a0) //mant is 0 so restore v-bit
+ roxrw LOCAL_LO(%a0)
+ roxrw LOCAL_LO+2(%a0)
+ addw #0x1,LOCAL_EX(%a0) //incr exponent
+dcc_clr:
+ tstl %d0 //test for rs = 0
+ bnes dbl_done
+ andiw #0xf000,LOCAL_LO+2(%a0) //clear the l-bit
+
+dbl_done:
+ andil #0xfffff800,LOCAL_LO(%a0) //truncate bits beyond dbl limit
+ rts
+
+error:
+ rts
+//
+// Truncate all other bits
+//
+trunct:
+ .long end_rnd
+ .long sgl_done
+ .long dbl_done
+ .long dbl_done
+
+truncate:
+ lea trunct,%a1
+ movel (%a1,%d1.w*4),%a1
+ jmp (%a1)
+
+end_rnd:
+ rts
+
+//
+// NORMALIZE
+//
+// These routines (nrm_zero & nrm_set) normalize the unnorm. This
+// is done by shifting the mantissa left while decrementing the
+// exponent.
+//
+// NRM_SET shifts and decrements until there is a 1 set in the integer
+// bit of the mantissa (msb in d1).
+//
+// NRM_ZERO shifts and decrements until there is a 1 set in the integer
+// bit of the mantissa (msb in d1) unless this would mean the exponent
+// would go less than 0. In that case the number becomes a denorm - the
+// exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
+// normalized.
+//
+// Note that both routines have been optimized (for the worst case) and
+// therefore do not have the easy to follow decrement/shift loop.
+//
+// NRM_ZERO
+//
+// Distance to first 1 bit in mantissa = X
+// Distance to 0 from exponent = Y
+// If X < Y
+// Then
+// nrm_set
+// Else
+// shift mantissa by Y
+// set exponent = 0
+//
+//input:
+// FP_SCR1 = exponent, ms mantissa part, ls mantissa part
+//output:
+// L_SCR1{4} = fpte15 or ete15 bit
+//
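The optimized routines below are equivalent to this straightforward decrement/shift loop (the 64-bit mantissa shown as one value; sketch only):

    /* Normalize until the j-bit is set or the exponent reaches zero (denorm). */
    void nrm_zero_sketch(unsigned int *exp, unsigned long long *mant)
    {
        while (*exp > 0 && (*mant & 0x8000000000000000ull) == 0) {
            *mant <<= 1;     /* shift mantissa left           */
            (*exp)--;        /* ...and decrement the exponent */
        }
    }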
+ .global nrm_zero
+nrm_zero:
+ movew LOCAL_EX(%a0),%d0
+ cmpw #64,%d0 //see if exp > 64
+ bmis d0_less
+ bsr nrm_set //exp > 64 so exp won't exceed 0
+ rts
+d0_less:
+ moveml %d2/%d3/%d5/%d6,-(%a7)
+ movel LOCAL_HI(%a0),%d1
+ movel LOCAL_LO(%a0),%d2
+
+ bfffo %d1{#0:#32},%d3 //get the distance to the first 1
+// ;in ms mant
+ beqs ms_clr //branch if no bits were set
+	cmpw	%d3,%d0		//if X>Y
+ bmis greater //then exp will go past 0 (neg) if
+// ;it is just shifted
+ bsr nrm_set //else exp won't go past 0
+ moveml (%a7)+,%d2/%d3/%d5/%d6
+ rts
+greater:
+ movel %d2,%d6 //save ls mant in d6
+ lsll %d0,%d2 //shift ls mant by count
+ lsll %d0,%d1 //shift ms mant by count
+ movel #32,%d5
+ subl %d0,%d5 //make op a denorm by shifting bits
+ lsrl %d5,%d6 //by the number in the exp, then
+// ;set exp = 0.
+ orl %d6,%d1 //shift the ls mant bits into the ms mant
+ movel #0,%d0 //same as if decremented exp to 0
+// ;while shifting
+ movew %d0,LOCAL_EX(%a0)
+ movel %d1,LOCAL_HI(%a0)
+ movel %d2,LOCAL_LO(%a0)
+ moveml (%a7)+,%d2/%d3/%d5/%d6
+ rts
+ms_clr:
+ bfffo %d2{#0:#32},%d3 //check if any bits set in ls mant
+ beqs all_clr //branch if none set
+ addw #32,%d3
+ cmpw %d3,%d0 //if X>Y
+ bmis greater //then branch
+ bsr nrm_set //else exp won't go past 0
+ moveml (%a7)+,%d2/%d3/%d5/%d6
+ rts
+all_clr:
+ movew #0,LOCAL_EX(%a0) //no mantissa bits set. Set exp = 0.
+ moveml (%a7)+,%d2/%d3/%d5/%d6
+ rts
+//
+// NRM_SET
+//
+ .global nrm_set
+nrm_set:
+ movel %d7,-(%a7)
+	bfffo	LOCAL_HI(%a0){#0:#32},%d7 //find first 1 in ms mant (into d7)
+ beqs lower //branch if ms mant is all 0's
+
+ movel %d6,-(%a7)
+
+ subw %d7,LOCAL_EX(%a0) //sub exponent by count
+ movel LOCAL_HI(%a0),%d0 //d0 has ms mant
+ movel LOCAL_LO(%a0),%d1 //d1 has ls mant
+
+ lsll %d7,%d0 //shift first 1 to j bit position
+ movel %d1,%d6 //copy ls mant into d6
+ lsll %d7,%d6 //shift ls mant by count
+ movel %d6,LOCAL_LO(%a0) //store ls mant into memory
+ moveql #32,%d6
+ subl %d7,%d6 //continue shift
+ lsrl %d6,%d1 //shift off all bits but those that will
+// ;be shifted into ms mant
+ orl %d1,%d0 //shift the ls mant bits into the ms mant
+ movel %d0,LOCAL_HI(%a0) //store ms mant into memory
+ moveml (%a7)+,%d7/%d6 //restore registers
+ rts
+
+//
+// We get here if ms mant was = 0, and we assume ls mant has bits
+// set (otherwise this would have been tagged a zero not a denorm).
+//
+lower:
+ movew LOCAL_EX(%a0),%d0 //d0 has exponent
+ movel LOCAL_LO(%a0),%d1 //d1 has ls mant
+ subw #32,%d0 //account for ms mant being all zeros
+	bfffo	%d1{#0:#32},%d7	//find first 1 in ls mant (into d7)
+ subw %d7,%d0 //subtract shift count from exp
+ lsll %d7,%d1 //shift first 1 to integer bit in ms mant
+	movew	%d0,LOCAL_EX(%a0)	//store exp
+	movel	%d1,LOCAL_HI(%a0)	//store ms mant
+ clrl LOCAL_LO(%a0) //clear ls mant
+ movel (%a7)+,%d7
+ rts
+//
+// denorm --- denormalize an intermediate result
+//
+// Used by underflow.
+//
+// Input:
+// a0 points to the operand to be denormalized
+// (in the internal extended format)
+//
+// d0: rounding precision
+// Output:
+// a0 points to the denormalized result
+// (in the internal extended format)
+//
+// d0 is guard,round,sticky
+//
+// d0 comes into this routine with the rounding precision. It
+// is then loaded with the denormalized exponent threshold for the
+// rounding precision.
+//
+
+ .global denorm
+denorm:
+ btstb #6,LOCAL_EX(%a0) //check for exponents between $7fff-$4000
+ beqs no_sgn_ext
+ bsetb #7,LOCAL_EX(%a0) //sign extend if it is so
+no_sgn_ext:
+
+ cmpib #0,%d0 //if 0 then extended precision
+ bnes not_ext //else branch
+
+ clrl %d1 //load d1 with ext threshold
+ clrl %d0 //clear the sticky flag
+ bsr dnrm_lp //denormalize the number
+ tstb %d1 //check for inex
+ beq no_inex //if clr, no inex
+ bras dnrm_inex //if set, set inex
+
+not_ext:
+ cmpil #1,%d0 //if 1 then single precision
+ beqs load_sgl //else must be 2, double prec
+
+load_dbl:
+ movew #dbl_thresh,%d1 //put copy of threshold in d1
+ movel %d1,%d0 //copy d1 into d0
+ subw LOCAL_EX(%a0),%d0 //diff = threshold - exp
+ cmpw #67,%d0 //if diff > 67 (mant + grs bits)
+ bpls chk_stky //then branch (all bits would be
+// ; shifted off in denorm routine)
+ clrl %d0 //else clear the sticky flag
+ bsr dnrm_lp //denormalize the number
+ tstb %d1 //check flag
+ beqs no_inex //if clr, no inex
+ bras dnrm_inex //if set, set inex
+
+load_sgl:
+ movew #sgl_thresh,%d1 //put copy of threshold in d1
+ movel %d1,%d0 //copy d1 into d0
+ subw LOCAL_EX(%a0),%d0 //diff = threshold - exp
+ cmpw #67,%d0 //if diff > 67 (mant + grs bits)
+ bpls chk_stky //then branch (all bits would be
+// ; shifted off in denorm routine)
+ clrl %d0 //else clear the sticky flag
+ bsr dnrm_lp //denormalize the number
+ tstb %d1 //check flag
+ beqs no_inex //if clr, no inex
+ bras dnrm_inex //if set, set inex
+
+chk_stky:
+ tstl LOCAL_HI(%a0) //check for any bits set
+ bnes set_stky
+ tstl LOCAL_LO(%a0) //check for any bits set
+ bnes set_stky
+ bras clr_mant
+set_stky:
+ orl #inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
+ movel #0x20000000,%d0 //set sticky bit in return value
+clr_mant:
+ movew %d1,LOCAL_EX(%a0) //load exp with threshold
+	movel	#0,LOCAL_HI(%a0)	//clear the ms mantissa
+	movel	#0,LOCAL_LO(%a0)	//clear the ls mantissa
+ rts
+dnrm_inex:
+ orl #inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
+no_inex:
+ rts
+
+//
+// dnrm_lp --- denormalize exponent/mantissa to the specified threshold
+//
+// Input:
+// a0 points to the operand to be denormalized
+// d0{31:29} initial guard,round,sticky
+// d1{15:0} denormalization threshold
+// Output:
+// a0 points to the denormalized operand
+// d0{31:29} final guard,round,sticky
+// d1.b inexact flag: all ones means inexact result
+//
+// The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
+// so that bfext can be used to extract the new low part of the mantissa.
+// Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
+// is no LOCAL_GRS scratch word following it on the fsave frame.
+//
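Ignoring the g,r,s bookkeeping, dnrm_lp shifts the 64-bit mantissa right by (threshold - exponent) bits and records whether anything was lost; a simplified sketch:

    /* Returns the denormalized mantissa; *inexact is set if any bit shifts off. */
    unsigned long long dnrm_sketch(unsigned long long mant, unsigned int shift, int *inexact)
    {
        if (shift == 0) {
            *inexact = 0;
            return mant;
        }
        if (shift >= 64) {                         /* everything shifts off */
            *inexact = (mant != 0);
            return 0;
        }
        *inexact = (mant << (64 - shift)) != 0;    /* lost bits => inexact */
        return mant >> shift;
    }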
+ .global dnrm_lp
+dnrm_lp:
+ movel %d2,-(%sp) //save d2 for temp use
+ btstb #E3,E_BYTE(%a6) //test for type E3 exception
+ beqs not_E3 //not type E3 exception
+ bfextu WBTEMP_GRS(%a6){#6:#3},%d2 //extract guard,round, sticky bit
+ movel #29,%d0
+ lsll %d0,%d2 //shift g,r,s to their positions
+ movel %d2,%d0
+not_E3:
+ movel (%sp)+,%d2 //restore d2
+ movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
+ movel %d0,FP_SCR2+LOCAL_GRS(%a6)
+ movel %d1,%d0 //copy the denorm threshold
+ subw LOCAL_EX(%a0),%d1 //d1 = threshold - uns exponent
+ bles no_lp //d1 <= 0
+ cmpw #32,%d1
+	blts	case_1		//0 < d1 < 32
+ cmpw #64,%d1
+ blts case_2 //32 <= d1 < 64
+ bra case_3 //d1 >= 64
+//
+// No normalization necessary
+//
+no_lp:
+ clrb %d1 //set no inex2 reported
+ movel FP_SCR2+LOCAL_GRS(%a6),%d0 //restore original g,r,s
+ rts
+//
+// case (0<d1<32)
+//
+case_1:
+ movel %d2,-(%sp)
+ movew %d0,LOCAL_EX(%a0) //exponent = denorm threshold
+ movel #32,%d0
+ subw %d1,%d0 //d0 = 32 - d1
+ bfextu LOCAL_EX(%a0){%d0:#32},%d2
+ bfextu %d2{%d1:%d0},%d2 //d2 = new LOCAL_HI
+ bfextu LOCAL_HI(%a0){%d0:#32},%d1 //d1 = new LOCAL_LO
+ bfextu FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 //d0 = new G,R,S
+ movel %d2,LOCAL_HI(%a0) //store new LOCAL_HI
+ movel %d1,LOCAL_LO(%a0) //store new LOCAL_LO
+ clrb %d1
+ bftst %d0{#2:#30}
+ beqs c1nstky
+ bsetl #rnd_stky_bit,%d0
+ st %d1
+c1nstky:
+ movel FP_SCR2+LOCAL_GRS(%a6),%d2 //restore original g,r,s
+ andil #0xe0000000,%d2 //clear all but G,R,S
+ tstl %d2 //test if original G,R,S are clear
+ beqs grs_clear
+ orl #0x20000000,%d0 //set sticky bit in d0
+grs_clear:
+ andil #0xe0000000,%d0 //clear all but G,R,S
+ movel (%sp)+,%d2
+ rts
+//
+// case (32<=d1<64)
+//
+case_2:
+ movel %d2,-(%sp)
+ movew %d0,LOCAL_EX(%a0) //unsigned exponent = threshold
+ subw #32,%d1 //d1 now between 0 and 32
+ movel #32,%d0
+ subw %d1,%d0 //d0 = 32 - d1
+ bfextu LOCAL_EX(%a0){%d0:#32},%d2
+ bfextu %d2{%d1:%d0},%d2 //d2 = new LOCAL_LO
+ bfextu LOCAL_HI(%a0){%d0:#32},%d1 //d1 = new G,R,S
+ bftst %d1{#2:#30}
+ bnes c2_sstky //bra if sticky bit to be set
+ bftst FP_SCR2+LOCAL_LO(%a6){%d0:#32}
+ bnes c2_sstky //bra if sticky bit to be set
+ movel %d1,%d0
+ clrb %d1
+ bras end_c2
+c2_sstky:
+ movel %d1,%d0
+ bsetl #rnd_stky_bit,%d0
+ st %d1
+end_c2:
+ clrl LOCAL_HI(%a0) //store LOCAL_HI = 0
+ movel %d2,LOCAL_LO(%a0) //store LOCAL_LO
+ movel FP_SCR2+LOCAL_GRS(%a6),%d2 //restore original g,r,s
+ andil #0xe0000000,%d2 //clear all but G,R,S
+ tstl %d2 //test if original G,R,S are clear
+ beqs clear_grs
+ orl #0x20000000,%d0 //set sticky bit in d0
+clear_grs:
+ andil #0xe0000000,%d0 //get rid of all but G,R,S
+ movel (%sp)+,%d2
+ rts
+//
+// d1 >= 64 Force the exponent to be the denorm threshold with the
+// correct sign.
+//
+case_3:
+ movew %d0,LOCAL_EX(%a0)
+ tstw LOCAL_SGN(%a0)
+ bges c3con
+c3neg:
+ orl #0x80000000,LOCAL_EX(%a0)
+c3con:
+ cmpw #64,%d1
+ beqs sixty_four
+ cmpw #65,%d1
+ beqs sixty_five
+//
+// Shift value is out of range. Set d1 for inex2 flag and
+// return a zero with the given threshold.
+//
+ clrl LOCAL_HI(%a0)
+ clrl LOCAL_LO(%a0)
+ movel #0x20000000,%d0
+ st %d1
+ rts
+
+sixty_four:
+ movel LOCAL_HI(%a0),%d0
+ bfextu %d0{#2:#30},%d1
+ andil #0xc0000000,%d0
+ bras c3com
+
+sixty_five:
+ movel LOCAL_HI(%a0),%d0
+ bfextu %d0{#1:#31},%d1
+ andil #0x80000000,%d0
+ lsrl #1,%d0 //shift high bit into R bit
+
+c3com:
+ tstl %d1
+ bnes c3ssticky
+ tstl LOCAL_LO(%a0)
+ bnes c3ssticky
+ tstb FP_SCR2+LOCAL_GRS(%a6)
+ bnes c3ssticky
+ clrb %d1
+ bras c3end
+
+c3ssticky:
+ bsetl #rnd_stky_bit,%d0
+ st %d1
+c3end:
+ clrl LOCAL_HI(%a0)
+ clrl LOCAL_LO(%a0)
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_fpsp.c b/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_fpsp.c
new file mode 100644
index 0000000000..4200ca247c
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_fpsp.c
@@ -0,0 +1,77 @@
+#include <rtems/system.h>
+/*
+#include <rtems/score/isr.h>
+*/
+
+/*
+ * User exception handlers
+ */
+proc_ptr M68040FPSPUserExceptionHandlers[9];
+
+/*
+ * Intercept requests to install an exception handler.
+ * FPSP exceptions get special treatment.
+ */
+static int
+FPSP_install_raw_handler (unsigned32 vector, proc_ptr new_handler, proc_ptr *old_handler)
+{
+ int fpspVector;
+
+ switch (vector) {
+ default: return 0; /* Non-FPSP vector */
+ case 11: fpspVector = 0; break; /* F-line */
+ case 48: fpspVector = 1; break; /* BSUN */
+ case 49: fpspVector = 2; break; /* INEXACT */
+ case 50: fpspVector = 3; break; /* DIVIDE-BY-ZERO */
+ case 51: fpspVector = 4; break; /* UNDERFLOW */
+ case 52: fpspVector = 5; break; /* OPERAND ERROR */
+ case 53: fpspVector = 6; break; /* OVERFLOW */
+ case 54: fpspVector = 7; break; /* SIGNALLING NAN */
+ case 55: fpspVector = 8; break; /* UNIMPLEMENTED DATA TYPE */
+ }
+ *old_handler = M68040FPSPUserExceptionHandlers[fpspVector];
+ M68040FPSPUserExceptionHandlers[fpspVector] = new_handler;
+ return 1;
+}
+
+/*
+ * Attach floating point exception vectors to M68040FPSP entry points
+ *
+ * NOTE: Uses M68K rather than M68040 in the name so all CPUs having
+ * an FPSP can share the same code in RTEMS proper.
+ */
+void
+M68KFPSPInstallExceptionHandlers (void)
+{
+ extern void _fpspEntry_fline();
+ extern void _fpspEntry_bsun();
+ extern void _fpspEntry_inex();
+ extern void _fpspEntry_dz();
+ extern void _fpspEntry_unfl();
+ extern void _fpspEntry_ovfl();
+ extern void _fpspEntry_operr();
+ extern void _fpspEntry_snan();
+ extern void _fpspEntry_unsupp();
+ static struct {
+ int vector_number;
+ void (*handler)();
+ } fpspHandlers[] = {
+ { 11, _fpspEntry_fline },
+ { 48, _fpspEntry_bsun },
+ { 49, _fpspEntry_inex },
+ { 50, _fpspEntry_dz },
+ { 51, _fpspEntry_unfl },
+ { 52, _fpspEntry_operr },
+ { 53, _fpspEntry_ovfl },
+ { 54, _fpspEntry_snan },
+ { 55, _fpspEntry_unsupp },
+ };
+ int i;
+ proc_ptr oldHandler;
+
+ for (i = 0 ; i < sizeof fpspHandlers / sizeof fpspHandlers[0] ; i++) {
+ _CPU_ISR_install_raw_handler(fpspHandlers[i].vector_number, fpspHandlers[i].handler, &oldHandler);
+ M68040FPSPUserExceptionHandlers[i] = oldHandler;
+ }
+ _FPSP_install_raw_handler = FPSP_install_raw_handler;
+}
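A possible usage sketch, not part of this commit: after M68KFPSPInstallExceptionHandlers() has run (normally from BSP startup), an application installs its own floating point exception handler through the usual raw-handler call, which the _FPSP_install_raw_handler hook set above is expected to intercept for the FPSP-managed vectors. The handler body and the setup function name are illustrative.

    #include <rtems/system.h>

    extern void M68KFPSPInstallExceptionHandlers(void);

    static void my_fp_overflow_handler(void)
    {
        /* application-specific response to a floating point overflow */
    }

    void example_fp_setup(void)
    {
        proc_ptr previous;

        /* route the 68040 FP exception vectors through the FPSP */
        M68KFPSPInstallExceptionHandlers();

        /* vector 53 (overflow) is FPSP-managed, so this request should be
           redirected into M68040FPSPUserExceptionHandlers[] rather than the
           raw vector table */
        _CPU_ISR_install_raw_handler(53, (proc_ptr) my_fp_overflow_handler, &previous);
    }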
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_skel.s b/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_skel.s
new file mode 100644
index 0000000000..5d1744b5ae
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_skel.s
@@ -0,0 +1,394 @@
+//
+// skeleton.sa 3.2 4/26/91
+//
+// This file contains code that is system dependent and will
+// need to be modified to install the FPSP.
+//
+// Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
+// Put any target system specific handling that must be done immediately
+// before the jump instruction. If there is no handling necessary, then
+// the 'fpsp_xxxx' handler entry point should be placed in the exception
+// table so that the 'jmp' can be eliminated. If the FPSP determines that the
+// exception is one that must be reported then there will be a
+// return from the package by a 'jmp real_xxxx'. At that point
+// the machine state will be identical to the state before
+// the FPSP was entered. In particular, whatever condition
+// that caused the exception will still be pending when the FPSP
+// package returns. Thus, there will be system specific code
+// to handle the exception.
+//
+// If the exception was completely handled by the package, then
+// the return will be via a 'jmp fpsp_done'. Unless there is
+// OS specific work to be done (such as handling a context switch or
+// interrupt) the user program can be resumed via 'rte'.
+//
+// In the following skeleton code, some typical 'real_xxxx' handling
+// code is shown. This code may need to be moved to an appropriate
+// place in the target system, or rewritten.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//
+// Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk)
+//
+
+#include <asm.h>
+
+//SKELETON idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ .include "fpsp.defs"
+
+//
+// Divide by Zero exception
+//
+// All dz exceptions are 'real', hence no fpsp_dz entry point.
+//
+ .global SYM(_fpspEntry_dz)
+SYM(_fpspEntry_dz):
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E1,E_BYTE(%a6)
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+3*4],za0)
+
+//
+// Inexact exception
+//
+// All inexact exceptions are real, but the 'real' handler
+// will probably want to clear the pending exception.
+// The provided code will clear the E3 exception (if pending),
+// otherwise clear the E1 exception. The frestore is not really
+// necessary for E1 exceptions.
+//
+// Code following the 'inex' label is to handle bug #1232. In this
+// bug, if an E1 snan, ovfl, or unfl occurred, and the process was
+// swapped out before taking the exception, the exception taken on
+// return was inex, rather than the correct exception. The snan, ovfl,
+// or unfl exception that should have been taken must not have been enabled. The
+// fix is to check for E1 together with one of the snan, ovfl,
+// or unfl bits set in the fpsr. If any of these are set, branch
+// to the appropriate handler for the exception in the fpsr. Note
+// that this fix is only for d43b parts, and is skipped if the
+// version number is not $40.
+//
+//
+ .global SYM(_fpspEntry_inex)
+ .global real_inex
+SYM(_fpspEntry_inex):
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ cmpib #VER_40,(%sp) //test version number
+ bnes not_fmt40
+ fmovel %fpsr,-(%sp)
+ btstb #E1,E_BYTE(%a6) //test for E1 set
+ beqs not_b1232
+ btstb #snan_bit,2(%sp) //test for snan
+ beq inex_ckofl
+ addl #4,%sp
+ frestore (%sp)+
+ unlk %a6
+ bra snan
+inex_ckofl:
+ btstb #ovfl_bit,2(%sp) //test for ovfl
+ beq inex_ckufl
+ addl #4,%sp
+ frestore (%sp)+
+ unlk %a6
+ bra SYM(_fpspEntry_ovfl)
+inex_ckufl:
+ btstb #unfl_bit,2(%sp) //test for unfl
+ beq not_b1232
+ addl #4,%sp
+ frestore (%sp)+
+ unlk %a6
+ bra SYM(_fpspEntry_unfl)
+
+//
+// We do not have the bug 1232 case. Clean up the stack and call
+// real_inex.
+//
+not_b1232:
+ addl #4,%sp
+ frestore (%sp)+
+ unlk %a6
+
+real_inex:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+not_fmt40:
+ bclrb #E3,E_BYTE(%a6) //clear and test E3 flag
+ beqs inex_cke1
+//
+// Clear dirty bit on dest register in the frame before branching
+// to b1238_fix.
+//
+ moveml %d0/%d1,USER_DA(%a6)
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix //test for bug1238 case
+ moveml USER_DA(%a6),%d0/%d1
+ bras inex_done
+inex_cke1:
+ bclrb #E1,E_BYTE(%a6)
+inex_done:
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+2*4],za0)
+
+//
+// Overflow exception
+//
+ .global SYM(_fpspEntry_ovfl)
+ .global real_ovfl
+SYM(_fpspEntry_ovfl):
+ jmp fpsp_ovfl
+real_ovfl:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E3,E_BYTE(%a6) //clear and test E3 flag
+ bnes ovfl_done
+ bclrb #E1,E_BYTE(%a6)
+ovfl_done:
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+6*4],za0)
+
+//
+// Underflow exception
+//
+ .global SYM(_fpspEntry_unfl)
+ .global real_unfl
+SYM(_fpspEntry_unfl):
+ jmp fpsp_unfl
+real_unfl:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E3,E_BYTE(%a6) //clear and test E3 flag
+ bnes unfl_done
+ bclrb #E1,E_BYTE(%a6)
+unfl_done:
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+4*4],za0)
+
+//
+// Signalling NAN exception
+//
+ .global SYM(_fpspEntry_snan)
+ .global real_snan
+SYM(_fpspEntry_snan):
+snan:
+ jmp fpsp_snan
+real_snan:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E1,E_BYTE(%a6) //snan is always an E1 exception
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+7*4],za0)
+
+//
+// Operand Error exception
+//
+ .global SYM(_fpspEntry_operr)
+ .global real_operr
+SYM(_fpspEntry_operr):
+ jmp fpsp_operr
+real_operr:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E1,E_BYTE(%a6) //operr is always an E1 exception
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+5*4],za0)
+
+//
+// BSUN exception
+//
+// This sample handler simply clears the nan bit in the FPSR.
+//
+ .global SYM(_fpspEntry_bsun)
+ .global real_bsun
+SYM(_fpspEntry_bsun):
+ jmp fpsp_bsun
+real_bsun:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E1,E_BYTE(%a6) //bsun is always an E1 exception
+ fmovel %FPSR,-(%sp)
+ bclrb #nan_bit,(%sp)
+ fmovel (%sp)+,%FPSR
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+1*4],za0)
+
+//
+// F-line exception
+//
+// A 'real' F-line exception is one that the FPSP is not supposed to
+// handle. E.g. an instruction with a co-processor ID that is not 1.
+//
+ .global SYM(_fpspEntry_fline)
+ .global real_fline
+SYM(_fpspEntry_fline):
+ jmp fpsp_fline
+real_fline:
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+0*4],za0)
+
+//
+// Unsupported data type exception
+//
+ .global SYM(_fpspEntry_unsupp)
+ .global real_unsupp
+SYM(_fpspEntry_unsupp):
+ jmp fpsp_unsupp
+real_unsupp:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%sp)
+ bclrb #E1,E_BYTE(%a6) //unsupp is always an E1 exception
+ frestore (%sp)+
+ unlk %a6
+ jmp ([SYM(M68040FPSPUserExceptionHandlers)+8*4],za0)
+
+//
+// Trace exception
+//
+ .global real_trace
+real_trace:
+ trap #10
+
+//
+// fpsp_fmt_error --- exit point for frame format error
+//
+// The fpu stack frame does not match the frames existing
+// or planned at the time of this writing. The fpsp is
+// unable to handle frame sizes not in the following
+// version:size pairs:
+//
+// {4060, 4160} - busy frame
+// {4028, 4130} - unimp frame
+// {4000, 4100} - idle frame
+//
+ .global fpsp_fmt_error
+fpsp_fmt_error:
+ trap #11
+
+//
+// fpsp_done --- FPSP exit point
+//
+// The exception has been handled by the package and we are ready
+// to return to user mode, but there may be OS specific code
+// to execute before we do. If there is, do it now.
+//
+// For now, RTEMS does not bother looking at the
+// possibility that it is time to reschedule....
+//
+
+ .global fpsp_done
+fpsp_done:
+ rte
+
+//
+// mem_write --- write to user or supervisor address space
+//
+// Writes to memory while in supervisor mode.
+//
+// a0 - supervisor source address
+// a1 - user/supervisor destination address
+// d0 - number of bytes to write (maximum count is 12)
+//
+ .global mem_write
+mem_write:
+ btstb #5,EXC_SR(%a6) //check for supervisor state
+ beqs user_write
+super_write:
+ moveb (%a0)+,(%a1)+
+ subql #1,%d0
+ bnes super_write
+ rts
+user_write:
+ movel %d1,-(%sp) //preserve d1 just in case
+ movel %d0,-(%sp)
+ movel %a1,-(%sp)
+ movel %a0,-(%sp)
+ jsr copyout
+ addw #12,%sp
+ movel (%sp)+,%d1
+ rts
+//
+// mem_read --- read from user or supervisor address space
+//
+// Reads from memory while in supervisor mode.
+//
+// The FPSP calls mem_read to read the original F-line instruction in order
+// to extract the data register number when the 'Dn' addressing mode is
+// used.
+//
+//Input:
+// a0 - user/supervisor source address
+// a1 - supervisor destination address
+// d0 - number of bytes to read (maximum count is 12)
+//
+// Like mem_write, mem_read always reads with a supervisor
+// destination address on the supervisor stack. Also like mem_write,
+// the EXC_SR is checked and a simple memory copy is done if reading
+// from supervisor space is indicated.
+//
+ .global mem_read
+mem_read:
+ btstb #5,EXC_SR(%a6) //check for supervisor state
+ beqs user_read
+super_read:
+ moveb (%a0)+,(%a1)+
+ subql #1,%d0
+ bnes super_read
+ rts
+user_read:
+ movel %d1,-(%sp) //preserve d1 just in case
+ movel %d0,-(%sp)
+ movel %a1,-(%sp)
+ movel %a0,-(%sp)
+ jsr copyin
+ addw #12,%sp
+ movel (%sp)+,%d1
+ rts
+
+//
+// Use these routines if your kernel does not have copyout/copyin equivalents.
+// Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC,
+// and copyin overwrites SFC.
+//
+copyout:
+ movel 4(%sp),%a0 // source
+ movel 8(%sp),%a1 // destination
+ movel 12(%sp),%d0 // count
+ subl #1,%d0 // dec count by 1 for dbra
+ movel #1,%d1
+ movec %d1,%DFC // set dfc for user data space
+moreout:
+ moveb (%a0)+,%d1 // fetch supervisor byte
+ movesb %d1,(%a1)+ // write user byte
+ dbf %d0,moreout
+ rts
+
+copyin:
+ movel 4(%sp),%a0 // source
+ movel 8(%sp),%a1 // destination
+ movel 12(%sp),%d0 // count
+ subl #1,%d0 // dec count by 1 for dbra
+ movel #1,%d1
+ movec %d1,%SFC // set sfc for user space
+morein:
+ movesb (%a0)+,%d1 // fetch user byte
+ moveb %d1,(%a1)+ // write supervisor byte
+ dbf %d0,morein
+ rts
+
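
A hedged C model of the calling convention these helpers assume: both take (source, destination, byte count) pushed on the stack, and at most 12 bytes are copied. The bodies below are only a flat-memory approximation for targets without distinct user/supervisor address spaces; the MOVES-based routines above are what a split address space actually requires.

    #include <stddef.h>
    #include <string.h>

    /* flat-memory stand-ins, not drop-in replacements for the MOVES versions */
    void copyout( const void *sup_source, void *user_destination, size_t count )
    {
      memcpy( user_destination, sup_source, count );   /* supervisor -> user */
    }

    void copyin( const void *user_source, void *sup_destination, size_t count )
    {
      memcpy( sup_destination, user_source, count );   /* user -> supervisor */
    }
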
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/sacos.s b/c/src/lib/libcpu/m68k/m68040/fpsp/sacos.s
new file mode 100644
index 0000000000..83390a8ebb
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sacos.s
@@ -0,0 +1,115 @@
+//
+// sacos.sa 3.3 12/19/90
+//
+// Description: The entry point sAcos computes the inverse cosine of
+// an input argument; sAcosd does the same except for denormalized
+// input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value arccos(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 3 ulps in
+// 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program sACOS takes approximately 310 cycles.
+//
+// Algorithm:
+//
+// ACOS
+// 1. If |X| >= 1, go to 3.
+//
+// 2. (|X| < 1) Calculate acos(X) by
+// z := (1-X) / (1+X)
+// acos(X) = 2 * atan( sqrt(z) ).
+// Exit.
+//
+// 3. If |X| > 1, go to 5.
+//
+// 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
+//
+// 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+// Exit.
+//
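
The reduction in step 2 maps directly onto the C math library; a small illustrative sketch of the same identity (double precision here, versus extended precision in the FPSP; the function name is made up):

    #include <math.h>

    /* acos(x) = 2 * atan( sqrt( (1 - x) / (1 + x) ) )   for |x| < 1 */
    double acos_via_atan( double x )
    {
      return 2.0 * atan( sqrt( (1.0 - x) / (1.0 + x) ) );
    }
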
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SACOS idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
+PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
+
+ |xref t_operr
+ |xref t_frcinx
+ |xref satan
+
+ .global sacosd
+sacosd:
+//--ACOS(X) = PI/2 FOR DENORMALIZED X
+ fmovel %d1,%fpcr // ...load user's rounding mode/precision
+ fmovex PIBY2,%fp0
+ bra t_frcinx
+
+ .global sacos
+sacos:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0 // ...pack exponent with upper 16 fraction
+ movew 4(%a0),%d0
+ andil #0x7FFFFFFF,%d0
+ cmpil #0x3FFF8000,%d0
+ bges ACOSBIG
+
+//--THIS IS THE USUAL CASE, |X| < 1
+//--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
+
+ fmoves #0x3F800000,%fp1
+ faddx %fp0,%fp1 // ...1+X
+ fnegx %fp0 // ... -X
+ fadds #0x3F800000,%fp0 // ...1-X
+ fdivx %fp1,%fp0 // ...(1-X)/(1+X)
+ fsqrtx %fp0 // ...SQRT((1-X)/(1+X))
+ fmovemx %fp0-%fp0,(%a0) // ...overwrite input
+ movel %d1,-(%sp) //save original users fpcr
+ clrl %d1
+ bsr satan // ...ATAN(SQRT([1-X]/[1+X]))
+ fmovel (%sp)+,%fpcr //restore users exceptions
+ faddx %fp0,%fp0 // ...2 * ATAN( STUFF )
+ bra t_frcinx
+
+ACOSBIG:
+ fabsx %fp0
+ fcmps #0x3F800000,%fp0
+ fbgt t_operr //cause an operr exception
+
+//--|X| = 1, ACOS(X) = 0 OR PI
+ movel (%a0),%d0 // ...pack exponent with upper 16 fraction
+ movew 4(%a0),%d0
+ cmpl #0,%d0 //D0 has original exponent+fraction
+ bgts ACOSP1
+
+//--X = -1
+//Returns PI and inexact exception
+ fmovex PI,%fp0
+ fmovel %d1,%FPCR
+ fadds #0x00800000,%fp0 //cause an inexact exception to be put
+// ;into the 040 - will not trap until next
+// ;fp inst.
+ bra t_frcinx
+
+ACOSP1:
+ fmovel %d1,%FPCR
+ fmoves #0x00000000,%fp0
+ rts //Facos ; of +1 is exact
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/sasin.s b/c/src/lib/libcpu/m68k/m68040/fpsp/sasin.s
new file mode 100644
index 0000000000..6725b9b143
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sasin.s
@@ -0,0 +1,104 @@
+//
+// sasin.sa 3.3 12/19/90
+//
+// Description: The entry point sAsin computes the inverse sine of
+// an input argument; sAsind does the same except for denormalized
+// input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value arcsin(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 3 ulps in
+// 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program sASIN takes approximately 310 cycles.
+//
+// Algorithm:
+//
+// ASIN
+// 1. If |X| >= 1, go to 3.
+//
+// 2. (|X| < 1) Calculate asin(X) by
+// z := sqrt( [1-X][1+X] )
+// asin(X) = atan( x / z ).
+// Exit.
+//
+// 3. If |X| > 1, go to 5.
+//
+// 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
+//
+// 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+// Exit.
+//
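
As with sacos, step 2 is just an atan identity; an illustrative double-precision sketch (the function name is made up):

    #include <math.h>

    /* asin(x) = atan( x / sqrt( (1 - x) * (1 + x) ) )   for |x| < 1 */
    double asin_via_atan( double x )
    {
      return atan( x / sqrt( (1.0 - x) * (1.0 + x) ) );
    }
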
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SASIN idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
+
+ |xref t_operr
+ |xref t_frcinx
+ |xref t_extdnrm
+ |xref satan
+
+ .global sasind
+sasind:
+//--ASIN(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm
+
+ .global sasin
+sasin:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ andil #0x7FFFFFFF,%d0
+ cmpil #0x3FFF8000,%d0
+ bges asinbig
+
+//--THIS IS THE USUAL CASE, |X| < 1
+//--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
+
+ fmoves #0x3F800000,%fp1
+ fsubx %fp0,%fp1 // ...1-X
+ fmovemx %fp2-%fp2,-(%a7)
+ fmoves #0x3F800000,%fp2
+ faddx %fp0,%fp2 // ...1+X
+ fmulx %fp2,%fp1 // ...(1+X)(1-X)
+ fmovemx (%a7)+,%fp2-%fp2
+ fsqrtx %fp1 // ...SQRT([1-X][1+X])
+ fdivx %fp1,%fp0 // ...X/SQRT([1-X][1+X])
+ fmovemx %fp0-%fp0,(%a0)
+ bsr satan
+ bra t_frcinx
+
+asinbig:
+ fabsx %fp0 // ...|X|
+ fcmps #0x3F800000,%fp0
+ fbgt t_operr //cause an operr exception
+
+//--|X| = 1, ASIN(X) = +- PI/2.
+
+ fmovex PIBY2,%fp0
+ movel (%a0),%d0
+ andil #0x80000000,%d0 // ...SIGN BIT OF X
+ oril #0x3F800000,%d0 // ...+-1 IN SGL FORMAT
+ movel %d0,-(%sp) // ...push SIGN(X) IN SGL-FMT
+ fmovel %d1,%FPCR
+ fmuls (%sp)+,%fp0
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/satan.s b/c/src/lib/libcpu/m68k/m68040/fpsp/satan.s
new file mode 100644
index 0000000000..662b0430f0
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/satan.s
@@ -0,0 +1,478 @@
+//
+// satan.sa 3.3 12/19/90
+//
+// The entry point satan computes the arctangent of an
+// input value. satand does the same except the input value is a
+// denormalized number.
+//
+// Input: Double-extended value in memory location pointed to by address
+// register a0.
+//
+// Output: Arctan(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 2 ulps in
+// 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program satan takes approximately 160 cycles for input
+// argument X such that 1/16 < |X| < 16. For the other arguments,
+// the program will run no worse than 10% slower.
+//
+// Algorithm:
+// Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
+//
+// Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
+// Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
+// of X with a bit-1 attached at the 6-th bit position. Define u
+// to be u = (X-F) / (1 + X*F).
+//
+// Step 3. Approximate arctan(u) by a polynomial poly.
+//
+// Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
+// calculated beforehand. Exit.
+//
+// Step 5. If |X| >= 16, go to Step 7.
+//
+// Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
+//
+// Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
+// Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
+//
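
The heart of Step 2 is the arctangent addition identity; a hedged double-precision sketch (here f is simply a parameter, whereas the FPSP derives it from the leading bits of X and looks arctan(F) up in ATANTBL):

    #include <math.h>

    /* atan(x) = atan(f) + atan( (x - f) / (1 + x*f) ), with f close to x
       and of the same sign, so 1 + x*f > 0 and the identity is safe */
    double atan_via_table_identity( double x, double f )
    {
      double u = (x - f) / (1.0 + x * f);
      return atan( f ) + atan( u );
    }
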
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//satan idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+BOUNDS1: .long 0x3FFB8000,0x4002FFFF
+
+ONE: .long 0x3F800000
+
+ .long 0x00000000
+
+ATANA3: .long 0xBFF6687E,0x314987D8
+ATANA2: .long 0x4002AC69,0x34A26DB3
+
+ATANA1: .long 0xBFC2476F,0x4E1DA28E
+ATANB6: .long 0x3FB34444,0x7F876989
+
+ATANB5: .long 0xBFB744EE,0x7FAF45DB
+ATANB4: .long 0x3FBC71C6,0x46940220
+
+ATANB3: .long 0xBFC24924,0x921872F9
+ATANB2: .long 0x3FC99999,0x99998FA9
+
+ATANB1: .long 0xBFD55555,0x55555555
+ATANC5: .long 0xBFB70BF3,0x98539E6A
+
+ATANC4: .long 0x3FBC7187,0x962D1D7D
+ATANC3: .long 0xBFC24924,0x827107B8
+
+ATANC2: .long 0x3FC99999,0x9996263E
+ATANC1: .long 0xBFD55555,0x55555536
+
+PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
+NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
+PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000
+NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000
+
+ATANTBL:
+ .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
+ .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
+ .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
+ .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
+ .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
+ .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
+ .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
+ .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
+ .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
+ .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
+ .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
+ .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
+ .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
+ .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
+ .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
+ .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
+ .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
+ .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
+ .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
+ .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
+ .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
+ .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
+ .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
+ .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
+ .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
+ .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
+ .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
+ .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
+ .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
+ .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
+ .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
+ .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
+ .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
+ .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
+ .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
+ .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
+ .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
+ .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
+ .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
+ .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
+ .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
+ .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
+ .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
+ .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
+ .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
+ .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
+ .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
+ .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
+ .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
+ .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
+ .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
+ .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
+ .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
+ .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
+ .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
+ .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
+ .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
+ .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
+ .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
+ .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
+ .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
+ .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
+ .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
+ .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
+ .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
+ .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
+ .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
+ .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
+ .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
+ .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
+ .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
+ .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
+ .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
+ .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
+ .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
+ .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
+ .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
+ .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
+ .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
+ .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
+ .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
+ .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
+ .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
+ .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
+ .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
+ .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
+ .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
+ .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
+ .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
+ .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
+ .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
+ .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
+ .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
+ .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
+ .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
+ .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
+ .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
+ .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
+ .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
+ .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
+ .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
+ .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
+ .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
+ .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
+ .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
+ .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
+ .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
+ .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
+ .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
+ .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
+ .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
+ .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
+ .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
+ .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
+ .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
+ .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
+ .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
+ .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
+ .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
+ .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
+ .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
+ .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
+ .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
+ .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
+ .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
+ .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
+ .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
+ .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
+
+ .set X,FP_SCR1
+ .set XDCARE,X+2
+ .set XFRAC,X+4
+ .set XFRACLO,X+8
+
+ .set ATANF,FP_SCR2
+ .set ATANFHI,ATANF+4
+ .set ATANFLO,ATANF+8
+
+
+ | xref t_frcinx
+ |xref t_extdnrm
+
+ .global satand
+satand:
+//--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
+
+ bra t_extdnrm
+
+ .global satan
+satan:
+//--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ fmovex %fp0,X(%a6)
+ andil #0x7FFFFFFF,%d0
+
+ cmpil #0x3FFB8000,%d0 // ...|X| >= 1/16?
+ bges ATANOK1
+ bra ATANSM
+
+ATANOK1:
+ cmpil #0x4002FFFF,%d0 // ...|X| < 16 ?
+ bles ATANMAIN
+ bra ATANBIG
+
+
+//--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
+//--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
+//--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
+//--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
+//--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
+//--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
+//--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
+//--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
+//--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
+//--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
+//--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
+//--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
+//--WILL INVOLVE A VERY LONG POLYNOMIAL.
+
+//--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
+//--WE CHOSE F TO BE +-2^K * 1.BBBB1
+//--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
+//--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
+//--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
+//-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
+
+ATANMAIN:
+
+ movew #0x0000,XDCARE(%a6) // ...CLEAN UP X JUST IN CASE
+ andil #0xF8000000,XFRAC(%a6) // ...FIRST 5 BITS
+ oril #0x04000000,XFRAC(%a6) // ...SET 6-TH BIT TO 1
+ movel #0x00000000,XFRACLO(%a6) // ...LOCATION OF X IS NOW F
+
+ fmovex %fp0,%fp1 // ...FP1 IS X
+ fmulx X(%a6),%fp1 // ...FP1 IS X*F, NOTE THAT X*F > 0
+ fsubx X(%a6),%fp0 // ...FP0 IS X-F
+ fadds #0x3F800000,%fp1 // ...FP1 IS 1 + X*F
+ fdivx %fp1,%fp0 // ...FP0 IS U = (X-F)/(1+X*F)
+
+//--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
+//--CREATE ATAN(F) AND STORE IT IN ATANF, AND
+//--SAVE REGISTERS FP2.
+
+ movel %d2,-(%a7) // ...SAVE d2 TEMPORARILY
+ movel %d0,%d2 // ...THE EXPO AND 16 BITS OF X
+ andil #0x00007800,%d0 // ...4 VARYING BITS OF F'S FRACTION
+ andil #0x7FFF0000,%d2 // ...EXPONENT OF F
+ subil #0x3FFB0000,%d2 // ...K+4
+ asrl #1,%d2
+ addl %d2,%d0 // ...THE 7 BITS IDENTIFYING F
+ asrl #7,%d0 // ...INDEX INTO TBL OF ATAN(|F|)
+ lea ATANTBL,%a1
+ addal %d0,%a1 // ...ADDRESS OF ATAN(|F|)
+ movel (%a1)+,ATANF(%a6)
+ movel (%a1)+,ATANFHI(%a6)
+ movel (%a1)+,ATANFLO(%a6) // ...ATANF IS NOW ATAN(|F|)
+ movel X(%a6),%d0 // ...LOAD SIGN AND EXPO. AGAIN
+ andil #0x80000000,%d0 // ...SIGN(F)
+ orl %d0,ATANF(%a6) // ...ATANF IS NOW SIGN(F)*ATAN(|F|)
+ movel (%a7)+,%d2 // ...RESTORE d2
+
+//--THAT'S ALL I HAVE TO DO FOR NOW,
+//--BUT ALAS, THE DIVIDE IS STILL CRANKING!
+
+//--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
+//--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
+//--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
+//--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
+//--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
+//--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
+//--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
+
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1
+ fmoved ATANA3,%fp2
+ faddx %fp1,%fp2 // ...A3+V
+ fmulx %fp1,%fp2 // ...V*(A3+V)
+ fmulx %fp0,%fp1 // ...U*V
+ faddd ATANA2,%fp2 // ...A2+V*(A3+V)
+ fmuld ATANA1,%fp1 // ...A1*U*V
+ fmulx %fp2,%fp1 // ...A1*U*V*(A2+V*(A3+V))
+
+ faddx %fp1,%fp0 // ...ATAN(U), FP1 RELEASED
+ fmovel %d1,%FPCR //restore users exceptions
+ faddx ATANF(%a6),%fp0 // ...ATAN(X)
+ bra t_frcinx
+
+ATANBORS:
+//--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
+//--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
+ cmpil #0x3FFF8000,%d0
+ bgt ATANBIG // ...I.E. |X| >= 16
+
+ATANSM:
+//--|X| <= 1/16
+//--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
+//--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
+//--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
+//--WHERE Y = X*X, AND Z = Y*Y.
+
+ cmpil #0x3FD78000,%d0
+ blt ATANTINY
+//--COMPUTE POLYNOMIAL
+ fmulx %fp0,%fp0 // ...FP0 IS Y = X*X
+
+
+ movew #0x0000,XDCARE(%a6)
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y
+
+ fmoved ATANB6,%fp2
+ fmoved ATANB5,%fp3
+
+ fmulx %fp1,%fp2 // ...Z*B6
+ fmulx %fp1,%fp3 // ...Z*B5
+
+ faddd ATANB4,%fp2 // ...B4+Z*B6
+ faddd ATANB3,%fp3 // ...B3+Z*B5
+
+ fmulx %fp1,%fp2 // ...Z*(B4+Z*B6)
+ fmulx %fp3,%fp1 // ...Z*(B3+Z*B5)
+
+ faddd ATANB2,%fp2 // ...B2+Z*(B4+Z*B6)
+ faddd ATANB1,%fp1 // ...B1+Z*(B3+Z*B5)
+
+ fmulx %fp0,%fp2 // ...Y*(B2+Z*(B4+Z*B6))
+ fmulx X(%a6),%fp0 // ...X*Y
+
+ faddx %fp2,%fp1 // ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
+
+
+ fmulx %fp1,%fp0 // ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
+
+ fmovel %d1,%FPCR //restore users exceptions
+ faddx X(%a6),%fp0
+
+ bra t_frcinx
+
+ATANTINY:
+//--|X| < 2^(-40), ATAN(X) = X
+ movew #0x0000,XDCARE(%a6)
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fmovex X(%a6),%fp0 //last inst - possible exception set
+
+ bra t_frcinx
+
+ATANBIG:
+//--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
+//--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
+ cmpil #0x40638000,%d0
+ bgt ATANHUGE
+
+//--APPROXIMATE ATAN(-1/X) BY
+//--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
+//--THIS CAN BE RE-WRITTEN AS
+//--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
+
+ fmoves #0xBF800000,%fp1 // ...LOAD -1
+ fdivx %fp0,%fp1 // ...FP1 IS -1/X
+
+
+//--DIVIDE IS STILL CRANKING
+
+ fmovex %fp1,%fp0 // ...FP0 IS X'
+ fmulx %fp0,%fp0 // ...FP0 IS Y = X'*X'
+ fmovex %fp1,X(%a6) // ...X IS REALLY X'
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y
+
+ fmoved ATANC5,%fp3
+ fmoved ATANC4,%fp2
+
+ fmulx %fp1,%fp3 // ...Z*C5
+ fmulx %fp1,%fp2 // ...Z*C4
+
+ faddd ATANC3,%fp3 // ...C3+Z*C5
+ faddd ATANC2,%fp2 // ...C2+Z*C4
+
+ fmulx %fp3,%fp1 // ...Z*(C3+Z*C5), FP3 RELEASED
+ fmulx %fp0,%fp2 // ...Y*(C2+Z*C4)
+
+ faddd ATANC1,%fp1 // ...C1+Z*(C3+Z*C5)
+ fmulx X(%a6),%fp0 // ...X'*Y
+
+ faddx %fp2,%fp1 // ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
+
+
+ fmulx %fp1,%fp0 // ...X'*Y*([C1+Z*(C3+Z*C5)]
+// ... +[Y*(C2+Z*C4)])
+ faddx X(%a6),%fp0
+
+ fmovel %d1,%FPCR //restore users exceptions
+
+ btstb #7,(%a0)
+ beqs pos_big
+
+neg_big:
+ faddx NPIBY2,%fp0
+ bra t_frcinx
+
+pos_big:
+ faddx PPIBY2,%fp0
+ bra t_frcinx
+
+ATANHUGE:
+//--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
+ btstb #7,(%a0)
+ beqs pos_huge
+
+neg_huge:
+ fmovex NPIBY2,%fp0
+ fmovel %d1,%fpcr
+ fsubx NTINY,%fp0
+ bra t_frcinx
+
+pos_huge:
+ fmovex PPIBY2,%fp0
+ fmovel %d1,%fpcr
+ fsubx PTINY,%fp0
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/satanh.s b/c/src/lib/libcpu/m68k/m68040/fpsp/satanh.s
new file mode 100644
index 0000000000..6874f83e34
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/satanh.s
@@ -0,0 +1,104 @@
+//
+// satanh.sa 3.3 12/19/90
+//
+// The entry point satanh computes the inverse hyperbolic tangent
+// of an input argument; satanhd does the same except for denormalized
+// input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value arctanh(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 3 ulps in
+// 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program satanh takes approximately 270 cycles.
+//
+// Algorithm:
+//
+// ATANH
+// 1. If |X| >= 1, go to 3.
+//
+// 2. (|X| < 1) Calculate atanh(X) by
+// sgn := sign(X)
+// y := |X|
+// z := 2y/(1-y)
+// atanh(X) := sgn * (1/2) * logp1(z)
+// Exit.
+//
+// 3. If |X| > 1, go to 5.
+//
+// 4. (|X| = 1) Generate infinity with an appropriate sign and
+// divide-by-zero by
+// sgn := sign(X)
+// atanh(X) := sgn / (+0).
+// Exit.
+//
+// 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+// Exit.
+//
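
Step 2 corresponds to the following double-precision identity, shown as an illustrative sketch (the FPSP calls its internal slognp1 where this uses the C library log1p):

    #include <math.h>

    /* atanh(x) = sign(x) * 0.5 * log1p( 2|x| / (1 - |x|) )   for |x| < 1 */
    double atanh_via_log1p( double x )
    {
      double y = fabs( x );
      double r = 0.5 * log1p( 2.0 * y / (1.0 - y) );
      return ( x < 0.0 ) ? -r : r;
    }
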
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//satanh idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ |xref t_dz
+ |xref t_operr
+ |xref t_frcinx
+ |xref t_extdnrm
+ |xref slognp1
+
+ .global satanhd
+satanhd:
+//--ATANH(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm
+
+ .global satanh
+satanh:
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ andil #0x7FFFFFFF,%d0
+ cmpil #0x3FFF8000,%d0
+ bges ATANHBIG
+
+//--THIS IS THE USUAL CASE, |X| < 1
+//--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
+
+ fabsx (%a0),%fp0 // ...Y = |X|
+ fmovex %fp0,%fp1
+ fnegx %fp1 // ...-Y
+ faddx %fp0,%fp0 // ...2Y
+ fadds #0x3F800000,%fp1 // ...1-Y
+ fdivx %fp1,%fp0 // ...2Y/(1-Y)
+ movel (%a0),%d0
+ andil #0x80000000,%d0
+ oril #0x3F000000,%d0 // ...SIGN(X)*HALF
+ movel %d0,-(%sp)
+
+ fmovemx %fp0-%fp0,(%a0) // ...overwrite input
+ movel %d1,-(%sp)
+ clrl %d1
+ bsr slognp1 // ...LOG1P(Z)
+ fmovel (%sp)+,%fpcr
+ fmuls (%sp)+,%fp0
+ bra t_frcinx
+
+ATANHBIG:
+ fabsx (%a0),%fp0 // ...|X|
+ fcmps #0x3F800000,%fp0
+ fbgt t_operr
+ bra t_dz
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/scale.s b/c/src/lib/libcpu/m68k/m68040/fpsp/scale.s
new file mode 100644
index 0000000000..3e990d8d33
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/scale.s
@@ -0,0 +1,371 @@
+//
+// scale.sa 3.3 7/30/91
+//
+// The entry point sSCALE computes the destination operand
+// scaled by the source operand. If the absolute value of
+// the source operand is >= 2^14, an overflow or underflow
+// is returned.
+//
+// The entry point sscale is called from do_func to emulate
+// the fscale unimplemented instruction.
+//
+// Input: Double-extended destination operand in FPTEMP,
+// double-extended source operand in ETEMP.
+//
+// Output: The function returns scale(X,Y) to fp0.
+//
+// Modifies: fp0.
+//
+// Algorithm:
+//
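
Conceptually, for an in-range source the operation scales the destination by a power of two once the source has been truncated to an integer. A hedged model using the standard ldexp (illustrative only; the emulation below additionally handles overflow, underflow, and denormalized operands):

    #include <math.h>

    /* scale(x, n) behaves like x * 2^n for |n| < 2^14 */
    double fscale_model( double x, int n )
    {
      return ldexp( x, n );
    }
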
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SCALE idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref t_ovfl2
+ |xref t_unfl
+ |xref round
+ |xref t_resdnrm
+
+SRC_BNDS: .short 0x3fff,0x400c
+
+//
+// This entry point is used by the unimplemented instruction exception
+// handler.
+//
+//
+//
+// FSCALE
+//
+ .global sscale
+sscale:
+ fmovel #0,%fpcr //clr user enabled exc
+ clrl %d1
+ movew FPTEMP(%a6),%d1 //get dest exponent
+ smi L_SCR1(%a6) //use L_SCR1 to hold sign
+ andil #0x7fff,%d1 //strip sign
+ movew ETEMP(%a6),%d0 //check src bounds
+ andiw #0x7fff,%d0 //clr sign bit
+ cmp2w SRC_BNDS,%d0
+ bccs src_in
+ cmpiw #0x400c,%d0 //test for too large
+ bge src_out
+//
+// The source input is below 1, so we check for denormalized numbers
+// and set unfl.
+//
+src_small:
+ moveb DTAG(%a6),%d0
+ andib #0xe0,%d0
+ tstb %d0
+ beqs no_denorm
+ st STORE_FLG(%a6) //dest already contains result
+ orl #unfl_mask,USER_FPSR(%a6) //set UNFL
+den_done:
+ leal FPTEMP(%a6),%a0
+ bra t_resdnrm
+no_denorm:
+ fmovel USER_FPCR(%a6),%FPCR
+ fmovex FPTEMP(%a6),%fp0 //simply return dest
+ rts
+
+
+//
+// Source is within 2^14 range. To perform the int operation,
+// move it to d0.
+//
+src_in:
+ fmovex ETEMP(%a6),%fp0 //move in src for int
+ fmovel #rz_mode,%fpcr //force rz for src conversion
+ fmovel %fp0,%d0 //int src to d0
+ fmovel #0,%FPSR //clr status from above
+ tstw ETEMP(%a6) //check src sign
+ blt src_neg
+//
+// Source is positive. Add the src to the dest exponent.
+// The result can be denormalized, if src = 0, or overflow,
+// if the result of the add sets a bit in the upper word.
+//
+src_pos:
+ tstw %d1 //check for denorm
+ beq dst_dnrm
+ addl %d0,%d1 //add src to dest exp
+ beqs denorm //if zero, result is denorm
+ cmpil #0x7fff,%d1 //test for overflow
+ bges ovfl
+ tstb L_SCR1(%a6)
+ beqs spos_pos
+ orw #0x8000,%d1
+spos_pos:
+ movew %d1,FPTEMP(%a6) //result in FPTEMP
+ fmovel USER_FPCR(%a6),%FPCR
+ fmovex FPTEMP(%a6),%fp0 //write result to fp0
+ rts
+ovfl:
+ tstb L_SCR1(%a6)
+ beqs sovl_pos
+ orw #0x8000,%d1
+sovl_pos:
+ movew FPTEMP(%a6),ETEMP(%a6) //result in ETEMP
+ movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
+ movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
+ bra t_ovfl2
+
+denorm:
+ tstb L_SCR1(%a6)
+ beqs den_pos
+ orw #0x8000,%d1
+den_pos:
+ tstl FPTEMP_HI(%a6) //check j bit
+ blts nden_exit //if set, not denorm
+ movew %d1,ETEMP(%a6) //input expected in ETEMP
+ movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
+ movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
+ orl #unfl_bit,USER_FPSR(%a6) //set unfl
+ leal ETEMP(%a6),%a0
+ bra t_resdnrm
+nden_exit:
+ movew %d1,FPTEMP(%a6) //result in FPTEMP
+ fmovel USER_FPCR(%a6),%FPCR
+ fmovex FPTEMP(%a6),%fp0 //write result to fp0
+ rts
+
+//
+// Source is negative. Add the src to the dest exponent.
+// (The result exponent will be reduced). The result can be
+// denormalized.
+//
+src_neg:
+ addl %d0,%d1 //add src to dest
+ beqs denorm //if zero, result is denorm
+ blts fix_dnrm //if negative, result is
+// ;needing denormalization
+ tstb L_SCR1(%a6)
+ beqs sneg_pos
+ orw #0x8000,%d1
+sneg_pos:
+ movew %d1,FPTEMP(%a6) //result in FPTEMP
+ fmovel USER_FPCR(%a6),%FPCR
+ fmovex FPTEMP(%a6),%fp0 //write result to fp0
+ rts
+
+
+//
+// The result exponent is below denorm value. Test for catastrophic
+// underflow and force zero if true. If not, try to shift the
+// mantissa right until a zero exponent exists.
+//
+fix_dnrm:
+ cmpiw #0xffc0,%d1 //lower bound for normalization
+ blt fix_unfl //if lower, catastrophic unfl
+ movew %d1,%d0 //use d0 for exp
+ movel %d2,-(%a7) //free d2 for norm
+ movel FPTEMP_HI(%a6),%d1
+ movel FPTEMP_LO(%a6),%d2
+ clrl L_SCR2(%a6)
+fix_loop:
+ addw #1,%d0 //drive d0 to 0
+ lsrl #1,%d1 //while shifting the
+ roxrl #1,%d2 //mantissa to the right
+ bccs no_carry
+ st L_SCR2(%a6) //use L_SCR2 to capture inex
+no_carry:
+ tstw %d0 //it is finished when
+ blts fix_loop //d0 is zero or the mantissa
+ tstb L_SCR2(%a6)
+ beqs tst_zero
+ orl #unfl_inx_mask,USER_FPSR(%a6)
+// ;set unfl, aunfl, ainex
+//
+// Test for zero. If zero, simply use fmove to return +/- zero
+// to the fpu.
+//
+tst_zero:
+ clrw FPTEMP_EX(%a6)
+ tstb L_SCR1(%a6) //test for sign
+ beqs tst_con
+ orw #0x8000,FPTEMP_EX(%a6) //set sign bit
+tst_con:
+ movel %d1,FPTEMP_HI(%a6)
+ movel %d2,FPTEMP_LO(%a6)
+ movel (%a7)+,%d2
+ tstl %d1
+ bnes not_zero
+ tstl FPTEMP_LO(%a6)
+ bnes not_zero
+//
+// Result is zero. Check for rounding mode to set lsb. If the
+// mode is rp, and the zero is positive, return smallest denorm.
+// If the mode is rm, and the zero is negative, return smallest
+// negative denorm.
+//
+ btstb #5,FPCR_MODE(%a6) //test if rm or rp
+ beqs no_dir
+ btstb #4,FPCR_MODE(%a6) //check which one
+ beqs zer_rm
+zer_rp:
+ tstb L_SCR1(%a6) //check sign
+ bnes no_dir //if set, neg op, no inc
+ movel #1,FPTEMP_LO(%a6) //set lsb
+ bras sm_dnrm
+zer_rm:
+ tstb L_SCR1(%a6) //check sign
+ beqs no_dir //if clr, neg op, no inc
+ movel #1,FPTEMP_LO(%a6) //set lsb
+ orl #neg_mask,USER_FPSR(%a6) //set N
+ bras sm_dnrm
+no_dir:
+ fmovel USER_FPCR(%a6),%FPCR
+ fmovex FPTEMP(%a6),%fp0 //use fmove to set cc's
+ rts
+
+//
+// The rounding mode changed the zero to a smallest denorm. Call
+// t_resdnrm with exceptional operand in ETEMP.
+//
+sm_dnrm:
+ movel FPTEMP_EX(%a6),ETEMP_EX(%a6)
+ movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
+ movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
+ leal ETEMP(%a6),%a0
+ bra t_resdnrm
+
+//
+// Result is still denormalized.
+//
+not_zero:
+ orl #unfl_mask,USER_FPSR(%a6) //set unfl
+ tstb L_SCR1(%a6) //check for sign
+ beqs fix_exit
+ orl #neg_mask,USER_FPSR(%a6) //set N
+fix_exit:
+ bras sm_dnrm
+
+
+//
+// The result has underflowed to zero. Return zero and set
+// unfl, aunfl, and ainex.
+//
+fix_unfl:
+ orl #unfl_inx_mask,USER_FPSR(%a6)
+ btstb #5,FPCR_MODE(%a6) //test if rm or rp
+ beqs no_dir2
+ btstb #4,FPCR_MODE(%a6) //check which one
+ beqs zer_rm2
+zer_rp2:
+ tstb L_SCR1(%a6) //check sign
+ bnes no_dir2 //if set, neg op, no inc
+ clrl FPTEMP_EX(%a6)
+ clrl FPTEMP_HI(%a6)
+ movel #1,FPTEMP_LO(%a6) //set lsb
+ bras sm_dnrm //return smallest denorm
+zer_rm2:
+ tstb L_SCR1(%a6) //check sign
+ beqs no_dir2 //if clr, neg op, no inc
+ movew #0x8000,FPTEMP_EX(%a6)
+ clrl FPTEMP_HI(%a6)
+ movel #1,FPTEMP_LO(%a6) //set lsb
+ orl #neg_mask,USER_FPSR(%a6) //set N
+ bra sm_dnrm //return smallest denorm
+
+no_dir2:
+ tstb L_SCR1(%a6)
+ bges pos_zero
+neg_zero:
+ clrl FP_SCR1(%a6) //clear the exceptional operand
+ clrl FP_SCR1+4(%a6) //for gen_except.
+ clrl FP_SCR1+8(%a6)
+ fmoves #0x80000000,%fp0
+ rts
+pos_zero:
+ clrl FP_SCR1(%a6) //clear the exceptional operand
+ clrl FP_SCR1+4(%a6) //for gen_except.
+ clrl FP_SCR1+8(%a6)
+ fmoves #0x00000000,%fp0
+ rts
+
+//
+// The destination is a denormalized number. It must be handled
+// by first shifting the bits in the mantissa until it is normalized,
+// then adding the remainder of the source to the exponent.
+//
+dst_dnrm:
+ moveml %d2/%d3,-(%a7)
+ movew FPTEMP_EX(%a6),%d1
+ movel FPTEMP_HI(%a6),%d2
+ movel FPTEMP_LO(%a6),%d3
+dst_loop:
+ tstl %d2 //test for normalized result
+ blts dst_norm //exit loop if so
+ tstl %d0 //otherwise, test shift count
+ beqs dst_fin //if zero, shifting is done
+ subil #1,%d0 //dec src
+ lsll #1,%d3
+ roxll #1,%d2
+ bras dst_loop
+//
+// Destination became normalized. Simply add the remaining
+// portion of the src to the exponent.
+//
+dst_norm:
+ addw %d0,%d1 //dst is normalized; add src
+ tstb L_SCR1(%a6)
+ beqs dnrm_pos
+ orl #0x8000,%d1
+dnrm_pos:
+ movemw %d1,FPTEMP_EX(%a6)
+ moveml %d2,FPTEMP_HI(%a6)
+ moveml %d3,FPTEMP_LO(%a6)
+ fmovel USER_FPCR(%a6),%FPCR
+ fmovex FPTEMP(%a6),%fp0
+ moveml (%a7)+,%d2/%d3
+ rts
+
+//
+// Destination remained denormalized. Call t_resdnrm with
+// exceptional operand in ETEMP.
+//
+dst_fin:
+ tstb L_SCR1(%a6) //check for sign
+ beqs dst_exit
+ orl #neg_mask,USER_FPSR(%a6) //set N
+ orl #0x8000,%d1
+dst_exit:
+ movemw %d1,ETEMP_EX(%a6)
+ moveml %d2,ETEMP_HI(%a6)
+ moveml %d3,ETEMP_LO(%a6)
+ orl #unfl_mask,USER_FPSR(%a6) //set unfl
+ moveml (%a7)+,%d2/%d3
+ leal ETEMP(%a6),%a0
+ bra t_resdnrm
+
+//
+// Source is outside of 2^14 range. Test the sign and branch
+// to the appropriate exception handler.
+//
+src_out:
+ tstb L_SCR1(%a6)
+ beqs scro_pos
+ orl #0x8000,%d1
+scro_pos:
+ movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
+ movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
+ tstw ETEMP(%a6)
+ blts res_neg
+res_pos:
+ movew %d1,ETEMP(%a6) //result in ETEMP
+ bra t_ovfl2
+res_neg:
+ movew %d1,ETEMP(%a6) //result in ETEMP
+ leal ETEMP(%a6),%a0
+ bra t_unfl
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/scosh.s b/c/src/lib/libcpu/m68k/m68040/fpsp/scosh.s
new file mode 100644
index 0000000000..cf603883be
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/scosh.s
@@ -0,0 +1,132 @@
+//
+// scosh.sa 3.1 12/10/90
+//
+// The entry point sCosh computes the hyperbolic cosine of
+// an input argument; sCoshd does the same except for denormalized
+// input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value cosh(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 3 ulps in
+// 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program sCOSH takes approximately 250 cycles.
+//
+// Algorithm:
+//
+// COSH
+// 1. If |X| > 16380 log2, go to 3.
+//
+// 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
+// y = |X|, z = exp(y), and
+// cosh(X) = (1/2)*( z + 1/z ).
+// Exit.
+//
+// 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
+//
+// 4. (16380 log2 < |X| <= 16480 log2)
+// cosh(X) = exp(|X|)/2.
+// However, invoking exp(|X|) may cause premature overflow.
+// Thus, we calculate cosh(X) as follows:
+// Y := |X|
+// Fact := 2**(16380)
+// Y' := Y - 16381 log2
+// cosh(X) := Fact * exp(Y').
+// Exit.
+//
+// 5. (|X| > 16480 log2) cosh(X) must overflow. Return
+// Huge*Huge to generate overflow and an infinity with
+// the appropriate sign. Huge is the largest finite number in
+// extended format. Exit.
+//
+//
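
Case 2 above is the textbook formula; an illustrative double-precision sketch (the FPSP uses its internal setox for the exponential):

    #include <math.h>

    /* cosh(x) = ( exp(|x|) + 1/exp(|x|) ) / 2 */
    double cosh_via_exp( double x )
    {
      double z = exp( fabs( x ) );
      return 0.5 * ( z + 1.0 / z );
    }
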
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SCOSH idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ |xref t_ovfl
+ |xref t_frcinx
+ |xref setox
+
+T1: .long 0x40C62D38,0xD3D64634 // ... 16381 LOG2 LEAD
+T2: .long 0x3D6F90AE,0xB1E75CC7 // ... 16381 LOG2 TRAIL
+
+TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000
+
+ .global scoshd
+scoshd:
+//--COSH(X) = 1 FOR DENORMALIZED X
+
+ fmoves #0x3F800000,%fp0
+
+ fmovel %d1,%FPCR
+ fadds #0x00800000,%fp0
+ bra t_frcinx
+
+ .global scosh
+scosh:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ andil #0x7FFFFFFF,%d0
+ cmpil #0x400CB167,%d0
+ bgts COSHBIG
+
+//--THIS IS THE USUAL CASE, |X| < 16380 LOG2
+//--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
+
+ fabsx %fp0 // ...|X|
+
+ movel %d1,-(%sp)
+ clrl %d1
+ fmovemx %fp0-%fp0,(%a0) //pass parameter to setox
+ bsr setox // ...FP0 IS EXP(|X|)
+ fmuls #0x3F000000,%fp0 // ...(1/2)EXP(|X|)
+ movel (%sp)+,%d1
+
+ fmoves #0x3E800000,%fp1 // ...(1/4)
+ fdivx %fp0,%fp1 // ...1/(2 EXP(|X|))
+
+ fmovel %d1,%FPCR
+ faddx %fp1,%fp0
+
+ bra t_frcinx
+
+COSHBIG:
+ cmpil #0x400CB2B3,%d0
+ bgts COSHHUGE
+
+ fabsx %fp0
+ fsubd T1(%pc),%fp0 // ...(|X|-16381LOG2_LEAD)
+ fsubd T2(%pc),%fp0 // ...|X| - 16381 LOG2, ACCURATE
+
+ movel %d1,-(%sp)
+ clrl %d1
+ fmovemx %fp0-%fp0,(%a0)
+ bsr setox
+ fmovel (%sp)+,%fpcr
+
+ fmulx TWO16380(%pc),%fp0
+ bra t_frcinx
+
+COSHHUGE:
+ fmovel #0,%fpsr //clr N bit if set by source
+ bclrb #7,(%a0) //always return positive value
+ fmovemx (%a0),%fp0-%fp0
+ bra t_ovfl
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/setox.s b/c/src/lib/libcpu/m68k/m68040/fpsp/setox.s
new file mode 100644
index 0000000000..2caf829b06
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/setox.s
@@ -0,0 +1,865 @@
+//
+// setox.sa 3.1 12/10/90
+//
+// The entry point setox computes the exponential of a value.
+// setoxd does the same except the input value is a denormalized
+// number. setoxm1 computes exp(X)-1, and setoxm1d computes
+// exp(X)-1 for denormalized X.
+//
+// INPUT
+// -----
+// Double-extended value in memory location pointed to by address
+// register a0.
+//
+// OUTPUT
+// ------
+// exp(X) or exp(X)-1 returned in floating-point register fp0.
+//
+// ACCURACY and MONOTONICITY
+// -------------------------
+// The returned result is within 0.85 ulps in 64 significant bits, i.e.
+// within 0.5001 ulp to 53 bits if the result is subsequently rounded
+// to double precision. The result is provably monotonic in double
+// precision.
+//
+// SPEED
+// -----
+// Two timings are measured, both in the copy-back mode. The
+// first one is measured when the function is invoked the first time
+// (so the instructions and data are not in cache), and the
+// second one is measured when the function is reinvoked at the same
+// input argument.
+//
+// The program setox takes approximately 210/190 cycles for input
+// argument X whose magnitude is less than 16380 log2, which
+// is the usual situation. For the less common arguments,
+// depending on their values, the program may run faster or slower --
+// but no worse than 10% slower even in the extreme cases.
+//
+// The program setoxm1 takes approximately ???/??? cycles for input
+// argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
+// approximately ???/??? cycles. For the less common arguments,
+// depending on their values, the program may run faster or slower --
+// but no worse than 10% slower even in the extreme cases.
+//
+// ALGORITHM and IMPLEMENTATION NOTES
+// ----------------------------------
+//
+// setoxd
+// ------
+// Step 1. Set ans := 1.0
+//
+// Step 2. Return ans := ans + sign(X)*2^(-126). Exit.
+// Notes: This will always generate one exception -- inexact.
+//
+//
+// setox
+// -----
+//
+// Step 1. Filter out extreme cases of input argument.
+// 1.1 If |X| >= 2^(-65), go to Step 1.3.
+// 1.2 Go to Step 7.
+// 1.3 If |X| < 16380 log(2), go to Step 2.
+// 1.4 Go to Step 8.
+// Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
+// To avoid the use of floating-point comparisons, a
+// compact representation of |X| is used. This format is a
+// 32-bit integer, the upper (more significant) 16 bits are
+// the sign and biased exponent field of |X|; the lower 16
+// bits are the 16 most significant fraction (including the
+// explicit bit) bits of |X|. Consequently, the comparisons
+// in Steps 1.1 and 1.3 can be performed by integer comparison.
+// Note also that the constant 16380 log(2) used in Step 1.3
+// is also in the compact form. Thus taking the branch
+// to Step 2 guarantees |X| < 16380 log(2). There is no harm
+// in having a small number of cases where |X| is less than,
+// but close to, 16380 log(2) and the branch to Step 9 is
+// taken.
+//
+// Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
+// 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
+// 2.2 N := round-to-nearest-integer( X * 64/log2 ).
+// 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
+// 2.4 Calculate M = (N - J)/64; so N = 64M + J.
+// 2.5 Calculate the address of the stored value of 2^(J/64).
+// 2.6 Create the value Scale = 2^M.
+// Notes: The calculation in 2.2 is really performed by
+//
+// Z := X * constant
+// N := round-to-nearest-integer(Z)
+//
+// where
+//
+// constant := single-precision( 64/log 2 ).
+//
+// Using a single-precision constant avoids memory access.
+// Another effect of using a single-precision "constant" is
+// that the calculated value Z is
+//
+// Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
+//
+// This error has to be considered later in Steps 3 and 4.
+//
+// Step 3. Calculate X - N*log2/64.
+// 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
+// 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
+// Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate
+// the value -log2/64 to 88 bits of accuracy.
+// b) N*L1 is exact because N is no longer than 22 bits and
+// L1 is no longer than 24 bits.
+// c) The calculation X+N*L1 is also exact due to cancellation.
+// Thus, R is practically X+N(L1+L2) to full 64 bits.
+// d) It is important to estimate how large can |R| be after
+// Step 3.2.
+//
+// N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
+// X*64/log2 (1+eps) = N + f, |f| <= 0.5
+// X*64/log2 - N = f - eps*X 64/log2
+// X - N*log2/64 = f*log2/64 - eps*X
+//
+//
+// Now |X| <= 16446 log2, thus
+//
+// |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
+// <= 0.57 log2/64.
+// This bound will be used in Step 4.
+//
+// Step 4. Approximate exp(R)-1 by a polynomial
+// p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
+// Notes: a) In order to reduce memory access, the coefficients are
+// made as "short" as possible: A1 (which is 1/2), A4 and A5
+// are single precision; A2 and A3 are double precision.
+// b) Even with the restrictions above,
+// |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
+// Note that 0.0062 is slightly bigger than 0.57 log2/64.
+// c) To fully utilize the pipeline, p is separated into
+// two independent pieces of roughly equal complexities
+// p = [ R + R*S*(A2 + S*A4) ] +
+// [ S*(A1 + S*(A3 + S*A5)) ]
+// where S = R*R.
+//
+// Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
+// ans := T + ( T*p + t)
+// where T and t are the stored values for 2^(J/64).
+// Notes: 2^(J/64) is stored as T and t where T+t approximates
+// 2^(J/64) to roughly 85 bits; T is in extended precision
+// and t is in single precision. Note also that T is rounded
+// to 62 bits so that the last two bits of T are zero. The
+// reason for such a special form is that T-1, T-2, and T-8
+// will all be exact --- a property that will give much
+// more accurate computation of the function EXPM1.
+//
+// Step 6. Reconstruction of exp(X)
+// exp(X) = 2^M * 2^(J/64) * exp(R).
+// 6.1 If AdjFlag = 0, go to 6.3
+// 6.2 ans := ans * AdjScale
+// 6.3 Restore the user FPCR
+// 6.4 Return ans := ans * Scale. Exit.
+// Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
+// |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
+// neither overflow nor underflow. If AdjFlag = 1, that
+// means that
+// X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
+// Hence, exp(X) may overflow or underflow or neither.
+// When that is the case, AdjScale = 2^(M1) where M1 is
+// approximately M. Thus 6.2 will never cause over/underflow.
+// Possible exception in 6.4 is overflow or underflow.
+// The inexact exception is not generated in 6.4. Although
+// one can argue that the inexact flag should always be
+// raised, simulating that exception would cost more than the
+// flag is worth in practical use.
+//
+// Step 7. Return 1 + X.
+// 7.1 ans := X
+// 7.2 Restore user FPCR.
+// 7.3 Return ans := 1 + ans. Exit
+// Notes: For non-zero X, the inexact exception will always be
+// raised by 7.3. That is the only exception raised by 7.3.
+// Note also that we use the FMOVEM instruction to move X
+// in Step 7.1 to avoid unnecessary trapping. (Although
+// the FMOVEM may not seem relevant since X is normalized,
+// the precaution will be useful in the library version of
+// this code where the separate entry for denormalized inputs
+// will be done away with.)
+//
+// Step 8. Handle exp(X) where |X| >= 16380log2.
+// 8.1 If |X| > 16480 log2, go to Step 9.
+// (mimic 2.2 - 2.6)
+// 8.2 N := round-to-integer( X * 64/log2 )
+// 8.3 Calculate J = N mod 64, J = 0,1,...,63
+// 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
+// 8.5 Calculate the address of the stored value 2^(J/64).
+// 8.6 Create the values Scale = 2^M, AdjScale = 2^M1.
+// 8.7 Go to Step 3.
+// Notes: Refer to notes for 2.2 - 2.6.
+//
+// Step 9. Handle exp(X), |X| > 16480 log2.
+// 9.1 If X < 0, go to 9.3
+// 9.2 ans := Huge, go to 9.4
+// 9.3 ans := Tiny.
+// 9.4 Restore user FPCR.
+// 9.5 Return ans := ans * ans. Exit.
+// Notes: Exp(X) will surely overflow or underflow, depending on
+// X's sign. "Huge" and "Tiny" are respectively large/tiny
+// extended-precision numbers whose squares overflow/underflow
+// with an inexact result. Thus, 9.5 always raises the
+// inexact together with either overflow or underflow.
+//
+//
+// setoxm1d
+// --------
+//
+// Step 1. Set ans := 0
+//
+// Step 2. Return ans := X + ans. Exit.
+// Notes: This will return X with the appropriate rounding
+// precision prescribed by the user FPCR.
+//
+// setoxm1
+// -------
+//
+// Step 1. Check |X|
+// 1.1 If |X| >= 1/4, go to Step 1.3.
+// 1.2 Go to Step 7.
+// 1.3 If |X| < 70 log(2), go to Step 2.
+// 1.4 Go to Step 10.
+// Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
+// However, it is conceivable |X| can be small very often
+// because EXPM1 is intended to evaluate exp(X)-1 accurately
+// when |X| is small. For further details on the comparisons,
+// see the notes on Step 1 of setox.
+//
+// Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
+// 2.1 N := round-to-nearest-integer( X * 64/log2 ).
+// 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
+// 2.3 Calculate M = (N - J)/64; so N = 64M + J.
+// 2.4 Calculate the address of the stored value of 2^(J/64).
+// 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M).
+// Notes: See the notes on Step 2 of setox.
+//
+// Step 3. Calculate X - N*log2/64.
+// 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
+// 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
+// Notes: Applying the analysis of Step 3 of setox in this case
+// shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
+// this case).
+//
+// Step 4. Approximate exp(R)-1 by a polynomial
+// p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))
+// Notes: a) In order to reduce memory access, the coefficients are
+// made as "short" as possible: A1 (which is 1/2), A5 and A6
+// are single precision; A2, A3 and A4 are double precision.
+// b) Even with the restriction above,
+// |p - (exp(R)-1)| < |R| * 2^(-72.7)
+// for all |R| <= 0.0055.
+// c) To fully utilize the pipeline, p is separated into
+// two independent pieces of roughly equal complexity
+// p = [ R*S*(A2 + S*(A4 + S*A6)) ] +
+// [ R + S*(A1 + S*(A3 + S*A5)) ]
+// where S = R*R.
+//
+// Step 5. Compute 2^(J/64)*p by
+// p := T*p
+// where T and t are the stored values for 2^(J/64).
+// Notes: 2^(J/64) is stored as T and t where T+t approximates
+// 2^(J/64) to roughly 85 bits; T is in extended precision
+// and t is in single precision. Note also that T is rounded
+// to 62 bits so that the last two bits of T are zero. The
+// reason for such a special form is that T-1, T-2, and T-8
+// will all be exact --- a property that will be exploited
+// in Step 6 below. The total relative error in p is no
+// bigger than 2^(-67.7) compared to the final result.
+//
+// Step 6. Reconstruction of exp(X)-1
+// exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
+// 6.1 If M <= 63, go to Step 6.3.
+// 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6
+// 6.3 If M >= -3, go to 6.5.
+// 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6
+// 6.5 ans := (T + OnebySc) + (p + t).
+// 6.6 Restore user FPCR.
+// 6.7 Return ans := Sc * ans. Exit.
+// Notes: The various arrangements of the expressions give accurate
+// evaluations.
+//
+// Step 7. exp(X)-1 for |X| < 1/4.
+// 7.1 If |X| >= 2^(-65), go to Step 9.
+// 7.2 Go to Step 8.
+//
+// Step 8. Calculate exp(X)-1, |X| < 2^(-65).
+// 8.1 If |X| < 2^(-16312), go to 8.3
+// 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit.
+// 8.3 X := X * 2^(140).
+// 8.4 Restore FPCR; ans := ans - 2^(-16382).
+// Return ans := ans*2^(140). Exit
+// Notes: The idea is to return "X - tiny" under the user
+// precision and rounding modes. To avoid unnecessary
+// inefficiency, we stay away from denormalized numbers the
+// best we can. For |X| >= 2^(-16312), the straightforward
+// 8.2 generates the inexact exception as the case warrants.
+//
+// Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial
+// p = X + X*X*(B1 + X*(B2 + ... + X*B12))
+// Notes: a) In order to reduce memory access, the coefficients are
+// made as "short" as possible: B1 (which is 1/2), B9 to B12
+// are single precision; B3 to B8 are double precision; and
+// B2 is double extended.
+// b) Even with the restriction above,
+// |p - (exp(X)-1)| < |X| 2^(-70.6)
+// for all |X| <= 0.251.
+// Note that 0.251 is slightly bigger than 1/4.
+// c) To fully preserve accuracy, the polynomial is computed
+// as X + ( S*B1 + Q ) where S = X*X and
+// Q = X*S*(B2 + X*(B3 + ... + X*B12))
+// d) To fully utilize the pipeline, Q is separated into
+// two independent pieces of roughly equal complexity
+// Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
+// [ S*S*(B3 + S*(B5 + ... + S*B11)) ]
+//
+// Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.
+// 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
+// purposes. Therefore, go to Step 1 of setox.
+// 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
+// ans := -1
+// Restore user FPCR
+// Return ans := ans + 2^(-126). Exit.
+// Notes: 10.2 will always create an inexact and return -1 + tiny
+// in the user rounding precision and mode.
+//
+//
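+// (Illustrative sketch, not part of the FPSP sources: the same
+// table-driven range reduction written in C with IEEE double and the
+// C99 <math.h> calls rint/expm1/exp2/ldexp.  The names exp_sketch and
+// LOG2 are made up for this note; the real code works in extended
+// precision with the split constants L1/L2, a 64-entry table of
+// 2^(J/64), and separate handling of the huge/tiny cases.)
+//
+//	#include <math.h>
+//	static const double LOG2 = 0.69314718055994530942;
+//	double exp_sketch(double x)
+//	{
+//	    double n = rint(x * 64.0 / LOG2);  /* N = round(X * 64/log2)     */
+//	    int    j = (int)n & 63;            /* J = N mod 64               */
+//	    int    m = ((int)n - j) / 64;      /* M, so that N = 64*M + J    */
+//	    double r = x - n * (LOG2 / 64.0);  /* R, the reduced argument    */
+//	    double p = expm1(r);               /* the polynomial models this */
+//	    double t = exp2(j / 64.0);         /* table lookup of 2^(J/64)   */
+//	    return ldexp(t + t * p, m);        /* 2^M * 2^(J/64) * exp(R)    */
+//	}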
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//setox idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+L2: .long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
+
+EXPA3: .long 0x3FA55555,0x55554431
+EXPA2: .long 0x3FC55555,0x55554018
+
+HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+
+EM1A4: .long 0x3F811111,0x11174385
+EM1A3: .long 0x3FA55555,0x55554F5A
+
+EM1A2: .long 0x3FC55555,0x55555555,0x00000000,0x00000000
+
+EM1B8: .long 0x3EC71DE3,0xA5774682
+EM1B7: .long 0x3EFA01A0,0x19D7CB68
+
+EM1B6: .long 0x3F2A01A0,0x1A019DF3
+EM1B5: .long 0x3F56C16C,0x16C170E2
+
+EM1B4: .long 0x3F811111,0x11111111
+EM1B3: .long 0x3FA55555,0x55555555
+
+EM1B2: .long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
+ .long 0x00000000
+
+TWO140: .long 0x48B00000,0x00000000
+TWON140: .long 0x37300000,0x00000000
+
+EXPTBL:
+ .long 0x3FFF0000,0x80000000,0x00000000,0x00000000
+ .long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
+ .long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
+ .long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
+ .long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
+ .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
+ .long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
+ .long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
+ .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
+ .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
+ .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
+ .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
+ .long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
+ .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
+ .long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
+ .long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
+ .long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
+ .long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
+ .long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
+ .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
+ .long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
+ .long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
+ .long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
+ .long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
+ .long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
+ .long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
+ .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
+ .long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
+ .long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
+ .long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
+ .long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
+ .long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
+ .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
+ .long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
+ .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
+ .long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
+ .long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
+ .long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
+ .long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
+ .long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
+ .long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
+ .long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
+ .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
+ .long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
+ .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
+ .long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
+ .long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
+ .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
+ .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
+ .long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
+ .long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
+ .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
+ .long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
+ .long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
+ .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
+ .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
+ .long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
+ .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
+ .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
+ .long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
+ .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
+ .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
+ .long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
+ .long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
+
+ .set ADJFLAG,L_SCR2
+ .set SCALE,FP_SCR1
+ .set ADJSCALE,FP_SCR2
+ .set SC,FP_SCR3
+ .set ONEBYSC,FP_SCR4
+
+ | xref t_frcinx
+ |xref t_extdnrm
+ |xref t_unfl
+ |xref t_ovfl
+
+ .global setoxd
+setoxd:
+//--entry point for EXP(X), X is denormalized
+ movel (%a0),%d0
+ andil #0x80000000,%d0
+ oril #0x00800000,%d0 // ...sign(X)*2^(-126)
+ movel %d0,-(%sp)
+ fmoves #0x3F800000,%fp0
+ fmovel %d1,%fpcr
+ fadds (%sp)+,%fp0
+ bra t_frcinx
+
+ .global setox
+setox:
+//--entry point for EXP(X), here X is finite, non-zero, and not NaN's
+
+//--Step 1.
+ movel (%a0),%d0 // ...load part of input X
+ andil #0x7FFF0000,%d0 // ...biased expo. of X
+ cmpil #0x3FBE0000,%d0 // ...2^(-65)
+ bges EXPC1 // ...normal case
+ bra EXPSM
+
+EXPC1:
+//--The case |X| >= 2^(-65)
+ movew 4(%a0),%d0 // ...expo. and partial sig. of |X|
+ cmpil #0x400CB167,%d0 // ...16380 log2 trunc. 16 bits
+ blts EXPMAIN // ...normal case
+ bra EXPBIG
+
+EXPMAIN:
+//--Step 2.
+//--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
+ fmovex (%a0),%fp0 // ...load input from (a0)
+
+ fmovex %fp0,%fp1
+ fmuls #0x42B8AA3B,%fp0 // ...64/log2 * X
+ fmovemx %fp2-%fp2/%fp3,-(%a7) // ...save fp2
+ movel #0,ADJFLAG(%a6)
+ fmovel %fp0,%d0 // ...N = int( X * 64/log2 )
+ lea EXPTBL,%a1
+ fmovel %d0,%fp0 // ...convert to floating-format
+
+ movel %d0,L_SCR1(%a6) // ...save N temporarily
+ andil #0x3F,%d0 // ...D0 is J = N mod 64
+ lsll #4,%d0
+ addal %d0,%a1 // ...address of 2^(J/64)
+ movel L_SCR1(%a6),%d0
+ asrl #6,%d0 // ...D0 is M
+ addiw #0x3FFF,%d0 // ...biased expo. of 2^(M)
+ movew L2,L_SCR1(%a6) // ...prefetch L2, no need in CB
+
+EXPCONT1:
+//--Step 3.
+//--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
+//--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
+ fmovex %fp0,%fp2
+ fmuls #0xBC317218,%fp0 // ...N * L1, L1 = lead(-log2/64)
+ fmulx L2,%fp2 // ...N * L2, L1+L2 = -log2/64
+ faddx %fp1,%fp0 // ...X + N*L1
+ faddx %fp2,%fp0 // ...fp0 is R, reduced arg.
+// MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache
+
+//--Step 4.
+//--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
+//-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
+//--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
+//--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
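+//--(Illustrative regrouping, not from the sources: in C terms the two
+//-- pipelined halves computed below are
+//--     s  = r*r;
+//--     p1 = r + r*s*(A2 + s*A4);     /* odd powers of R  */
+//--     p2 = s*(A1 + s*(A3 + s*A5));  /* even powers of R */
+//-- and p1 + p2 equals the Horner form quoted above.)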
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...fp1 IS S = R*R
+
+ fmoves #0x3AB60B70,%fp2 // ...fp2 IS A5
+// MOVE.W #0,2(%a1) ...load 2^(J/64) in cache
+
+ fmulx %fp1,%fp2 // ...fp2 IS S*A5
+ fmovex %fp1,%fp3
+ fmuls #0x3C088895,%fp3 // ...fp3 IS S*A4
+
+ faddd EXPA3,%fp2 // ...fp2 IS A3+S*A5
+ faddd EXPA2,%fp3 // ...fp3 IS A2+S*A4
+
+ fmulx %fp1,%fp2 // ...fp2 IS S*(A3+S*A5)
+ movew %d0,SCALE(%a6) // ...SCALE is 2^(M) in extended
+ clrw SCALE+2(%a6)
+ movel #0x80000000,SCALE+4(%a6)
+ clrl SCALE+8(%a6)
+
+ fmulx %fp1,%fp3 // ...fp3 IS S*(A2+S*A4)
+
+ fadds #0x3F000000,%fp2 // ...fp2 IS A1+S*(A3+S*A5)
+ fmulx %fp0,%fp3 // ...fp3 IS R*S*(A2+S*A4)
+
+ fmulx %fp1,%fp2 // ...fp2 IS S*(A1+S*(A3+S*A5))
+ faddx %fp3,%fp0 // ...fp0 IS R+R*S*(A2+S*A4),
+// ...fp3 released
+
+ fmovex (%a1)+,%fp1 // ...fp1 is lead. pt. of 2^(J/64)
+ faddx %fp2,%fp0 // ...fp0 is EXP(R) - 1
+// ...fp2 released
+
+//--Step 5
+//--final reconstruction process
+//--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
+
+ fmulx %fp1,%fp0 // ...2^(J/64)*(Exp(R)-1)
+ fmovemx (%a7)+,%fp2-%fp2/%fp3 // ...fp2 restored
+ fadds (%a1),%fp0 // ...accurate 2^(J/64)
+
+ faddx %fp1,%fp0 // ...2^(J/64) + 2^(J/64)*...
+ movel ADJFLAG(%a6),%d0
+
+//--Step 6
+ tstl %d0
+ beqs NORMAL
+ADJUST:
+ fmulx ADJSCALE(%a6),%fp0
+NORMAL:
+ fmovel %d1,%FPCR // ...restore user FPCR
+ fmulx SCALE(%a6),%fp0 // ...multiply 2^(M)
+ bra t_frcinx
+
+EXPSM:
+//--Step 7
+ fmovemx (%a0),%fp0-%fp0 // ...in case X is denormalized
+ fmovel %d1,%FPCR
+ fadds #0x3F800000,%fp0 // ...1+X in user mode
+ bra t_frcinx
+
+EXPBIG:
+//--Step 8
+ cmpil #0x400CB27C,%d0 // ...16480 log2
+ bgts EXP2BIG
+//--Steps 8.2 -- 8.6
+ fmovex (%a0),%fp0 // ...load input from (a0)
+
+ fmovex %fp0,%fp1
+ fmuls #0x42B8AA3B,%fp0 // ...64/log2 * X
+ fmovemx %fp2-%fp2/%fp3,-(%a7) // ...save fp2
+ movel #1,ADJFLAG(%a6)
+ fmovel %fp0,%d0 // ...N = int( X * 64/log2 )
+ lea EXPTBL,%a1
+ fmovel %d0,%fp0 // ...convert to floating-format
+ movel %d0,L_SCR1(%a6) // ...save N temporarily
+ andil #0x3F,%d0 // ...D0 is J = N mod 64
+ lsll #4,%d0
+ addal %d0,%a1 // ...address of 2^(J/64)
+ movel L_SCR1(%a6),%d0
+ asrl #6,%d0 // ...D0 is K
+ movel %d0,L_SCR1(%a6) // ...save K temporarily
+ asrl #1,%d0 // ...D0 is M1
+ subl %d0,L_SCR1(%a6) // ...L_SCR1 is M
+ addiw #0x3FFF,%d0 // ...biased expo. of 2^(M1)
+ movew %d0,ADJSCALE(%a6) // ...ADJSCALE := 2^(M1)
+ clrw ADJSCALE+2(%a6)
+ movel #0x80000000,ADJSCALE+4(%a6)
+ clrl ADJSCALE+8(%a6)
+ movel L_SCR1(%a6),%d0 // ...D0 is M
+ addiw #0x3FFF,%d0 // ...biased expo. of 2^(M)
+ bra EXPCONT1 // ...go back to Step 3
+
+EXP2BIG:
+//--Step 9
+ fmovel %d1,%FPCR
+ movel (%a0),%d0
+ bclrb #sign_bit,(%a0) // ...setox always returns positive
+ cmpil #0,%d0
+ blt t_unfl
+ bra t_ovfl
+
+ .global setoxm1d
+setoxm1d:
+//--entry point for EXPM1(X), here X is denormalized
+//--Step 0.
+ bra t_extdnrm
+
+
+ .global setoxm1
+setoxm1:
+//--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
+
+//--Step 1.
+//--Step 1.1
+ movel (%a0),%d0 // ...load part of input X
+ andil #0x7FFF0000,%d0 // ...biased expo. of X
+ cmpil #0x3FFD0000,%d0 // ...1/4
+ bges EM1CON1 // ...|X| >= 1/4
+ bra EM1SM
+
+EM1CON1:
+//--Step 1.3
+//--The case |X| >= 1/4
+ movew 4(%a0),%d0 // ...expo. and partial sig. of |X|
+ cmpil #0x4004C215,%d0 // ...70log2 rounded up to 16 bits
+ bles EM1MAIN // ...1/4 <= |X| <= 70log2
+ bra EM1BIG
+
+EM1MAIN:
+//--Step 2.
+//--This is the case: 1/4 <= |X| <= 70 log2.
+ fmovex (%a0),%fp0 // ...load input from (a0)
+
+ fmovex %fp0,%fp1
+ fmuls #0x42B8AA3B,%fp0 // ...64/log2 * X
+ fmovemx %fp2-%fp2/%fp3,-(%a7) // ...save fp2
+// MOVE.W #$3F81,EM1A4 ...prefetch in CB mode
+ fmovel %fp0,%d0 // ...N = int( X * 64/log2 )
+ lea EXPTBL,%a1
+ fmovel %d0,%fp0 // ...convert to floating-format
+
+ movel %d0,L_SCR1(%a6) // ...save N temporarily
+ andil #0x3F,%d0 // ...D0 is J = N mod 64
+ lsll #4,%d0
+ addal %d0,%a1 // ...address of 2^(J/64)
+ movel L_SCR1(%a6),%d0
+ asrl #6,%d0 // ...D0 is M
+ movel %d0,L_SCR1(%a6) // ...save a copy of M
+// MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode
+
+//--Step 3.
+//--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
+//--a0 points to 2^(J/64), D0 and a1 both contain M
+ fmovex %fp0,%fp2
+ fmuls #0xBC317218,%fp0 // ...N * L1, L1 = lead(-log2/64)
+ fmulx L2,%fp2 // ...N * L2, L1+L2 = -log2/64
+ faddx %fp1,%fp0 // ...X + N*L1
+ faddx %fp2,%fp0 // ...fp0 is R, reduced arg.
+// MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache
+ addiw #0x3FFF,%d0 // ...D0 is biased expo. of 2^M
+
+//--Step 4.
+//--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
+//-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
+//--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
+//--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...fp1 IS S = R*R
+
+ fmoves #0x3950097B,%fp2 // ...fp2 IS a6
+// MOVE.W #0,2(%a1) ...load 2^(J/64) in cache
+
+ fmulx %fp1,%fp2 // ...fp2 IS S*A6
+ fmovex %fp1,%fp3
+ fmuls #0x3AB60B6A,%fp3 // ...fp3 IS S*A5
+
+ faddd EM1A4,%fp2 // ...fp2 IS A4+S*A6
+ faddd EM1A3,%fp3 // ...fp3 IS A3+S*A5
+ movew %d0,SC(%a6) // ...SC is 2^(M) in extended
+ clrw SC+2(%a6)
+ movel #0x80000000,SC+4(%a6)
+ clrl SC+8(%a6)
+
+ fmulx %fp1,%fp2 // ...fp2 IS S*(A4+S*A6)
+ movel L_SCR1(%a6),%d0 // ...D0 is M
+ negw %d0 // ...D0 is -M
+ fmulx %fp1,%fp3 // ...fp3 IS S*(A3+S*A5)
+ addiw #0x3FFF,%d0 // ...biased expo. of 2^(-M)
+ faddd EM1A2,%fp2 // ...fp2 IS A2+S*(A4+S*A6)
+ fadds #0x3F000000,%fp3 // ...fp3 IS A1+S*(A3+S*A5)
+
+ fmulx %fp1,%fp2 // ...fp2 IS S*(A2+S*(A4+S*A6))
+ oriw #0x8000,%d0 // ...signed/expo. of -2^(-M)
+ movew %d0,ONEBYSC(%a6) // ...OnebySc is -2^(-M)
+ clrw ONEBYSC+2(%a6)
+ movel #0x80000000,ONEBYSC+4(%a6)
+ clrl ONEBYSC+8(%a6)
+ fmulx %fp3,%fp1 // ...fp1 IS S*(A1+S*(A3+S*A5))
+// ...fp3 released
+
+ fmulx %fp0,%fp2 // ...fp2 IS R*S*(A2+S*(A4+S*A6))
+ faddx %fp1,%fp0 // ...fp0 IS R+S*(A1+S*(A3+S*A5))
+// ...fp1 released
+
+ faddx %fp2,%fp0 // ...fp0 IS EXP(R)-1
+// ...fp2 released
+ fmovemx (%a7)+,%fp2-%fp2/%fp3 // ...fp2 restored
+
+//--Step 5
+//--Compute 2^(J/64)*p
+
+ fmulx (%a1),%fp0 // ...2^(J/64)*(Exp(R)-1)
+
+//--Step 6
+//--Step 6.1
+ movel L_SCR1(%a6),%d0 // ...retrieve M
+ cmpil #63,%d0
+ bles MLE63
+//--Step 6.2 M >= 64
+ fmoves 12(%a1),%fp1 // ...fp1 is t
+ faddx ONEBYSC(%a6),%fp1 // ...fp1 is t+OnebySc
+ faddx %fp1,%fp0 // ...p+(t+OnebySc), fp1 released
+ faddx (%a1),%fp0 // ...T+(p+(t+OnebySc))
+ bras EM1SCALE
+MLE63:
+//--Step 6.3 M <= 63
+ cmpil #-3,%d0
+ bges MGEN3
+MLTN3:
+//--Step 6.4 M <= -4
+ fadds 12(%a1),%fp0 // ...p+t
+ faddx (%a1),%fp0 // ...T+(p+t)
+ faddx ONEBYSC(%a6),%fp0 // ...OnebySc + (T+(p+t))
+ bras EM1SCALE
+MGEN3:
+//--Step 6.5 -3 <= M <= 63
+ fmovex (%a1)+,%fp1 // ...fp1 is T
+ fadds (%a1),%fp0 // ...fp0 is p+t
+ faddx ONEBYSC(%a6),%fp1 // ...fp1 is T+OnebySc
+ faddx %fp1,%fp0 // ...(T+OnebySc)+(p+t)
+
+EM1SCALE:
+//--Step 6.6
+ fmovel %d1,%FPCR
+ fmulx SC(%a6),%fp0
+
+ bra t_frcinx
+
+EM1SM:
+//--Step 7 |X| < 1/4.
+ cmpil #0x3FBE0000,%d0 // ...2^(-65)
+ bges EM1POLY
+
+EM1TINY:
+//--Step 8 |X| < 2^(-65)
+ cmpil #0x00330000,%d0 // ...2^(-16312)
+ blts EM12TINY
+//--Step 8.2
+ movel #0x80010000,SC(%a6) // ...SC is -2^(-16382)
+ movel #0x80000000,SC+4(%a6)
+ clrl SC+8(%a6)
+ fmovex (%a0),%fp0
+ fmovel %d1,%FPCR
+ faddx SC(%a6),%fp0
+
+ bra t_frcinx
+
+EM12TINY:
+//--Step 8.3
+ fmovex (%a0),%fp0
+ fmuld TWO140,%fp0
+ movel #0x80010000,SC(%a6)
+ movel #0x80000000,SC+4(%a6)
+ clrl SC+8(%a6)
+ faddx SC(%a6),%fp0
+ fmovel %d1,%FPCR
+ fmuld TWON140,%fp0
+
+ bra t_frcinx
+
+EM1POLY:
+//--Step 9 exp(X)-1 by a simple polynomial
+ fmovex (%a0),%fp0 // ...fp0 is X
+ fmulx %fp0,%fp0 // ...fp0 is S := X*X
+ fmovemx %fp2-%fp2/%fp3,-(%a7) // ...save fp2
+ fmoves #0x2F30CAA8,%fp1 // ...fp1 is B12
+ fmulx %fp0,%fp1 // ...fp1 is S*B12
+ fmoves #0x310F8290,%fp2 // ...fp2 is B11
+ fadds #0x32D73220,%fp1 // ...fp1 is B10+S*B12
+
+ fmulx %fp0,%fp2 // ...fp2 is S*B11
+ fmulx %fp0,%fp1 // ...fp1 is S*(B10 + ...
+
+ fadds #0x3493F281,%fp2 // ...fp2 is B9+S*...
+ faddd EM1B8,%fp1 // ...fp1 is B8+S*...
+
+ fmulx %fp0,%fp2 // ...fp2 is S*(B9+...
+ fmulx %fp0,%fp1 // ...fp1 is S*(B8+...
+
+ faddd EM1B7,%fp2 // ...fp2 is B7+S*...
+ faddd EM1B6,%fp1 // ...fp1 is B6+S*...
+
+ fmulx %fp0,%fp2 // ...fp2 is S*(B7+...
+ fmulx %fp0,%fp1 // ...fp1 is S*(B6+...
+
+ faddd EM1B5,%fp2 // ...fp2 is B5+S*...
+ faddd EM1B4,%fp1 // ...fp1 is B4+S*...
+
+ fmulx %fp0,%fp2 // ...fp2 is S*(B5+...
+ fmulx %fp0,%fp1 // ...fp1 is S*(B4+...
+
+ faddd EM1B3,%fp2 // ...fp2 is B3+S*...
+ faddx EM1B2,%fp1 // ...fp1 is B2+S*...
+
+ fmulx %fp0,%fp2 // ...fp2 is S*(B3+...
+ fmulx %fp0,%fp1 // ...fp1 is S*(B2+...
+
+ fmulx %fp0,%fp2 // ...fp2 is S*S*(B3+...)
+ fmulx (%a0),%fp1 // ...fp1 is X*S*(B2...
+
+ fmuls #0x3F000000,%fp0 // ...fp0 is S*B1
+ faddx %fp2,%fp1 // ...fp1 is Q
+// ...fp2 released
+
+ fmovemx (%a7)+,%fp2-%fp2/%fp3 // ...fp2 restored
+
+ faddx %fp1,%fp0 // ...fp0 is S*B1+Q
+// ...fp1 released
+
+ fmovel %d1,%FPCR
+ faddx (%a0),%fp0
+
+ bra t_frcinx
+
+EM1BIG:
+//--Step 10 |X| > 70 log2
+ movel (%a0),%d0
+ cmpil #0,%d0
+ bgt EXPC1
+//--Step 10.2
+ fmoves #0xBF800000,%fp0 // ...fp0 is -1
+ fmovel %d1,%FPCR
+ fadds #0x00800000,%fp0 // ...-1 + 2^(-126)
+
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/sgetem.s b/c/src/lib/libcpu/m68k/m68040/fpsp/sgetem.s
new file mode 100644
index 0000000000..ca3cc09085
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sgetem.s
@@ -0,0 +1,141 @@
+//
+// sgetem.sa 3.1 12/10/90
+//
+// The entry point sGETEXP returns the exponent portion
+// of the input argument. The exponent bias is removed
+// and the exponent value is returned as an extended
+// precision number in fp0. sGETEXPD handles denormalized
+// numbers.
+//
+// The entry point sGETMAN extracts the mantissa of the
+// input argument. The mantissa is converted to an
+// extended precision number and returned in fp0. The
+// range of the result is [1.0, 2.0).
+//
+//
+// Input: Double-extended number X in the ETEMP space in
+// the floating-point save stack.
+//
+// Output: The functions return exp(X) or man(X) in fp0.
+//
+// Modified: fp0.
+//
+//
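+// (Illustrative sketch, not part of the FPSP sources: the equivalent
+// computation in C using frexp() from <math.h> on a double; the name
+// getem_sketch is made up for this note.  The real code edits the 16-bit
+// exponent field of the extended-precision operand directly.)
+//
+//	#include <math.h>
+//	void getem_sketch(double x, double *expo, double *man)
+//	{
+//	    int e;
+//	    double f = frexp(x, &e);   /* x = f * 2^e with 0.5 <= |f| < 1   */
+//	    *expo = (double)(e - 1);   /* unbiased exponent of x            */
+//	    *man  = 2.0 * f;           /* mantissa, magnitude in [1.0, 2.0) */
+//	}
+//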
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SGETEM idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref nrm_set
+
+//
+// This entry point is used by the unimplemented instruction exception
+// handler. It points a0 to the input operand.
+//
+//
+//
+// SGETEXP
+//
+
+ .global sgetexp
+sgetexp:
+ movew LOCAL_EX(%a0),%d0 //get the exponent
+ bclrl #15,%d0 //clear the sign bit
+ subw #0x3fff,%d0 //subtract off the bias
+ fmovew %d0,%fp0 //move the exp to fp0
+ rts
+
+ .global sgetexpd
+sgetexpd:
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ bsr nrm_set //normalize (exp will go negative)
+ movew LOCAL_EX(%a0),%d0 //load resulting exponent into d0
+ subw #0x3fff,%d0 //subtract off the bias
+ fmovew %d0,%fp0 //move the exp to fp0
+ rts
+//
+//
+// This entry point is used by the unimplemented instruction exception
+// handler. It points a0 to the input operand.
+//
+//
+//
+// SGETMAN
+//
+//
+// For normalized numbers, leave the mantissa alone, simply load
+// with an exponent of +/- $3fff.
+//
+ .global sgetman
+sgetman:
+ movel USER_FPCR(%a6),%d0
+ andil #0xffffff00,%d0 //clear rounding precision and mode
+ fmovel %d0,%fpcr //this fpcr setting is used by the 882
+ movew LOCAL_EX(%a0),%d0 //get the exp (really just want sign bit)
+ orw #0x7fff,%d0 //clear old exp
+ bclrl #14,%d0 //make it the new exp +-3fff
+ movew %d0,LOCAL_EX(%a0) //move the sign & exp back to fsave stack
+ fmovex (%a0),%fp0 //put new value back in fp0
+ rts
+
+//
+// For denormalized numbers, shift the mantissa until the j-bit = 1,
+// then load the exponent with +/- $3fff.
+//
+ .global sgetmand
+sgetmand:
+ movel LOCAL_HI(%a0),%d0 //load ms mant in d0
+ movel LOCAL_LO(%a0),%d1 //load ls mant in d1
+ bsr shft //shift mantissa bits till msbit is set
+ movel %d0,LOCAL_HI(%a0) //put ms mant back on stack
+ movel %d1,LOCAL_LO(%a0) //put ls mant back on stack
+ bras sgetman
+
+//
+// SHFT
+//
+// Shifts the mantissa bits until msbit is set.
+// input:
+// ms mantissa part in d0
+// ls mantissa part in d1
+// output:
+// shifted bits in d0 and d1
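+//
+// (Illustrative sketch, not from the sources: the same normalization on
+//  a 64-bit view of the two words, assuming at least one bit is set;
+//  clz64() is a hypothetical count-leading-zeros helper.)
+//	uint64_t m = ((uint64_t)d0 << 32) | d1;  /* d0:d1 as one mantissa   */
+//	m <<= clz64(m);                          /* hypothetical clz64():   */
+//	                                         /* shift msbit (j-bit) up  */
+//	d0 = (uint32_t)(m >> 32);                /* high word back to d0    */
+//	d1 = (uint32_t)m;                        /* low word back to d1     */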
+shft:
+ tstl %d0 //if any bits set in ms mant
+ bnes upper //then branch
+// ;else no bits set in ms mant
+ tstl %d1 //test if any bits set in ls mant
+ bnes cont //if set then continue
+ bras shft_end //else return
+cont:
+ movel %d3,-(%a7) //save d3
+ exg %d0,%d1 //shift ls mant to ms mant
+ bfffo %d0{#0:#32},%d3 //find first 1 in ls mant to d0
+ lsll %d3,%d0 //shift first 1 to integer bit in ms mant
+ movel (%a7)+,%d3 //restore d3
+ bras shft_end
+upper:
+
+ moveml %d3/%d5/%d6,-(%a7) //save registers
+ bfffo %d0{#0:#32},%d3 //find first 1 in ls mant to d0
+ lsll %d3,%d0 //shift ms mant until j-bit is set
+ movel %d1,%d6 //save ls mant in d6
+ lsll %d3,%d1 //shift ls mant by count
+ movel #32,%d5
+ subl %d3,%d5 //sub 32 from shift for ls mant
+ lsrl %d5,%d6 //shift off all bits but those that will
+// ;be shifted into ms mant
+ orl %d6,%d0 //shift the ls mant bits into the ms mant
+ moveml (%a7)+,%d3/%d5/%d6 //restore registers
+shft_end:
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/sint.s b/c/src/lib/libcpu/m68k/m68040/fpsp/sint.s
new file mode 100644
index 0000000000..9cfd6c67ed
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sint.s
@@ -0,0 +1,247 @@
+//
+// sint.sa 3.1 12/10/90
+//
+// The entry point sINT computes the rounded integer
+// equivalent of the input argument, sINTRZ computes
+// the integer rounded to zero of the input argument.
+//
+// Entry points sint and sintrz are called from do_func
+// to emulate the fint and fintrz unimplemented instructions,
+// respectively. Entry point sintdo is used by bindec.
+//
+// Input: (Entry points sint and sintrz) Double-extended
+// number X in the ETEMP space in the floating-point
+// save stack.
+// (Entry point sintdo) Double-extended number X in
+// location pointed to by the address register a0.
+// (Entry point sintd) Double-extended denormalized
+// number X in the ETEMP space in the floating-point
+// save stack.
+//
+// Output: The function returns int(X) or intrz(X) in fp0.
+//
+// Modifies: fp0.
+//
+// Algorithm: (sint and sintrz)
+//
+// 1. If exp(X) >= 63, return X.
+// If exp(X) < 0, return +/- 0 or +/- 1, according to
+// the rounding mode.
+//
+// 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the
+// result to the exponent $403e.
+//
+// 3. Round the result in the mode given in USER_FPCR. For
+// sintrz, force round-to-zero mode.
+//
+// 4. Normalize the rounded result; store in fp0.
+//
+// For the denormalized cases, force the correct result
+// for the given sign and rounding mode.
+//
+// Sign(X)
+// RMODE + -
+// ----- --------
+// RN +0 -0
+// RZ +0 -0
+// RM +0 -1
+// RP +1 -0
+//
+//
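+// (Illustrative sketch, not part of the FPSP sources: the table above
+// written in C for a tiny non-zero x, using fegetround() from <fenv.h>;
+// FE_UPWARD and FE_DOWNWARD correspond to RP and RM.  The name
+// sintd_sketch is made up for this note.)
+//
+//	#include <fenv.h>
+//	double sintd_sketch(double x)                   /* x non-zero, tiny */
+//	{
+//	    int neg = (x < 0.0);
+//	    switch (fegetround()) {
+//	    case FE_UPWARD:   return neg ? -0.0 : 1.0;  /* RP: +1 or -0 */
+//	    case FE_DOWNWARD: return neg ? -1.0 : 0.0;  /* RM: +0 or -1 */
+//	    default:          return neg ? -0.0 : 0.0;  /* RN, RZ: +/-0 */
+//	    }
+//	}
+//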
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SINT idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref dnrm_lp
+ |xref nrm_set
+ |xref round
+ |xref t_inx2
+ |xref ld_pone
+ |xref ld_mone
+ |xref ld_pzero
+ |xref ld_mzero
+ |xref snzrinx
+
+//
+// FINT
+//
+ .global sint
+sint:
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //use user's mode for rounding
+// ;implicitly has extend precision
+// ;in upper word.
+ movel %d1,L_SCR1(%a6) //save mode bits
+ bras sintexc
+
+//
+// FINT with extended denorm inputs.
+//
+ .global sintd
+sintd:
+ btstb #5,FPCR_MODE(%a6)
+ beq snzrinx //if round nearest or round zero, +/- 0
+ btstb #4,FPCR_MODE(%a6)
+ beqs rnd_mns
+rnd_pls:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ bnes sintmz
+ bsr ld_pone //if round plus inf and pos, answer is +1
+ bra t_inx2
+rnd_mns:
+ btstb #sign_bit,LOCAL_EX(%a0)
+ beqs sintpz
+ bsr ld_mone //if round mns inf and neg, answer is -1
+ bra t_inx2
+sintpz:
+ bsr ld_pzero
+ bra t_inx2
+sintmz:
+ bsr ld_mzero
+ bra t_inx2
+
+//
+// FINTRZ
+//
+ .global sintrz
+sintrz:
+ movel #1,L_SCR1(%a6) //use rz mode for rounding
+// ;implicitly has extend precision
+// ;in upper word.
+ bras sintexc
+//
+// SINTDO
+//
+// Input: a0 points to an IEEE extended format operand
+// Output: fp0 has the result
+//
+// Exceptions:
+//
+// If the subroutine results in an inexact operation, the inx2 and
+// ainx bits in the USER_FPSR are set.
+//
+//
+ .global sintdo
+sintdo:
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //use user's mode for rounding
+// ;implicitly has ext precision
+// ;in upper word.
+ movel %d1,L_SCR1(%a6) //save mode bits
+//
+// Real work of sint is in sintexc
+//
+sintexc:
+ bclrb #sign_bit,LOCAL_EX(%a0) //convert to internal extended
+// ;format
+ sne LOCAL_SGN(%a0)
+ cmpw #0x403e,LOCAL_EX(%a0) //check if (unbiased) exp > 63
+ bgts out_rnge //branch if exp > 63
+ cmpw #0x3ffd,LOCAL_EX(%a0) //check if (unbiased) exp < -1
+ bgt in_rnge //if 63 >= exp >= -1, do calc
+//
+// Input magnitude is less than 1/2, so the result is +/-0 or +/-1. Check
+// the sign and the directed rounding modes. L_SCR1 contains the rmode in
+// the lower byte.
+//
+un_rnge:
+ btstb #1,L_SCR1+3(%a6) //check for rn and rz
+ beqs un_rnrz
+ tstb LOCAL_SGN(%a0) //check for sign
+ bnes un_rmrp_neg
+//
+// Sign is +. If rp, load +1.0, if rm, load +0.0
+//
+ cmpib #3,L_SCR1+3(%a6) //check for rp
+ beqs un_ldpone //if rp, load +1.0
+ bsr ld_pzero //if rm, load +0.0
+ bra t_inx2
+un_ldpone:
+ bsr ld_pone
+ bra t_inx2
+//
+// Sign is -. If rm, load -1.0, if rp, load -0.0
+//
+un_rmrp_neg:
+ cmpib #2,L_SCR1+3(%a6) //check for rm
+ beqs un_ldmone //if rm, load -1.0
+ bsr ld_mzero //if rp, load -0.0
+ bra t_inx2
+un_ldmone:
+ bsr ld_mone
+ bra t_inx2
+//
+// Rmode is rn or rz; return signed zero
+//
+un_rnrz:
+ tstb LOCAL_SGN(%a0) //check for sign
+ bnes un_rnrz_neg
+ bsr ld_pzero
+ bra t_inx2
+un_rnrz_neg:
+ bsr ld_mzero
+ bra t_inx2
+
+//
+// Input is greater than 2^63. All bits are significant. Return
+// the input.
+//
+out_rnge:
+ bfclr LOCAL_SGN(%a0){#0:#8} //change back to IEEE ext format
+ beqs intps
+ bsetb #sign_bit,LOCAL_EX(%a0)
+intps:
+ fmovel %fpcr,-(%sp)
+ fmovel #0,%fpcr
+ fmovex LOCAL_EX(%a0),%fp0 //if exp > 63
+// ;then return X to the user
+// ;there are no fraction bits
+ fmovel (%sp)+,%fpcr
+ rts
+
+in_rnge:
+// ;shift off fraction bits
+ clrl %d0 //clear d0 - initial g,r,s for
+// ;dnrm_lp
+ movel #0x403e,%d1 //set threshold for dnrm_lp
+// ;assumes a0 points to operand
+ bsr dnrm_lp
+// ;returns unnormalized number
+// ;pointed by a0
+// ;output d0 supplies g,r,s
+// ;used by round
+ movel L_SCR1(%a6),%d1 //use selected rounding mode
+//
+//
+ bsr round //round the unnorm based on users
+// ;input a0 ptr to ext X
+// ; d0 g,r,s bits
+// ; d1 PREC/MODE info
+// ;output a0 ptr to rounded result
+// ;inexact flag set in USER_FPSR
+// ;if initial grs set
+//
+// normalize the rounded result and store value in fp0
+//
+ bsr nrm_set //normalize the unnorm
+// ;Input: a0 points to operand to
+// ;be normalized
+// ;Output: a0 points to normalized
+// ;result
+ bfclr LOCAL_SGN(%a0){#0:#8}
+ beqs nrmrndp
+ bsetb #sign_bit,LOCAL_EX(%a0) //return to IEEE extended format
+nrmrndp:
+ fmovel %fpcr,-(%sp)
+ fmovel #0,%fpcr
+ fmovex LOCAL_EX(%a0),%fp0 //move result to fp0
+ fmovel (%sp)+,%fpcr
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/slog2.s b/c/src/lib/libcpu/m68k/m68040/fpsp/slog2.s
new file mode 100644
index 0000000000..b65e6f7b16
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/slog2.s
@@ -0,0 +1,188 @@
+//
+// slog2.sa 3.1 12/10/90
+//
+// The entry point slog10 computes the base-10
+// logarithm of an input argument X.
+// slog10d does the same except the input value is a
+// denormalized number.
+// sLog2 and sLog2d are the base-2 analogues.
+//
+// INPUT: Double-extended value in memory location pointed to
+// by address register a0.
+//
+// OUTPUT: log_10(X) or log_2(X) returned in floating-point
+// register fp0.
+//
+// ACCURACY and MONOTONICITY: The returned result is within 1.7
+// ulps in 64 significant bits, i.e. within 0.5003 ulp
+// to 53 bits if the result is subsequently rounded
+// to double precision. The result is provably monotonic
+// in double precision.
+//
+// SPEED: Two timings are measured, both in the copy-back mode.
+// The first one is measured when the function is invoked
+// the first time (so the instructions and data are not
+// in cache), and the second one is measured when the
+// function is reinvoked at the same input argument.
+//
+// ALGORITHM and IMPLEMENTATION NOTES:
+//
+// slog10d:
+//
+// Step 0. If X < 0, create a NaN and raise the invalid operation
+// flag. Otherwise, save FPCR in D1; set FpCR to default.
+// Notes: Default means round-to-nearest mode, no floating-point
+// traps, and precision control = double extended.
+//
+// Step 1. Call slognd to obtain Y = log(X), the natural log of X.
+// Notes: Even if X is denormalized, log(X) is always normalized.
+//
+// Step 2. Compute log_10(X) = log(X) * (1/log(10)).
+// 2.1 Restore the user FPCR
+// 2.2 Return ans := Y * INV_L10.
+//
+//
+// slog10:
+//
+// Step 0. If X < 0, create a NaN and raise the invalid operation
+// flag. Otherwise, save FPCR in D1; set FpCR to default.
+// Notes: Default means round-to-nearest mode, no floating-point
+// traps, and precision control = double extended.
+//
+// Step 1. Call sLogN to obtain Y = log(X), the natural log of X.
+//
+// Step 2. Compute log_10(X) = log(X) * (1/log(10)).
+// 2.1 Restore the user FPCR
+// 2.2 Return ans := Y * INV_L10.
+//
+//
+// sLog2d:
+//
+// Step 0. If X < 0, create a NaN and raise the invalid operation
+// flag. Otherwise, save FPCR in D1; set FpCR to default.
+// Notes: Default means round-to-nearest mode, no floating-point
+// traps, and precision control = double extended.
+//
+// Step 1. Call slognd to obtain Y = log(X), the natural log of X.
+// Notes: Even if X is denormalized, log(X) is always normalized.
+//
+// Step 2. Compute log_2(X) = log(X) * (1/log(2)).
+// 2.1 Restore the user FPCR
+// 2.2 Return ans := Y * INV_L2.
+//
+//
+// sLog2:
+//
+// Step 0. If X < 0, create a NaN and raise the invalid operation
+// flag. Otherwise, save FPCR in D1; set FpCR to default.
+// Notes: Default means round-to-nearest mode, no floating-point
+// traps, and precision control = double extended.
+//
+// Step 1. If X is not an integer power of two, i.e., X != 2^k,
+// go to Step 3.
+//
+// Step 2. Return k.
+// 2.1 Get integer k, X = 2^k.
+// 2.2 Restore the user FPCR.
+// 2.3 Return ans := convert-to-double-extended(k).
+//
+// Step 3. Call sLogN to obtain Y = log(X), the natural log of X.
+//
+// Step 4. Compute log_2(X) = log(X) * (1/log(2)).
+// 4.1 Restore the user FPCR
+// 4.2 Return ans := Y * INV_L2.
+//
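+// (Illustrative sketch, not part of the FPSP sources: the sLog2 steps
+// above written in C for x > 0 with frexp() and log() from <math.h>;
+// the constant 1.4426950408889634 is 1/log(2), the same value stored
+// in INV_L2 below.  The name log2_sketch is made up for this note.)
+//
+//	#include <math.h>
+//	double log2_sketch(double x)             /* assumes x > 0 */
+//	{
+//	    int e;
+//	    double f = frexp(x, &e);             /* x = f * 2^e, f in [0.5, 1) */
+//	    if (f == 0.5)                        /* x is exactly 2^(e-1)       */
+//	        return (double)(e - 1);          /* Step 2: return k           */
+//	    return log(x) * 1.4426950408889634;  /* Step 4: log(X) * INV_L2    */
+//	}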
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SLOG2 idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ |xref t_frcinx
+ |xref t_operr
+ |xref slogn
+ |xref slognd
+
+INV_L10: .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
+
+INV_L2: .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
+
+ .global slog10d
+slog10d:
+//--entry point for Log10(X), X is denormalized
+ movel (%a0),%d0
+ blt invalid
+ movel %d1,-(%sp)
+ clrl %d1
+ bsr slognd // ...log(X), X denorm.
+ fmovel (%sp)+,%fpcr
+ fmulx INV_L10,%fp0
+ bra t_frcinx
+
+ .global slog10
+slog10:
+//--entry point for Log10(X), X is normalized
+
+ movel (%a0),%d0
+ blt invalid
+ movel %d1,-(%sp)
+ clrl %d1
+ bsr slogn // ...log(X), X normal.
+ fmovel (%sp)+,%fpcr
+ fmulx INV_L10,%fp0
+ bra t_frcinx
+
+
+ .global slog2d
+slog2d:
+//--entry point for Log2(X), X is denormalized
+
+ movel (%a0),%d0
+ blt invalid
+ movel %d1,-(%sp)
+ clrl %d1
+ bsr slognd // ...log(X), X denorm.
+ fmovel (%sp)+,%fpcr
+ fmulx INV_L2,%fp0
+ bra t_frcinx
+
+ .global slog2
+slog2:
+//--entry point for Log2(X), X is normalized
+ movel (%a0),%d0
+ blt invalid
+
+ movel 8(%a0),%d0
+ bnes continue // ...X is not 2^k
+
+ movel 4(%a0),%d0
+ andl #0x7FFFFFFF,%d0
+ tstl %d0
+ bnes continue
+
+//--X = 2^k.
+ movew (%a0),%d0
+ andl #0x00007FFF,%d0
+ subl #0x3FFF,%d0
+ fmovel %d1,%fpcr
+ fmovel %d0,%fp0
+ bra t_frcinx
+
+continue:
+ movel %d1,-(%sp)
+ clrl %d1
+ bsr slogn // ...log(X), X normal.
+ fmovel (%sp)+,%fpcr
+ fmulx INV_L2,%fp0
+ bra t_frcinx
+
+invalid:
+ bra t_operr
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/slogn.s b/c/src/lib/libcpu/m68k/m68040/fpsp/slogn.s
new file mode 100644
index 0000000000..74cb5f99eb
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/slogn.s
@@ -0,0 +1,592 @@
+//
+// slogn.sa 3.1 12/10/90
+//
+// slogn computes the natural logarithm of an
+// input value. slognd does the same except the input value is a
+// denormalized number. slognp1 computes log(1+X), and slognp1d
+// computes log(1+X) for denormalized X.
+//
+// Input: Double-extended value in memory location pointed to by address
+// register a0.
+//
+// Output: log(X) or log(1+X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 2 ulps in
+// 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program slogn takes approximately 190 cycles for input
+// argument X such that |X-1| >= 1/16, which is the the usual
+// situation. For those arguments, slognp1 takes approximately
+// 210 cycles. For the less common arguments, the program will
+// run no worse than 10% slower.
+//
+// Algorithm:
+// LOGN:
+// Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
+// u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
+//
+// Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
+// significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
+// 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
+//
+// Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
+// log(1+u) = poly.
+//
+// Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
+// by k*log(2) + (log(F) + poly). The values of log(F) are calculated
+// beforehand and stored in the program.
+//
+// lognp1:
+// Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
+// u where u = 2X/(2+X). Otherwise, move on to Step 2.
+//
+// Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
+// of the algorithm for LOGN and compute log(1+X) as
+// k*log(2) + log(F) + poly where poly approximates log(1+u),
+// u = (Y-F)/F.
+//
+// Implementation Notes:
+// Note 1. There are 64 different possible values for F, thus 64 log(F)'s
+// need to be tabulated. Moreover, the values of 1/F are also
+// tabulated so that the division in (Y-F)/F can be performed by a
+// multiplication.
+//
+// Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
+// Y-F has to be calculated carefully when 1/2 <= X < 3/2.
+//
+// Note 3. To fully exploit the pipeline, polynomials are usually separated
+// into two parts evaluated independently before being added up.
+//
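+//
+// (Illustrative sketch, not part of the FPSP sources: the reduction in
+//  Steps 2-4 of LOGN written in double precision C, with log1p() from
+//  <math.h> standing in for the polynomial; the real code reads log(F)
+//  and 1/F from the 64-entry table instead of recomputing them.  The
+//  name logn_sketch is made up for this note.)
+//
+//	#include <math.h>
+//	double logn_sketch(double x)        /* assumes x > 0, |x-1| >= 1/16 */
+//	{
+//	    int k;
+//	    double y = 2.0 * frexp(x, &k);  /* x = 2^(k-1) * y, y in [1, 2)  */
+//	    k -= 1;
+//	    double f = (floor(64.0 * y) + 0.5) / 64.0;  /* F = 1.xxxxxx1     */
+//	    double u = (y - f) / f;         /* |u| <= 2^(-7)                 */
+//	    return k * 0.69314718055994530942   /* k*log(2)                  */
+//	         + log(f)                       /* log(F), tabulated here    */
+//	         + log1p(u);                    /* polynomial models this    */
+//	}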
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//slogn idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+BOUNDS1: .long 0x3FFEF07D,0x3FFF8841
+BOUNDS2: .long 0x3FFE8000,0x3FFFC000
+
+LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
+
+one: .long 0x3F800000
+zero: .long 0x00000000
+infty: .long 0x7F800000
+negone: .long 0xBF800000
+
+LOGA6: .long 0x3FC2499A,0xB5E4040B
+LOGA5: .long 0xBFC555B5,0x848CB7DB
+
+LOGA4: .long 0x3FC99999,0x987D8730
+LOGA3: .long 0xBFCFFFFF,0xFF6F7E97
+
+LOGA2: .long 0x3FD55555,0x555555a4
+LOGA1: .long 0xBFE00000,0x00000008
+
+LOGB5: .long 0x3F175496,0xADD7DAD6
+LOGB4: .long 0x3F3C71C2,0xFE80C7E0
+
+LOGB3: .long 0x3F624924,0x928BCCFF
+LOGB2: .long 0x3F899999,0x999995EC
+
+LOGB1: .long 0x3FB55555,0x55555555
+TWO: .long 0x40000000,0x00000000
+
+LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
+
+LOGTBL:
+ .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
+ .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
+ .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
+ .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
+ .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
+ .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
+ .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
+ .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
+ .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
+ .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
+ .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
+ .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
+ .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
+ .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
+ .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
+ .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
+ .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
+ .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
+ .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
+ .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
+ .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
+ .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
+ .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
+ .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
+ .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
+ .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
+ .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
+ .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
+ .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
+ .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
+ .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
+ .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
+ .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
+ .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
+ .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
+ .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
+ .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
+ .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
+ .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
+ .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
+ .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
+ .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
+ .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
+ .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
+ .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
+ .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
+ .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
+ .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
+ .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
+ .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
+ .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
+ .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
+ .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
+ .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
+ .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
+ .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
+ .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
+ .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
+ .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
+ .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
+ .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
+ .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
+ .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
+ .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
+ .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
+ .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
+ .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
+ .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
+ .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
+ .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
+ .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
+ .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
+ .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
+ .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
+ .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
+ .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
+ .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
+ .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
+ .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
+ .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
+ .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
+ .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
+ .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
+ .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
+ .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
+ .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
+ .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
+ .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
+ .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
+ .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
+ .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
+ .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
+ .long 0x3FFE0000,0x94458094,0x45809446,0x00000000
+ .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
+ .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
+ .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
+ .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
+ .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
+ .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
+ .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
+ .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
+ .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
+ .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
+ .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
+ .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
+ .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
+ .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
+ .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
+ .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
+ .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
+ .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
+ .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
+ .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
+ .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
+ .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
+ .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
+ .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
+ .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
+ .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
+ .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
+ .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
+ .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
+ .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
+ .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
+ .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
+ .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
+ .long 0x3FFE0000,0x80808080,0x80808081,0x00000000
+ .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
+
+ .set ADJK,L_SCR1
+
+ .set X,FP_SCR1
+ .set XDCARE,X+2
+ .set XFRAC,X+4
+
+ .set F,FP_SCR2
+ .set FFRAC,F+4
+
+ .set KLOG2,FP_SCR3
+
+ .set SAVEU,FP_SCR4
+
+ | xref t_frcinx
+ |xref t_extdnrm
+ |xref t_operr
+ |xref t_dz
+
+ .global slognd
+slognd:
+//--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
+
+ movel #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0
+
+//----normalize the input value by left shifting k bits (k to be determined
+//----below), adjusting exponent and storing -k to ADJK
+//----the value TWOTO100 is no longer needed.
+//----Note that this code assumes the denormalized input is NON-ZERO.
+
+ moveml %d2-%d7,-(%a7) // ...save some registers
+ movel #0x00000000,%d3 // ...D3 is exponent of smallest norm. #
+ movel 4(%a0),%d4
+ movel 8(%a0),%d5 // ...(D4,D5) is (Hi_X,Lo_X)
+ clrl %d2 // ...D2 used for holding K
+
+ tstl %d4
+ bnes HiX_not0
+
+HiX_0:
+ movel %d5,%d4
+ clrl %d5
+ movel #32,%d2
+ clrl %d6
+ bfffo %d4{#0:#32},%d6
+ lsll %d6,%d4
+ addl %d6,%d2 // ...(D3,D4,D5) is normalized
+
+ movel %d3,X(%a6)
+ movel %d4,XFRAC(%a6)
+ movel %d5,XFRAC+4(%a6)
+ negl %d2
+ movel %d2,ADJK(%a6)
+ fmovex X(%a6),%fp0
+ moveml (%a7)+,%d2-%d7 // ...restore registers
+ lea X(%a6),%a0
+ bras LOGBGN // ...begin regular log(X)
+
+
+HiX_not0:
+ clrl %d6
+ bfffo %d4{#0:#32},%d6 // ...find first 1
+ movel %d6,%d2 // ...get k
+ lsll %d6,%d4
+ movel %d5,%d7 // ...a copy of D5
+ lsll %d6,%d5
+ negl %d6
+ addil #32,%d6
+ lsrl %d6,%d7
+ orl %d7,%d4 // ...(D3,D4,D5) normalized
+
+ movel %d3,X(%a6)
+ movel %d4,XFRAC(%a6)
+ movel %d5,XFRAC+4(%a6)
+ negl %d2
+ movel %d2,ADJK(%a6)
+ fmovex X(%a6),%fp0
+ moveml (%a7)+,%d2-%d7 // ...restore registers
+ lea X(%a6),%a0
+ bras LOGBGN // ...begin regular log(X)
+
+
+ .global slogn
+slogn:
+//--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
+
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+ movel #0x00000000,ADJK(%a6)
+
+LOGBGN:
+//--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
+//--A FINITE, NON-ZERO, NORMALIZED NUMBER.
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+
+ movel (%a0),X(%a6)
+ movel 4(%a0),X+4(%a6)
+ movel 8(%a0),X+8(%a6)
+
+ cmpil #0,%d0 // ...CHECK IF X IS NEGATIVE
+ blt LOGNEG // ...LOG OF NEGATIVE ARGUMENT IS INVALID
+ cmp2l BOUNDS1,%d0 // ...X IS POSITIVE, CHECK IF X IS NEAR 1
+ bcc LOGNEAR1 // ...BOUNDS IS ROUGHLY [15/16, 17/16]
+
+LOGMAIN:
+//--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
+
+//--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
+//--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
+//--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
+//-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
+//--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
+//--LOG(1+U) CAN BE VERY EFFICIENT.
+//--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
+//--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
+
+//--GET K, Y, F, AND ADDRESS OF 1/F.
+ asrl #8,%d0
+ asrl #8,%d0 // ...SHIFTED 16 BITS, BIASED EXPO. OF X
+ subil #0x3FFF,%d0 // ...THIS IS K
+ addl ADJK(%a6),%d0 // ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
+ lea LOGTBL,%a0 // ...BASE ADDRESS OF 1/F AND LOG(F)
+ fmovel %d0,%fp1 // ...CONVERT K TO FLOATING-POINT FORMAT
+
+//--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
+ movel #0x3FFF0000,X(%a6) // ...X IS NOW Y, I.E. 2^(-K)*X
+ movel XFRAC(%a6),FFRAC(%a6)
+ andil #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y
+ oril #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT
+ movel FFRAC(%a6),%d0 // ...READY TO GET ADDRESS OF 1/F
+ andil #0x7E000000,%d0
+ asrl #8,%d0
+ asrl #8,%d0
+ asrl #4,%d0 // ...SHIFTED 20, D0 IS THE DISPLACEMENT
+ addal %d0,%a0 // ...A0 IS THE ADDRESS FOR 1/F
+
+ fmovex X(%a6),%fp0
+ movel #0x3fff0000,F(%a6)
+ clrl F+8(%a6)
+ fsubx F(%a6),%fp0 // ...Y-F
+ fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2 WHILE FP0 IS NOT READY
+//--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
+//--REGISTERS SAVED: FPCR, FP1, FP2
+
+LP1CONT1:
+//--A RE-ENTRY POINT FOR LOGNP1
+ fmulx (%a0),%fp0 // ...FP0 IS U = (Y-F)/F
+ fmulx LOGOF2,%fp1 // ...GET K*LOG2 WHILE FP0 IS NOT READY
+ fmovex %fp0,%fp2
+ fmulx %fp2,%fp2 // ...FP2 IS V=U*U
+ fmovex %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1
+
+//--LOG(1+U) IS APPROXIMATED BY
+//--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
+//--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
+
+ fmovex %fp2,%fp3
+ fmovex %fp2,%fp1
+
+ fmuld LOGA6,%fp1 // ...V*A6
+ fmuld LOGA5,%fp2 // ...V*A5
+
+ faddd LOGA4,%fp1 // ...A4+V*A6
+ faddd LOGA3,%fp2 // ...A3+V*A5
+
+ fmulx %fp3,%fp1 // ...V*(A4+V*A6)
+ fmulx %fp3,%fp2 // ...V*(A3+V*A5)
+
+ faddd LOGA2,%fp1 // ...A2+V*(A4+V*A6)
+ faddd LOGA1,%fp2 // ...A1+V*(A3+V*A5)
+
+ fmulx %fp3,%fp1 // ...V*(A2+V*(A4+V*A6))
+ addal #16,%a0 // ...ADDRESS OF LOG(F)
+ fmulx %fp3,%fp2 // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
+
+ fmulx %fp0,%fp1 // ...U*V*(A2+V*(A4+V*A6))
+ faddx %fp2,%fp0 // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
+
+ faddx (%a0),%fp1 // ...LOG(F)+U*V*(A2+V*(A4+V*A6))
+ fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...RESTORE FP2
+ faddx %fp1,%fp0 // ...FP0 IS LOG(F) + LOG(1+U)
+
+ fmovel %d1,%fpcr
+ faddx KLOG2(%a6),%fp0 // ...FINAL ADD
+ bra t_frcinx
+
+
+LOGNEAR1:
+//--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
+ fmovex %fp0,%fp1
+ fsubs one,%fp1 // ...FP1 IS X-1
+ fadds one,%fp0 // ...FP0 IS X+1
+ faddx %fp1,%fp1 // ...FP1 IS 2(X-1)
+//--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
+//--IN U, U = 2(X-1)/(X+1) = FP1/FP0
+
+LP1CONT2:
+//--THIS IS A RE-ENTRY POINT FOR LOGNP1
+ fdivx %fp0,%fp1 // ...FP1 IS U
+ fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2
+//--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
+//--LET V=U*U, W=V*V, CALCULATE
+//--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
+//--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
+ fmovex %fp1,%fp0
+ fmulx %fp0,%fp0 // ...FP0 IS V
+ fmovex %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...FP1 IS W
+
+ fmoved LOGB5,%fp3
+ fmoved LOGB4,%fp2
+
+ fmulx %fp1,%fp3 // ...W*B5
+ fmulx %fp1,%fp2 // ...W*B4
+
+ faddd LOGB3,%fp3 // ...B3+W*B5
+ faddd LOGB2,%fp2 // ...B2+W*B4
+
+ fmulx %fp3,%fp1 // ...W*(B3+W*B5), FP3 RELEASED
+
+ fmulx %fp0,%fp2 // ...V*(B2+W*B4)
+
+ faddd LOGB1,%fp1 // ...B1+W*(B3+W*B5)
+ fmulx SAVEU(%a6),%fp0 // ...FP0 IS U*V
+
+ faddx %fp2,%fp1 // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
+ fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED
+
+ fmulx %fp1,%fp0 // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
+
+ fmovel %d1,%fpcr
+ faddx SAVEU(%a6),%fp0
+ bra t_frcinx
+ rts
+
+LOGNEG:
+//--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
+ bra t_operr
+
+ .global slognp1d
+slognp1d:
+//--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
+// Simply return the denorm
+
+ bra t_extdnrm
+
+ .global slognp1
+slognp1:
+//--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
+
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+ fabsx %fp0 //test magnitude
+ fcmpx LTHOLD,%fp0 //compare with min threshold
+ fbgt LP1REAL //if greater, continue
+ fmovel #0,%fpsr //clr N flag from compare
+ fmovel %d1,%fpcr
+ fmovex (%a0),%fp0 //return signed argument
+ bra t_frcinx
+
+LP1REAL:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+ movel #0x00000000,ADJK(%a6)
+ fmovex %fp0,%fp1 // ...FP1 IS INPUT Z
+ fadds one,%fp0 // ...X := ROUND(1+Z)
+ fmovex %fp0,X(%a6)
+ movew XFRAC(%a6),XDCARE(%a6)
+ movel X(%a6),%d0
+ cmpil #0,%d0
+ ble LP1NEG0 // ...LOG OF ZERO OR -VE
+ cmp2l BOUNDS2,%d0
+ bcs LOGMAIN // ...BOUNDS2 IS [1/2,3/2]
+//--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
+//--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
+//--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
+
+LP1NEAR1:
+//--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
+ cmp2l BOUNDS1,%d0
+ bcss LP1CARE
+
+LP1ONE16:
+//--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
+//--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
+ faddx %fp1,%fp1 // ...FP1 IS 2Z
+ fadds one,%fp0 // ...FP0 IS 1+X
+//--U = FP1/FP0
+ bra LP1CONT2
+
+LP1CARE:
+//--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
+//--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
+//--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
+//--THERE ARE ONLY TWO CASES.
+//--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
+//--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
+//--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
+//--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
+
+ movel XFRAC(%a6),FFRAC(%a6)
+ andil #0xFE000000,FFRAC(%a6)
+ oril #0x01000000,FFRAC(%a6) // ...F OBTAINED
+ cmpil #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1
+ bges KISZERO
+
+KISNEG1:
+ fmoves TWO,%fp0
+ movel #0x3fff0000,F(%a6)
+ clrl F+8(%a6)
+ fsubx F(%a6),%fp0 // ...2-F
+ movel FFRAC(%a6),%d0
+ andil #0x7E000000,%d0
+ asrl #8,%d0
+ asrl #8,%d0
+ asrl #4,%d0 // ...D0 CONTAINS DISPLACEMENT FOR 1/F
+ faddx %fp1,%fp1 // ...GET 2Z
+ fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2
+ faddx %fp1,%fp0 // ...FP0 IS Y-F = (2-F)+2Z
+ lea LOGTBL,%a0 // ...A0 IS ADDRESS OF 1/F
+ addal %d0,%a0
+ fmoves negone,%fp1 // ...FP1 IS K = -1
+ bra LP1CONT1
+
+KISZERO:
+ fmoves one,%fp0
+ movel #0x3fff0000,F(%a6)
+ clrl F+8(%a6)
+ fsubx F(%a6),%fp0 // ...1-F
+ movel FFRAC(%a6),%d0
+ andil #0x7E000000,%d0
+ asrl #8,%d0
+ asrl #8,%d0
+ asrl #4,%d0
+ faddx %fp1,%fp0 // ...FP0 IS Y-F
+ fmovemx %fp2-%fp2/%fp3,-(%sp) // ...FP2 SAVED
+ lea LOGTBL,%a0
+ addal %d0,%a0 // ...A0 IS ADDRESS OF 1/F
+ fmoves zero,%fp1 // ...FP1 IS K = 0
+ bra LP1CONT1
+
+LP1NEG0:
+//--FPCR SAVED. D0 IS X IN COMPACT FORM.
+ cmpil #0,%d0
+ blts LP1NEG
+LP1ZERO:
+ fmoves negone,%fp0
+
+ fmovel %d1,%fpcr
+ bra t_dz
+
+LP1NEG:
+ fmoves zero,%fp0
+
+ fmovel %d1,%fpcr
+ bra t_operr
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/smovecr.s b/c/src/lib/libcpu/m68k/m68040/fpsp/smovecr.s
new file mode 100644
index 0000000000..cf45b2ec4d
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/smovecr.s
@@ -0,0 +1,162 @@
+//
+// smovecr.sa 3.1 12/10/90
+//
+// The entry point sMOVECR returns the constant at the
+// offset given in the instruction field.
+//
+// Input: An offset in the instruction word.
+//
+// Output: The constant rounded to the user's rounding
+// mode unchecked for overflow.
+//
+// Modified: fp0.
+//
+//
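+// (Illustrative sketch, not part of the FPSP sources: the offset ranges
+// checked by the dispatch below, written as C.  The name movecr_class is
+// made up for this note; it only classifies the offset, it does not read
+// the constant tables.)
+//
+//	/* 0 = pi entry, 1 = SMALxx table, 2 = BIGxx table, -1 = reads as 0 */
+//	int movecr_class(unsigned offset)
+//	{
+//	    if (offset == 0x00)                   return 0;
+//	    if (offset >= 0x0b && offset <= 0x0e) return 1;
+//	    if (offset >= 0x30 && offset <= 0x3f) return 2;
+//	    return -1;
+//	}
+//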
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SMOVECR idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref nrm_set
+ |xref round
+ |xref PIRN
+ |xref PIRZRM
+ |xref PIRP
+ |xref SMALRN
+ |xref SMALRZRM
+ |xref SMALRP
+ |xref BIGRN
+ |xref BIGRZRM
+ |xref BIGRP
+
+FZERO: .long 00000000
+//
+// FMOVECR
+//
+ .global smovcr
+smovcr:
+ bfextu CMDREG1B(%a6){#9:#7},%d0 //get offset
+ bfextu USER_FPCR(%a6){#26:#2},%d1 //get rmode
+//
+// check range of offset
+//
+ tstb %d0 //if zero, offset is to pi
+ beqs PI_TBL //it is pi
+ cmpib #0x0a,%d0 //check range $01 - $0a
+ bles Z_VAL //if in this range, return zero
+ cmpib #0x0e,%d0 //check range $0b - $0e
+ bles SM_TBL //valid constants in this range
+ cmpib #0x2f,%d0 //check range $10 - $2f
+ bles Z_VAL //if in this range, return zero
+ cmpib #0x3f,%d0 //check range $30 - $3f
+ ble BG_TBL //valid constants in this range
+Z_VAL:
+ fmoves FZERO,%fp0
+ rts
+PI_TBL:
+ tstb %d1 //offset is zero, check for rmode
+ beqs PI_RN //if zero, rn mode
+ cmpib #0x3,%d1 //check for rp
+ beqs PI_RP //if 3, rp mode
+PI_RZRM:
+ leal PIRZRM,%a0 //rmode is rz or rm, load PIRZRM in a0
+ bra set_finx
+PI_RN:
+ leal PIRN,%a0 //rmode is rn, load PIRN in a0
+ bra set_finx
+PI_RP:
+ leal PIRP,%a0 //rmode is rp, load PIRP in a0
+ bra set_finx
+SM_TBL:
+ subil #0xb,%d0 //make offset in 0 - 4 range
+ tstb %d1 //check for rmode
+ beqs SM_RN //if zero, rn mode
+ cmpib #0x3,%d1 //check for rp
+ beqs SM_RP //if 3, rp mode
+SM_RZRM:
+ leal SMALRZRM,%a0 //rmode is rz or rm, load SMRZRM in a0
+ cmpib #0x2,%d0 //check if result is inex
+ ble set_finx //if 0 - 2, it is inexact
+ bra no_finx //if 3, it is exact
+SM_RN:
+ leal SMALRN,%a0 //rmode is rn, load SMRN in a0
+ cmpib #0x2,%d0 //check if result is inex
+ ble set_finx //if 0 - 2, it is inexact
+ bra no_finx //if 3, it is exact
+SM_RP:
+ leal SMALRP,%a0 //rmode is rp, load SMRP in a0
+ cmpib #0x2,%d0 //check if result is inex
+ ble set_finx //if 0 - 2, it is inexact
+ bra no_finx //if 3, it is exact
+BG_TBL:
+ subil #0x30,%d0 //make offset in 0 - f range
+ tstb %d1 //check for rmode
+ beqs BG_RN //if zero, rn mode
+ cmpib #0x3,%d1 //check for rp
+ beqs BG_RP //if 3, rp mode
+BG_RZRM:
+ leal BIGRZRM,%a0 //rmode is rz or rm, load BGRZRM in a0
+ cmpib #0x1,%d0 //check if result is inex
+ ble set_finx //if 0 - 1, it is inexact
+ cmpib #0x7,%d0 //second check
+ ble no_finx //if 0 - 7, it is exact
+ bra set_finx //if 8 - f, it is inexact
+BG_RN:
+ leal BIGRN,%a0 //rmode is rn, load BGRN in a0
+ cmpib #0x1,%d0 //check if result is inex
+ ble set_finx //if 0 - 1, it is inexact
+ cmpib #0x7,%d0 //second check
+ ble no_finx //if 0 - 7, it is exact
+ bra set_finx //if 8 - f, it is inexact
+BG_RP:
+	leal	BIGRP,%a0	//rmode is rp, load BGRP in a0
+ cmpib #0x1,%d0 //check if result is inex
+ ble set_finx //if 0 - 1, it is inexact
+ cmpib #0x7,%d0 //second check
+ ble no_finx //if 0 - 7, it is exact
+// bra set_finx ;if 8 - f, it is inexact
+set_finx:
+ orl #inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
+no_finx:
+ mulul #12,%d0 //use offset to point into tables
+ movel %d1,L_SCR1(%a6) //load mode for round call
+ bfextu USER_FPCR(%a6){#24:#2},%d1 //get precision
+ tstl %d1 //check if extended precision
+//
+// Precision is extended
+//
+ bnes not_ext //if extended, do not call round
+ fmovemx (%a0,%d0),%fp0-%fp0 //return result in fp0
+ rts
+//
+// Precision is single or double
+//
+not_ext:
+ swap %d1 //rnd prec in upper word of d1
+ addl L_SCR1(%a6),%d1 //merge rmode in low word of d1
+ movel (%a0,%d0),FP_SCR1(%a6) //load first word to temp storage
+ movel 4(%a0,%d0),FP_SCR1+4(%a6) //load second word
+ movel 8(%a0,%d0),FP_SCR1+8(%a6) //load third word
+ clrl %d0 //clear g,r,s
+ lea FP_SCR1(%a6),%a0
+ btstb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0) //convert to internal ext. format
+
+ bsr round //go round the mantissa
+
+ bfclr LOCAL_SGN(%a0){#0:#8} //convert back to IEEE ext format
+ beqs fin_fcr
+ bsetb #sign_bit,LOCAL_EX(%a0)
+fin_fcr:
+ fmovemx (%a0),%fp0-%fp0
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/srem_mod.s b/c/src/lib/libcpu/m68k/m68040/fpsp/srem_mod.s
new file mode 100644
index 0000000000..57ee7ed4b3
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/srem_mod.s
@@ -0,0 +1,422 @@
+//
+// srem_mod.sa 3.1 12/10/90
+//
+// The entry point sMOD computes the floating point MOD of the
+// input values X and Y. The entry point sREM computes the floating
+// point (IEEE) REM of the input values X and Y.
+//
+// INPUT
+// -----
+//	Double-extended value Y is pointed to by the address in register
+//	A0. Double-extended value X is located in -12(A0). The values
+//	of X and Y are both nonzero and finite, although either or both
+// of them can be denormalized. The special cases of zeros, NaNs,
+// and infinities are handled elsewhere.
+//
+// OUTPUT
+// ------
+// FREM(X,Y) or FMOD(X,Y), depending on entry point.
+//
+// ALGORITHM
+// ---------
+//
+// Step 1. Save and strip signs of X and Y: signX := sign(X),
+// signY := sign(Y), X := |X|, Y := |Y|,
+// signQ := signX EOR signY. Record whether MOD or REM
+// is requested.
+//
+// Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0.
+// If (L < 0) then
+// R := X, go to Step 4.
+// else
+// R := 2^(-L)X, j := L.
+// endif
+//
+// Step 3. Perform MOD(X,Y)
+// 3.1 If R = Y, go to Step 9.
+// 3.2 If R > Y, then { R := R - Y, Q := Q + 1}
+// 3.3 If j = 0, go to Step 4.
+// 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
+// Step 3.1.
+//
+// Step 4. At this point, R = X - QY = MOD(X,Y). Set
+// Last_Subtract := false (used in Step 7 below). If
+// MOD is requested, go to Step 6.
+//
+// Step 5. R = MOD(X,Y), but REM(X,Y) is requested.
+// 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
+// Step 6.
+// 5.2 If R > Y/2, then { set Last_Subtract := true,
+// Q := Q + 1, Y := signY*Y }. Go to Step 6.
+// 5.3 This is the tricky case of R = Y/2. If Q is odd,
+// then { Q := Q + 1, signX := -signX }.
+//
+// Step 6. R := signX*R.
+//
+// Step 7. If Last_Subtract = true, R := R - Y.
+//
+// Step 8. Return signQ, last 7 bits of Q, and R as required.
+//
+// Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus,
+// X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
+// R := 0. Return signQ, last 7 bits of Q, and R.
+//
+//
+
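
A minimal C sketch of Steps 1-9 above, assuming finite nonzero double inputs. Ordinary doubles and frexp()/ldexp() stand in for the 67-bit register arithmetic used by the assembly, and the function name is illustrative.

    #include <math.h>

    /* Sketch of the shift-and-subtract scheme above.  ieee_rem selects REM
     * (round the quotient to nearest) instead of MOD (truncate); *quo7
     * receives the low 7 quotient bits that the FPSP deposits in the FPSR. */
    static double rem_mod_sketch(double x, double y, int ieee_rem, int *quo7)
    {
        double sign_x = copysign(1.0, x);
        double ax = fabs(x), ay = fabs(y), r = ax;
        int ex, ey;
        unsigned long q = 0;

        frexp(ax, &ex);                      /* Step 2: L = expo(X) - expo(Y) */
        frexp(ay, &ey);

        /* Step 3: restoring division, one quotient bit per pass. */
        for (int j = ex - ey; j >= 0; j--) {
            double ys = ldexp(ay, j);        /* 2^j * Y */
            q <<= 1;
            if (r >= ys) {
                r -= ys;
                q |= 1;
            }
        }

        /* Step 5: REM rounds the quotient to nearest, ties to even. */
        if (ieee_rem && (r > 0.5 * ay || (r == 0.5 * ay && (q & 1)))) {
            q++;
            r -= ay;                          /* R may go negative here */
        }
        if (quo7)
            *quo7 = (int)(q & 0x7f);          /* Step 8: last 7 bits of Q */
        return sign_x * r;                    /* Steps 6/7: apply sign of X */
    }
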
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+SREM_MOD: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ .set Mod_Flag,L_SCR3
+ .set SignY,FP_SCR3+4
+ .set SignX,FP_SCR3+8
+ .set SignQ,FP_SCR3+12
+ .set Sc_Flag,FP_SCR4
+
+ .set Y,FP_SCR1
+ .set Y_Hi,Y+4
+ .set Y_Lo,Y+8
+
+ .set R,FP_SCR2
+ .set R_Hi,R+4
+ .set R_Lo,R+8
+
+
+Scale: .long 0x00010000,0x80000000,0x00000000,0x00000000
+
+ |xref t_avoid_unsupp
+
+ .global smod
+smod:
+
+ movel #0,Mod_Flag(%a6)
+ bras Mod_Rem
+
+ .global srem
+srem:
+
+ movel #1,Mod_Flag(%a6)
+
+Mod_Rem:
+//..Save sign of X and Y
+ moveml %d2-%d7,-(%a7) // ...save data registers
+ movew (%a0),%d3
+ movew %d3,SignY(%a6)
+ andil #0x00007FFF,%d3 // ...Y := |Y|
+
+//
+ movel 4(%a0),%d4
+ movel 8(%a0),%d5 // ...(D3,D4,D5) is |Y|
+
+ tstl %d3
+ bnes Y_Normal
+
+ movel #0x00003FFE,%d3 // ...$3FFD + 1
+ tstl %d4
+ bnes HiY_not0
+
+HiY_0:
+ movel %d5,%d4
+ clrl %d5
+ subil #32,%d3
+ clrl %d6
+ bfffo %d4{#0:#32},%d6
+ lsll %d6,%d4
+ subl %d6,%d3 // ...(D3,D4,D5) is normalized
+// ...with bias $7FFD
+ bras Chk_X
+
+HiY_not0:
+ clrl %d6
+ bfffo %d4{#0:#32},%d6
+ subl %d6,%d3
+ lsll %d6,%d4
+ movel %d5,%d7 // ...a copy of D5
+ lsll %d6,%d5
+ negl %d6
+ addil #32,%d6
+ lsrl %d6,%d7
+ orl %d7,%d4 // ...(D3,D4,D5) normalized
+// ...with bias $7FFD
+ bras Chk_X
+
+Y_Normal:
+ addil #0x00003FFE,%d3 // ...(D3,D4,D5) normalized
+// ...with bias $7FFD
+
+Chk_X:
+ movew -12(%a0),%d0
+ movew %d0,SignX(%a6)
+ movew SignY(%a6),%d1
+ eorl %d0,%d1
+ andil #0x00008000,%d1
+ movew %d1,SignQ(%a6) // ...sign(Q) obtained
+ andil #0x00007FFF,%d0
+ movel -8(%a0),%d1
+ movel -4(%a0),%d2 // ...(D0,D1,D2) is |X|
+ tstl %d0
+ bnes X_Normal
+ movel #0x00003FFE,%d0
+ tstl %d1
+ bnes HiX_not0
+
+HiX_0:
+ movel %d2,%d1
+ clrl %d2
+ subil #32,%d0
+ clrl %d6
+ bfffo %d1{#0:#32},%d6
+ lsll %d6,%d1
+ subl %d6,%d0 // ...(D0,D1,D2) is normalized
+// ...with bias $7FFD
+ bras Init
+
+HiX_not0:
+ clrl %d6
+ bfffo %d1{#0:#32},%d6
+ subl %d6,%d0
+ lsll %d6,%d1
+ movel %d2,%d7 // ...a copy of D2
+ lsll %d6,%d2
+ negl %d6
+ addil #32,%d6
+ lsrl %d6,%d7
+ orl %d7,%d1 // ...(D0,D1,D2) normalized
+// ...with bias $7FFD
+ bras Init
+
+X_Normal:
+ addil #0x00003FFE,%d0 // ...(D0,D1,D2) normalized
+// ...with bias $7FFD
+
+Init:
+//
+ movel %d3,L_SCR1(%a6) // ...save biased expo(Y)
+ movel %d0,L_SCR2(%a6) //save d0
+ subl %d3,%d0 // ...L := expo(X)-expo(Y)
+// Move.L D0,L ...D0 is j
+ clrl %d6 // ...D6 := carry <- 0
+ clrl %d3 // ...D3 is Q
+ moveal #0,%a1 // ...A1 is k; j+k=L, Q=0
+
+//..(Carry,D1,D2) is R
+ tstl %d0
+ bges Mod_Loop
+
+//..expo(X) < expo(Y). Thus X = mod(X,Y)
+//
+ movel L_SCR2(%a6),%d0 //restore d0
+ bra Get_Mod
+
+//..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
+
+
+Mod_Loop:
+ tstl %d6 // ...test carry bit
+ bgts R_GT_Y
+
+//..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
+ cmpl %d4,%d1 // ...compare hi(R) and hi(Y)
+ bnes R_NE_Y
+ cmpl %d5,%d2 // ...compare lo(R) and lo(Y)
+ bnes R_NE_Y
+
+//..At this point, R = Y
+ bra Rem_is_0
+
+R_NE_Y:
+//..use the borrow of the previous compare
+ bcss R_LT_Y // ...borrow is set iff R < Y
+
+R_GT_Y:
+//..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
+//..and Y < (D1,D2) < 2Y. Either way, perform R - Y
+ subl %d5,%d2 // ...lo(R) - lo(Y)
+ subxl %d4,%d1 // ...hi(R) - hi(Y)
+ clrl %d6 // ...clear carry
+ addql #1,%d3 // ...Q := Q + 1
+
+R_LT_Y:
+//..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
+ tstl %d0 // ...see if j = 0.
+ beqs PostLoop
+
+ addl %d3,%d3 // ...Q := 2Q
+ addl %d2,%d2 // ...lo(R) = 2lo(R)
+ roxll #1,%d1 // ...hi(R) = 2hi(R) + carry
+ scs %d6 // ...set Carry if 2(R) overflows
+ addql #1,%a1 // ...k := k+1
+ subql #1,%d0 // ...j := j - 1
+//..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
+
+ bras Mod_Loop
+
+PostLoop:
+//..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
+
+//..normalize R.
+ movel L_SCR1(%a6),%d0 // ...new biased expo of R
+ tstl %d1
+ bnes HiR_not0
+
+HiR_0:
+ movel %d2,%d1
+ clrl %d2
+ subil #32,%d0
+ clrl %d6
+ bfffo %d1{#0:#32},%d6
+ lsll %d6,%d1
+ subl %d6,%d0 // ...(D0,D1,D2) is normalized
+// ...with bias $7FFD
+ bras Get_Mod
+
+HiR_not0:
+ clrl %d6
+ bfffo %d1{#0:#32},%d6
+ bmis Get_Mod // ...already normalized
+ subl %d6,%d0
+ lsll %d6,%d1
+ movel %d2,%d7 // ...a copy of D2
+ lsll %d6,%d2
+ negl %d6
+ addil #32,%d6
+ lsrl %d6,%d7
+ orl %d7,%d1 // ...(D0,D1,D2) normalized
+
+//
+Get_Mod:
+ cmpil #0x000041FE,%d0
+ bges No_Scale
+Do_Scale:
+ movew %d0,R(%a6)
+ clrw R+2(%a6)
+ movel %d1,R_Hi(%a6)
+ movel %d2,R_Lo(%a6)
+ movel L_SCR1(%a6),%d6
+ movew %d6,Y(%a6)
+ clrw Y+2(%a6)
+ movel %d4,Y_Hi(%a6)
+ movel %d5,Y_Lo(%a6)
+ fmovex R(%a6),%fp0 // ...no exception
+ movel #1,Sc_Flag(%a6)
+ bras ModOrRem
+No_Scale:
+ movel %d1,R_Hi(%a6)
+ movel %d2,R_Lo(%a6)
+ subil #0x3FFE,%d0
+ movew %d0,R(%a6)
+ clrw R+2(%a6)
+ movel L_SCR1(%a6),%d6
+ subil #0x3FFE,%d6
+ movel %d6,L_SCR1(%a6)
+ fmovex R(%a6),%fp0
+ movew %d6,Y(%a6)
+ movel %d4,Y_Hi(%a6)
+ movel %d5,Y_Lo(%a6)
+ movel #0,Sc_Flag(%a6)
+
+//
+
+
+ModOrRem:
+ movel Mod_Flag(%a6),%d6
+ beqs Fix_Sign
+
+ movel L_SCR1(%a6),%d6 // ...new biased expo(Y)
+ subql #1,%d6 // ...biased expo(Y/2)
+ cmpl %d6,%d0
+ blts Fix_Sign
+ bgts Last_Sub
+
+ cmpl %d4,%d1
+ bnes Not_EQ
+ cmpl %d5,%d2
+ bnes Not_EQ
+ bra Tie_Case
+
+Not_EQ:
+ bcss Fix_Sign
+
+Last_Sub:
+//
+ fsubx Y(%a6),%fp0 // ...no exceptions
+ addql #1,%d3 // ...Q := Q + 1
+
+//
+
+Fix_Sign:
+//..Get sign of X
+ movew SignX(%a6),%d6
+ bges Get_Q
+ fnegx %fp0
+
+//..Get Q
+//
+Get_Q:
+ clrl %d6
+ movew SignQ(%a6),%d6 // ...D6 is sign(Q)
+ movel #8,%d7
+ lsrl %d7,%d6
+ andil #0x0000007F,%d3 // ...7 bits of Q
+ orl %d6,%d3 // ...sign and bits of Q
+ swap %d3
+ fmovel %fpsr,%d6
+ andil #0xFF00FFFF,%d6
+ orl %d3,%d6
+ fmovel %d6,%fpsr // ...put Q in fpsr
+
+//
+Restore:
+ moveml (%a7)+,%d2-%d7
+ fmovel USER_FPCR(%a6),%fpcr
+ movel Sc_Flag(%a6),%d0
+ beqs Finish
+ fmulx Scale(%pc),%fp0 // ...may cause underflow
+ bra t_avoid_unsupp //check for denorm as a
+// ;result of the scaling
+
+Finish:
+ fmovex %fp0,%fp0 //capture exceptions & round
+ rts
+
+Rem_is_0:
+//..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
+ addql #1,%d3
+ cmpil #8,%d0 // ...D0 is j
+ bges Q_Big
+
+ lsll %d0,%d3
+ bras Set_R_0
+
+Q_Big:
+ clrl %d3
+
+Set_R_0:
+ fmoves #0x00000000,%fp0
+ movel #0,Sc_Flag(%a6)
+ bra Fix_Sign
+
+Tie_Case:
+//..Check parity of Q
+ movel %d3,%d6
+ andil #0x00000001,%d6
+ tstl %d6
+ beq Fix_Sign // ...Q is even
+
+//..Q is odd, Q := Q + 1, signX := -signX
+ addql #1,%d3
+ movew SignX(%a6),%d6
+ eoril #0x00008000,%d6
+ movew %d6,SignX(%a6)
+ bra Fix_Sign
+
+ //end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/ssin.s b/c/src/lib/libcpu/m68k/m68040/fpsp/ssin.s
new file mode 100644
index 0000000000..9c12a55cba
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/ssin.s
@@ -0,0 +1,746 @@
+//
+// ssin.sa 3.3 7/29/91
+//
+//	The entry point sSIN computes the sine of an input argument;
+//	sCOS computes the cosine, and sSINCOS computes both. The
+//	corresponding entry points with a "d" compute the same
+//	function values for denormalized inputs.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
+// COS is requested. Otherwise, for SINCOS, sin(X) is returned
+// in Fp0, and cos(X) is returned in Fp1.
+//
+// Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
+//
+// Accuracy and Monotonicity: The returned result is within 1 ulp in
+//	64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The programs sSIN and sCOS take approximately 150 cycles for
+// input argument X such that |X| < 15Pi, which is the the usual
+//	input argument X such that |X| < 15Pi, which is the usual
+//
+// Algorithm:
+//
+// SIN and COS:
+// 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
+//
+// 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
+//
+// 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+// k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
+// k by k := k + AdjN.
+//
+// 4. If k is even, go to 6.
+//
+// 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
+// where cos(r) is approximated by an even polynomial in r,
+// 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
+// Exit.
+//
+// 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
+// where sin(r) is approximated by an odd polynomial in r
+// r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
+// Exit.
+//
+// 7. If |X| > 1, go to 9.
+//
+// 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
+//
+// 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
+//
+// SINCOS:
+// 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
+//
+// 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+// k = N mod 4, so in particular, k = 0,1,2,or 3.
+//
+// 3. If k is even, go to 5.
+//
+// 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
+// j1 exclusive or with the l.s.b. of k.
+// sgn1 := (-1)**j1, sgn2 := (-1)**j2.
+// SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
+// sin(r) and cos(r) are computed as odd and even polynomials
+// in r, respectively. Exit
+//
+// 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
+// SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
+// sin(r) and cos(r) are computed as odd and even polynomials
+// in r, respectively. Exit
+//
+// 6. If |X| > 1, go to 8.
+//
+// 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
+//
+// 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
+//
+
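
A minimal C sketch of the SIN/COS selection above. Libm's sin()/cos() stand in for the A1..A7 and B1..B8 polynomials, the general REDUCEX reduction for |X| >= 15Pi is omitted, and the function name is illustrative.

    #include <math.h>

    /* adj_n = 0 requests sin(x), adj_n = 1 requests cos(x). */
    double sincos_sketch(double x, int adj_n)
    {
        if (fabs(x) < 0x1p-40)                   /* Steps 7/8: tiny argument */
            return adj_n ? 1.0 : x;
        if (fabs(x) >= 15 * M_PI)                /* Steps 2/9: general reduction omitted */
            return NAN;

        double n = nearbyint(x * (2.0 / M_PI));  /* Step 3: N = round(X * 2/pi) */
        double r = x - n * M_PI_2;               /* r = X - N*(pi/2), |r| <= pi/4   */
                                                 /* (the FPSP subtracts Y1 and Y2   */
                                                 /*  separately for extra accuracy) */
        int k = (((int)fmod(n, 4.0)) + 4 + adj_n) % 4;
        double sgn = (k & 2) ? -1.0 : 1.0;       /* Steps 5/6: sgn = (-1)**j */

        return (k & 1) ? sgn * cos(r)            /* k odd : even polynomial B1..B8 */
                       : sgn * sin(r);           /* k even: odd polynomial A1..A7  */
    }
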
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SSIN idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+BOUNDS1: .long 0x3FD78000,0x4004BC7E
+TWOBYPI: .long 0x3FE45F30,0x6DC9C883
+
+SINA7: .long 0xBD6AAA77,0xCCC994F5
+SINA6: .long 0x3DE61209,0x7AAE8DA1
+
+SINA5: .long 0xBE5AE645,0x2A118AE4
+SINA4: .long 0x3EC71DE3,0xA5341531
+
+SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
+
+SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000
+
+SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
+
+COSB8: .long 0x3D2AC4D0,0xD6011EE3
+COSB7: .long 0xBDA9396F,0x9F45AC19
+
+COSB6: .long 0x3E21EED9,0x0612C972
+COSB5: .long 0xBE927E4F,0xB79D9FCF
+
+COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
+
+COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
+
+COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
+COSB1: .long 0xBF000000
+
+INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A
+
+TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
+TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
+
+ |xref PITBL
+
+ .set INARG,FP_SCR4
+
+ .set X,FP_SCR5
+ .set XDCARE,X+2
+ .set XFRAC,X+4
+
+ .set RPRIME,FP_SCR1
+ .set SPRIME,FP_SCR2
+
+ .set POSNEG1,L_SCR1
+ .set TWOTO63,L_SCR1
+
+ .set ENDFLAG,L_SCR2
+ .set N,L_SCR2
+
+ .set ADJN,L_SCR3
+
+ | xref t_frcinx
+ |xref t_extdnrm
+ |xref sto_cos
+
+ .global ssind
+ssind:
+//--SIN(X) = X FOR DENORMALIZED X
+ bra t_extdnrm
+
+ .global scosd
+scosd:
+//--COS(X) = 1 FOR DENORMALIZED X
+
+ fmoves #0x3F800000,%fp0
+//
+// 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
+//
+ fmovel #0,%fpsr
+//
+ bra t_frcinx
+
+ .global ssin
+ssin:
+//--SET ADJN TO 0
+ movel #0,ADJN(%a6)
+ bras SINBGN
+
+ .global scos
+scos:
+//--SET ADJN TO 1
+ movel #1,ADJN(%a6)
+
+SINBGN:
+//--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
+
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ fmovex %fp0,X(%a6)
+ andil #0x7FFFFFFF,%d0 // ...COMPACTIFY X
+
+ cmpil #0x3FD78000,%d0 // ...|X| >= 2**(-40)?
+ bges SOK1
+ bra SINSM
+
+SOK1:
+ cmpil #0x4004BC7E,%d0 // ...|X| < 15 PI?
+ blts SINMAIN
+ bra REDUCEX
+
+SINMAIN:
+//--THIS IS THE USUAL CASE, |X| <= 15 PI.
+//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+ fmovex %fp0,%fp1
+ fmuld TWOBYPI,%fp1 // ...X*2/PI
+
+//--HIDE THE NEXT THREE INSTRUCTIONS
+ lea PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
+
+
+//--FP1 IS NOW READY
+ fmovel %fp1,N(%a6) // ...CONVERT TO INTEGER
+
+ movel N(%a6),%d0
+ asll #4,%d0
+ addal %d0,%a1 // ...A1 IS THE ADDRESS OF N*PIBY2
+// ...WHICH IS IN TWO PIECES Y1 & Y2
+
+ fsubx (%a1)+,%fp0 // ...X-Y1
+//--HIDE THE NEXT ONE
+ fsubs (%a1),%fp0 // ...FP0 IS R = (X-Y1)-Y2
+
+SINCONT:
+//--continuation from REDUCEX
+
+//--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
+ movel N(%a6),%d0
+ addl ADJN(%a6),%d0 // ...SEE IF D0 IS ODD OR EVEN
+ rorl #1,%d0 // ...D0 WAS ODD IFF D0 IS NEGATIVE
+ cmpil #0,%d0
+ blt COSPOLY
+
+SINPOLY:
+//--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
+//--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
+//--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
+//--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
+//--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
+//--WHERE T=S*S.
+//--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
+//--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
+ fmovex %fp0,X(%a6) // ...X IS R
+ fmulx %fp0,%fp0 // ...FP0 IS S
+//---HIDE THE NEXT TWO WHILE WAITING FOR FP0
+ fmoved SINA7,%fp3
+ fmoved SINA6,%fp2
+//--FP0 IS NOW READY
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...FP1 IS T
+//--HIDE THE NEXT TWO WHILE WAITING FOR FP1
+
+ rorl #1,%d0
+ andil #0x80000000,%d0
+// ...LEAST SIG. BIT OF D0 IN SIGN POSITION
+ eorl %d0,X(%a6) // ...X IS NOW R'= SGN*R
+
+ fmulx %fp1,%fp3 // ...TA7
+ fmulx %fp1,%fp2 // ...TA6
+
+ faddd SINA5,%fp3 // ...A5+TA7
+ faddd SINA4,%fp2 // ...A4+TA6
+
+ fmulx %fp1,%fp3 // ...T(A5+TA7)
+ fmulx %fp1,%fp2 // ...T(A4+TA6)
+
+ faddd SINA3,%fp3 // ...A3+T(A5+TA7)
+ faddx SINA2,%fp2 // ...A2+T(A4+TA6)
+
+ fmulx %fp3,%fp1 // ...T(A3+T(A5+TA7))
+
+ fmulx %fp0,%fp2 // ...S(A2+T(A4+TA6))
+ faddx SINA1,%fp1 // ...A1+T(A3+T(A5+TA7))
+ fmulx X(%a6),%fp0 // ...R'*S
+
+ faddx %fp2,%fp1 // ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
+//--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
+//--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
+
+
+ fmulx %fp1,%fp0 // ...SIN(R')-R'
+//--FP1 RELEASED.
+
+ fmovel %d1,%FPCR //restore users exceptions
+ faddx X(%a6),%fp0 //last inst - possible exception set
+ bra t_frcinx
+
+
+COSPOLY:
+//--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
+//--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
+//--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
+//--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
+//--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
+//--WHERE T=S*S.
+//--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
+//--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
+//--AND IS THEREFORE STORED AS SINGLE PRECISION.
+
+ fmulx %fp0,%fp0 // ...FP0 IS S
+//---HIDE THE NEXT TWO WHILE WAITING FOR FP0
+ fmoved COSB8,%fp2
+ fmoved COSB7,%fp3
+//--FP0 IS NOW READY
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...FP1 IS T
+//--HIDE THE NEXT TWO WHILE WAITING FOR FP1
+ fmovex %fp0,X(%a6) // ...X IS S
+ rorl #1,%d0
+ andil #0x80000000,%d0
+// ...LEAST SIG. BIT OF D0 IN SIGN POSITION
+
+ fmulx %fp1,%fp2 // ...TB8
+//--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
+ eorl %d0,X(%a6) // ...X IS NOW S'= SGN*S
+ andil #0x80000000,%d0
+
+ fmulx %fp1,%fp3 // ...TB7
+//--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
+ oril #0x3F800000,%d0 // ...D0 IS SGN IN SINGLE
+ movel %d0,POSNEG1(%a6)
+
+ faddd COSB6,%fp2 // ...B6+TB8
+ faddd COSB5,%fp3 // ...B5+TB7
+
+ fmulx %fp1,%fp2 // ...T(B6+TB8)
+ fmulx %fp1,%fp3 // ...T(B5+TB7)
+
+ faddd COSB4,%fp2 // ...B4+T(B6+TB8)
+ faddx COSB3,%fp3 // ...B3+T(B5+TB7)
+
+ fmulx %fp1,%fp2 // ...T(B4+T(B6+TB8))
+ fmulx %fp3,%fp1 // ...T(B3+T(B5+TB7))
+
+ faddx COSB2,%fp2 // ...B2+T(B4+T(B6+TB8))
+ fadds COSB1,%fp1 // ...B1+T(B3+T(B5+TB7))
+
+ fmulx %fp2,%fp0 // ...S(B2+T(B4+T(B6+TB8)))
+//--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
+//--FP2 RELEASED.
+
+
+ faddx %fp1,%fp0
+//--FP1 RELEASED
+
+ fmulx X(%a6),%fp0
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fadds POSNEG1(%a6),%fp0 //last inst - possible exception set
+ bra t_frcinx
+
+
+SINBORS:
+//--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
+//--IF |X| < 2**(-40), RETURN X OR 1.
+ cmpil #0x3FFF8000,%d0
+ bgts REDUCEX
+
+
+SINSM:
+ movel ADJN(%a6),%d0
+ cmpil #0,%d0
+ bgts COSTINY
+
+SINTINY:
+ movew #0x0000,XDCARE(%a6) // ...JUST IN CASE
+ fmovel %d1,%FPCR //restore users exceptions
+ fmovex X(%a6),%fp0 //last inst - possible exception set
+ bra t_frcinx
+
+
+COSTINY:
+ fmoves #0x3F800000,%fp0
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fsubs #0x00800000,%fp0 //last inst - possible exception set
+ bra t_frcinx
+
+
+REDUCEX:
+//--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
+//--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
+//--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
+
+ fmovemx %fp2-%fp5,-(%a7) // ...save FP2 through FP5
+ movel %d2,-(%a7)
+ fmoves #0x00000000,%fp1
+//--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
+//--there is a danger of unwanted overflow in first LOOP iteration. In this
+//--case, reduce argument by one remainder step to make subsequent reduction
+//--safe.
+ cmpil #0x7ffeffff,%d0 //is argument dangerously large?
+ bnes LOOP
+ movel #0x7ffe0000,FP_SCR2(%a6) //yes
+// ;create 2**16383*PI/2
+ movel #0xc90fdaa2,FP_SCR2+4(%a6)
+ clrl FP_SCR2+8(%a6)
+ ftstx %fp0 //test sign of argument
+ movel #0x7fdc0000,FP_SCR3(%a6) //create low half of 2**16383*
+// ;PI/2 at FP_SCR3
+ movel #0x85a308d3,FP_SCR3+4(%a6)
+ clrl FP_SCR3+8(%a6)
+ fblt red_neg
+ orw #0x8000,FP_SCR2(%a6) //positive arg
+ orw #0x8000,FP_SCR3(%a6)
+red_neg:
+ faddx FP_SCR2(%a6),%fp0 //high part of reduction is exact
+ fmovex %fp0,%fp1 //save high result in fp1
+ faddx FP_SCR3(%a6),%fp0 //low part of reduction
+ fsubx %fp0,%fp1 //determine low component of result
+ faddx FP_SCR3(%a6),%fp1 //fp0/fp1 are reduced argument.
+
+//--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
+//--integer quotient will be stored in N
+//--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
+
+LOOP:
+ fmovex %fp0,INARG(%a6) // ...+-2**K * F, 1 <= F < 2
+ movew INARG(%a6),%d0
+ movel %d0,%a1 // ...save a copy of D0
+ andil #0x00007FFF,%d0
+ subil #0x00003FFF,%d0 // ...D0 IS K
+ cmpil #28,%d0
+ bles LASTLOOP
+CONTLOOP:
+ subil #27,%d0 // ...D0 IS L := K-27
+ movel #0,ENDFLAG(%a6)
+ bras WORK
+LASTLOOP:
+ clrl %d0 // ...D0 IS L := 0
+ movel #1,ENDFLAG(%a6)
+
+WORK:
+//--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
+//--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
+
+//--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
+//--2**L * (PIby2_1), 2**L * (PIby2_2)
+
+ movel #0x00003FFE,%d2 // ...BIASED EXPO OF 2/PI
+ subl %d0,%d2 // ...BIASED EXPO OF 2**(-L)*(2/PI)
+
+ movel #0xA2F9836E,FP_SCR1+4(%a6)
+ movel #0x4E44152A,FP_SCR1+8(%a6)
+ movew %d2,FP_SCR1(%a6) // ...FP_SCR1 is 2**(-L)*(2/PI)
+
+ fmovex %fp0,%fp2
+ fmulx FP_SCR1(%a6),%fp2
+//--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
+//--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
+//--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
+//--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
+//--US THE DESIRED VALUE IN FLOATING POINT.
+
+//--HIDE SIX CYCLES OF INSTRUCTION
+ movel %a1,%d2
+ swap %d2
+ andil #0x80000000,%d2
+ oril #0x5F000000,%d2 // ...D2 IS SIGN(INARG)*2**63 IN SGL
+ movel %d2,TWOTO63(%a6)
+
+ movel %d0,%d2
+ addil #0x00003FFF,%d2 // ...BIASED EXPO OF 2**L * (PI/2)
+
+//--FP2 IS READY
+ fadds TWOTO63(%a6),%fp2 // ...THE FRACTIONAL PART OF FP1 IS ROUNDED
+
+//--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
+ movew %d2,FP_SCR2(%a6)
+ clrw FP_SCR2+2(%a6)
+ movel #0xC90FDAA2,FP_SCR2+4(%a6)
+ clrl FP_SCR2+8(%a6) // ...FP_SCR2 is 2**(L) * Piby2_1
+
+//--FP2 IS READY
+ fsubs TWOTO63(%a6),%fp2 // ...FP2 is N
+
+ addil #0x00003FDD,%d0
+ movew %d0,FP_SCR3(%a6)
+ clrw FP_SCR3+2(%a6)
+ movel #0x85A308D3,FP_SCR3+4(%a6)
+ clrl FP_SCR3+8(%a6) // ...FP_SCR3 is 2**(L) * Piby2_2
+
+ movel ENDFLAG(%a6),%d0
+
+//--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
+//--P2 = 2**(L) * Piby2_2
+ fmovex %fp2,%fp4
+ fmulx FP_SCR2(%a6),%fp4 // ...W = N*P1
+ fmovex %fp2,%fp5
+ fmulx FP_SCR3(%a6),%fp5 // ...w = N*P2
+ fmovex %fp4,%fp3
+//--we want P+p = W+w but |p| <= half ulp of P
+//--Then, we need to compute A := R-P and a := r-p
+ faddx %fp5,%fp3 // ...FP3 is P
+ fsubx %fp3,%fp4 // ...W-P
+
+ fsubx %fp3,%fp0 // ...FP0 is A := R - P
+ faddx %fp5,%fp4 // ...FP4 is p = (W-P)+w
+
+ fmovex %fp0,%fp3 // ...FP3 A
+ fsubx %fp4,%fp1 // ...FP1 is a := r - p
+
+//--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
+//--|r| <= half ulp of R.
+ faddx %fp1,%fp0 // ...FP0 is R := A+a
+//--No need to calculate r if this is the last loop
+ cmpil #0,%d0
+ bgt RESTORE
+
+//--Need to calculate r
+ fsubx %fp0,%fp3 // ...A-R
+ faddx %fp3,%fp1 // ...FP1 is r := (A-R)+a
+ bra LOOP
+
+RESTORE:
+ fmovel %fp2,N(%a6)
+ movel (%a7)+,%d2
+ fmovemx (%a7)+,%fp2-%fp5
+
+
+ movel ADJN(%a6),%d0
+ cmpil #4,%d0
+
+ blt SINCONT
+ bras SCCONT
+
+ .global ssincosd
+ssincosd:
+//--SIN AND COS OF X FOR DENORMALIZED X
+
+ fmoves #0x3F800000,%fp1
+ bsr sto_cos //store cosine result
+ bra t_extdnrm
+
+ .global ssincos
+ssincos:
+//--SET ADJN TO 4
+ movel #4,ADJN(%a6)
+
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ fmovex %fp0,X(%a6)
+ andil #0x7FFFFFFF,%d0 // ...COMPACTIFY X
+
+ cmpil #0x3FD78000,%d0 // ...|X| >= 2**(-40)?
+ bges SCOK1
+ bra SCSM
+
+SCOK1:
+ cmpil #0x4004BC7E,%d0 // ...|X| < 15 PI?
+ blts SCMAIN
+ bra REDUCEX
+
+
+SCMAIN:
+//--THIS IS THE USUAL CASE, |X| <= 15 PI.
+//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+ fmovex %fp0,%fp1
+ fmuld TWOBYPI,%fp1 // ...X*2/PI
+
+//--HIDE THE NEXT THREE INSTRUCTIONS
+ lea PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
+
+
+//--FP1 IS NOW READY
+ fmovel %fp1,N(%a6) // ...CONVERT TO INTEGER
+
+ movel N(%a6),%d0
+ asll #4,%d0
+ addal %d0,%a1 // ...ADDRESS OF N*PIBY2, IN Y1, Y2
+
+ fsubx (%a1)+,%fp0 // ...X-Y1
+ fsubs (%a1),%fp0 // ...FP0 IS R = (X-Y1)-Y2
+
+SCCONT:
+//--continuation point from REDUCEX
+
+//--HIDE THE NEXT TWO
+ movel N(%a6),%d0
+ rorl #1,%d0
+
+ cmpil #0,%d0 // ...D0 < 0 IFF N IS ODD
+ bge NEVEN
+
+NODD:
+//--REGISTERS SAVED SO FAR: D0, A0, FP2.
+
+ fmovex %fp0,RPRIME(%a6)
+ fmulx %fp0,%fp0 // ...FP0 IS S = R*R
+ fmoved SINA7,%fp1 // ...A7
+ fmoved COSB8,%fp2 // ...B8
+ fmulx %fp0,%fp1 // ...SA7
+ movel %d2,-(%a7)
+ movel %d0,%d2
+ fmulx %fp0,%fp2 // ...SB8
+ rorl #1,%d2
+ andil #0x80000000,%d2
+
+ faddd SINA6,%fp1 // ...A6+SA7
+ eorl %d0,%d2
+ andil #0x80000000,%d2
+ faddd COSB7,%fp2 // ...B7+SB8
+
+ fmulx %fp0,%fp1 // ...S(A6+SA7)
+ eorl %d2,RPRIME(%a6)
+ movel (%a7)+,%d2
+ fmulx %fp0,%fp2 // ...S(B7+SB8)
+ rorl #1,%d0
+ andil #0x80000000,%d0
+
+ faddd SINA5,%fp1 // ...A5+S(A6+SA7)
+ movel #0x3F800000,POSNEG1(%a6)
+ eorl %d0,POSNEG1(%a6)
+ faddd COSB6,%fp2 // ...B6+S(B7+SB8)
+
+ fmulx %fp0,%fp1 // ...S(A5+S(A6+SA7))
+ fmulx %fp0,%fp2 // ...S(B6+S(B7+SB8))
+ fmovex %fp0,SPRIME(%a6)
+
+ faddd SINA4,%fp1 // ...A4+S(A5+S(A6+SA7))
+ eorl %d0,SPRIME(%a6)
+ faddd COSB5,%fp2 // ...B5+S(B6+S(B7+SB8))
+
+ fmulx %fp0,%fp1 // ...S(A4+...)
+ fmulx %fp0,%fp2 // ...S(B5+...)
+
+ faddd SINA3,%fp1 // ...A3+S(A4+...)
+ faddd COSB4,%fp2 // ...B4+S(B5+...)
+
+ fmulx %fp0,%fp1 // ...S(A3+...)
+ fmulx %fp0,%fp2 // ...S(B4+...)
+
+ faddx SINA2,%fp1 // ...A2+S(A3+...)
+ faddx COSB3,%fp2 // ...B3+S(B4+...)
+
+ fmulx %fp0,%fp1 // ...S(A2+...)
+ fmulx %fp0,%fp2 // ...S(B3+...)
+
+ faddx SINA1,%fp1 // ...A1+S(A2+...)
+ faddx COSB2,%fp2 // ...B2+S(B3+...)
+
+ fmulx %fp0,%fp1 // ...S(A1+...)
+ fmulx %fp2,%fp0 // ...S(B2+...)
+
+
+
+ fmulx RPRIME(%a6),%fp1 // ...R'S(A1+...)
+ fadds COSB1,%fp0 // ...B1+S(B2...)
+ fmulx SPRIME(%a6),%fp0 // ...S'(B1+S(B2+...))
+
+ movel %d1,-(%sp) //restore users mode & precision
+ andil #0xff,%d1 //mask off all exceptions
+ fmovel %d1,%FPCR
+ faddx RPRIME(%a6),%fp1 // ...COS(X)
+ bsr sto_cos //store cosine result
+ fmovel (%sp)+,%FPCR //restore users exceptions
+ fadds POSNEG1(%a6),%fp0 // ...SIN(X)
+
+ bra t_frcinx
+
+
+NEVEN:
+//--REGISTERS SAVED SO FAR: FP2.
+
+ fmovex %fp0,RPRIME(%a6)
+ fmulx %fp0,%fp0 // ...FP0 IS S = R*R
+ fmoved COSB8,%fp1 // ...B8
+ fmoved SINA7,%fp2 // ...A7
+ fmulx %fp0,%fp1 // ...SB8
+ fmovex %fp0,SPRIME(%a6)
+ fmulx %fp0,%fp2 // ...SA7
+ rorl #1,%d0
+ andil #0x80000000,%d0
+ faddd COSB7,%fp1 // ...B7+SB8
+ faddd SINA6,%fp2 // ...A6+SA7
+ eorl %d0,RPRIME(%a6)
+ eorl %d0,SPRIME(%a6)
+ fmulx %fp0,%fp1 // ...S(B7+SB8)
+ oril #0x3F800000,%d0
+ movel %d0,POSNEG1(%a6)
+ fmulx %fp0,%fp2 // ...S(A6+SA7)
+
+ faddd COSB6,%fp1 // ...B6+S(B7+SB8)
+ faddd SINA5,%fp2 // ...A5+S(A6+SA7)
+
+ fmulx %fp0,%fp1 // ...S(B6+S(B7+SB8))
+ fmulx %fp0,%fp2 // ...S(A5+S(A6+SA7))
+
+ faddd COSB5,%fp1 // ...B5+S(B6+S(B7+SB8))
+ faddd SINA4,%fp2 // ...A4+S(A5+S(A6+SA7))
+
+ fmulx %fp0,%fp1 // ...S(B5+...)
+ fmulx %fp0,%fp2 // ...S(A4+...)
+
+ faddd COSB4,%fp1 // ...B4+S(B5+...)
+ faddd SINA3,%fp2 // ...A3+S(A4+...)
+
+ fmulx %fp0,%fp1 // ...S(B4+...)
+ fmulx %fp0,%fp2 // ...S(A3+...)
+
+ faddx COSB3,%fp1 // ...B3+S(B4+...)
+ faddx SINA2,%fp2 // ...A2+S(A3+...)
+
+ fmulx %fp0,%fp1 // ...S(B3+...)
+ fmulx %fp0,%fp2 // ...S(A2+...)
+
+ faddx COSB2,%fp1 // ...B2+S(B3+...)
+ faddx SINA1,%fp2 // ...A1+S(A2+...)
+
+ fmulx %fp0,%fp1 // ...S(B2+...)
+ fmulx %fp2,%fp0 // ...s(a1+...)
+
+
+
+ fadds COSB1,%fp1 // ...B1+S(B2...)
+ fmulx RPRIME(%a6),%fp0 // ...R'S(A1+...)
+ fmulx SPRIME(%a6),%fp1 // ...S'(B1+S(B2+...))
+
+ movel %d1,-(%sp) //save users mode & precision
+ andil #0xff,%d1 //mask off all exceptions
+ fmovel %d1,%FPCR
+ fadds POSNEG1(%a6),%fp1 // ...COS(X)
+ bsr sto_cos //store cosine result
+ fmovel (%sp)+,%FPCR //restore users exceptions
+ faddx RPRIME(%a6),%fp0 // ...SIN(X)
+
+ bra t_frcinx
+
+SCBORS:
+ cmpil #0x3FFF8000,%d0
+ bgt REDUCEX
+
+
+SCSM:
+ movew #0x0000,XDCARE(%a6)
+ fmoves #0x3F800000,%fp1
+
+ movel %d1,-(%sp) //save users mode & precision
+ andil #0xff,%d1 //mask off all exceptions
+ fmovel %d1,%FPCR
+ fsubs #0x00800000,%fp1
+ bsr sto_cos //store cosine result
+ fmovel (%sp)+,%FPCR //restore users exceptions
+ fmovex X(%a6),%fp0
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/ssinh.s b/c/src/lib/libcpu/m68k/m68040/fpsp/ssinh.s
new file mode 100644
index 0000000000..3fb689441a
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/ssinh.s
@@ -0,0 +1,135 @@
+//
+// ssinh.sa 3.1 12/10/90
+//
+// The entry point sSinh computes the hyperbolic sine of
+// an input argument; sSinhd does the same except for denormalized
+// input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value sinh(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 3 ulps in
+//	64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program sSINH takes approximately 280 cycles.
+//
+// Algorithm:
+//
+// SINH
+// 1. If |X| > 16380 log2, go to 3.
+//
+// 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae
+// y = |X|, sgn = sign(X), and z = expm1(Y),
+// sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
+// Exit.
+//
+// 3. If |X| > 16480 log2, go to 5.
+//
+// 4. (16380 log2 < |X| <= 16480 log2)
+// sinh(X) = sign(X) * exp(|X|)/2.
+// However, invoking exp(|X|) may cause premature overflow.
+// Thus, we calculate sinh(X) as follows:
+// Y := |X|
+// sgn := sign(X)
+// sgnFact := sgn * 2**(16380)
+// Y' := Y - 16381 log2
+// sinh(X) := sgnFact * exp(Y').
+// Exit.
+//
+// 5. (|X| > 16480 log2) sinh(X) must overflow. Return
+// sign(X)*Huge*Huge to generate overflow and an infinity with
+// the appropriate sign. Huge is the largest finite number in
+// extended format. Exit.
+//
+
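
A minimal C sketch of the three ranges above, with expm1()/exp() standing in for the setoxm1/setox kernels and rough double-format thresholds replacing the extended-format 16380/16480*log2 cutoffs.

    #include <math.h>

    double sinh_sketch(double x)
    {
        double y = fabs(x), sgn = copysign(1.0, x);

        if (y <= 700.0) {                        /* Step 2: z = expm1(|X|) */
            double z = expm1(y);
            return sgn * 0.5 * (z + z / (1.0 + z));
        }
        if (y <= 710.4) {                        /* Step 4: rescale to avoid premature overflow */
            double sgn_fact = sgn * 0x1p+100;    /* stands in for sgn * 2**16380 */
            return sgn_fact * exp(y - 101.0 * M_LN2);  /* equals sgn * exp(|X|)/2 */
        }
        return sgn * HUGE_VAL;                   /* Step 5: the FPSP computes Huge*Huge
                                                    to force overflow and +-infinity */
    }
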
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//SSINH idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+T1: .long 0x40C62D38,0xD3D64634 // ... 16381 LOG2 LEAD
+T2: .long 0x3D6F90AE,0xB1E75CC7 // ... 16381 LOG2 TRAIL
+
+ |xref t_frcinx
+ |xref t_ovfl
+ |xref t_extdnrm
+ |xref setox
+ |xref setoxm1
+
+ .global ssinhd
+ssinhd:
+//--SINH(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm
+
+ .global ssinh
+ssinh:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ movel %d0,%a1 // save a copy of original (compacted) operand
+ andl #0x7FFFFFFF,%d0
+ cmpl #0x400CB167,%d0
+ bgts SINHBIG
+
+//--THIS IS THE USUAL CASE, |X| < 16380 LOG2
+//--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
+
+ fabsx %fp0 // ...Y = |X|
+
+ moveml %a1/%d1,-(%sp)
+ fmovemx %fp0-%fp0,(%a0)
+ clrl %d1
+ bsr setoxm1 // ...FP0 IS Z = EXPM1(Y)
+ fmovel #0,%fpcr
+ moveml (%sp)+,%a1/%d1
+
+ fmovex %fp0,%fp1
+ fadds #0x3F800000,%fp1 // ...1+Z
+ fmovex %fp0,-(%sp)
+ fdivx %fp1,%fp0 // ...Z/(1+Z)
+ movel %a1,%d0
+ andl #0x80000000,%d0
+ orl #0x3F000000,%d0
+ faddx (%sp)+,%fp0
+ movel %d0,-(%sp)
+
+ fmovel %d1,%fpcr
+ fmuls (%sp)+,%fp0 //last fp inst - possible exceptions set
+
+ bra t_frcinx
+
+SINHBIG:
+ cmpl #0x400CB2B3,%d0
+ bgt t_ovfl
+ fabsx %fp0
+ fsubd T1(%pc),%fp0 // ...(|X|-16381LOG2_LEAD)
+ movel #0,-(%sp)
+ movel #0x80000000,-(%sp)
+ movel %a1,%d0
+ andl #0x80000000,%d0
+ orl #0x7FFB0000,%d0
+ movel %d0,-(%sp) // ...EXTENDED FMT
+ fsubd T2(%pc),%fp0 // ...|X| - 16381 LOG2, ACCURATE
+
+ movel %d1,-(%sp)
+ clrl %d1
+ fmovemx %fp0-%fp0,(%a0)
+ bsr setox
+ fmovel (%sp)+,%fpcr
+
+ fmulx (%sp)+,%fp0 //possible exception
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/stan.s b/c/src/lib/libcpu/m68k/m68040/fpsp/stan.s
new file mode 100644
index 0000000000..c2b8047823
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/stan.s
@@ -0,0 +1,455 @@
+//
+// stan.sa 3.3 7/29/91
+//
+// The entry point stan computes the tangent of
+// an input argument;
+// stand does the same except for denormalized input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value tan(X) returned in floating-point register Fp0.
+//
+//	Accuracy and Monotonicity: The returned result is within 3 ulps in
+//	64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program sTAN takes approximately 170 cycles for
+//	input argument X such that |X| < 15Pi, which is the usual
+// situation.
+//
+// Algorithm:
+//
+// 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
+//
+// 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+// k = N mod 2, so in particular, k = 0 or 1.
+//
+// 3. If k is odd, go to 5.
+//
+// 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
+// rational function U/V where
+// U = r + r*s*(P1 + s*(P2 + s*P3)), and
+// V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
+// Exit.
+//
+//	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
+// rational function U/V where
+// U = r + r*s*(P1 + s*(P2 + s*P3)), and
+// V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
+// -Cot(r) = -V/U. Exit.
+//
+// 6. If |X| > 1, go to 8.
+//
+// 7. (|X|<2**(-40)) Tan(X) = X. Exit.
+//
+// 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
+//
+
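
A minimal C sketch of the selection above. Libm's tan() stands in for the U/V rational approximation, the REDUCEX path is again omitted, and the function name is illustrative.

    #include <math.h>

    double tan_sketch(double x)
    {
        if (fabs(x) < 0x1p-40)                   /* Step 7: tan(X) = X for tiny X */
            return x;
        if (fabs(x) >= 15 * M_PI)                /* Steps 1/8: general reduction omitted */
            return NAN;

        double n = nearbyint(x * (2.0 / M_PI));  /* Step 2: N, r = X - N*(pi/2) */
        double r = x - n * M_PI_2;

        return ((int)fmod(n, 2.0)) ? -1.0 / tan(r)   /* Step 5: N odd  -> -cot(r) */
                                   : tan(r);         /* Step 4: N even -> tan(r)  */
    }
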
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//STAN idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+BOUNDS1: .long 0x3FD78000,0x4004BC7E
+TWOBYPI: .long 0x3FE45F30,0x6DC9C883
+
+TANQ4: .long 0x3EA0B759,0xF50F8688
+TANP3: .long 0xBEF2BAA5,0xA8924F04
+
+TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
+
+TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
+
+TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
+
+TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
+
+TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
+
+INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
+
+TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
+TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
+
+//--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
+//--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
+//--MOST 69 BITS LONG.
+ .global PITBL
+PITBL:
+ .long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
+ .long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
+ .long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
+ .long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
+ .long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
+ .long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
+ .long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
+ .long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
+ .long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
+ .long 0xC0040000,0x90836524,0x88034B96,0x20B00000
+ .long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
+ .long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
+ .long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
+ .long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
+ .long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
+ .long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
+ .long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
+ .long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
+ .long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
+ .long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
+ .long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
+ .long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
+ .long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
+ .long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
+ .long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
+ .long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
+ .long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
+ .long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
+ .long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
+ .long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
+ .long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
+ .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
+ .long 0x00000000,0x00000000,0x00000000,0x00000000
+ .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
+ .long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
+ .long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
+ .long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
+ .long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
+ .long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
+ .long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
+ .long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
+ .long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
+ .long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
+ .long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
+ .long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
+ .long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
+ .long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
+ .long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
+ .long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
+ .long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
+ .long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
+ .long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
+ .long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
+ .long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
+ .long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
+ .long 0x40040000,0x90836524,0x88034B96,0xA0B00000
+ .long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
+ .long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
+ .long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
+ .long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
+ .long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
+ .long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
+ .long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
+ .long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
+ .long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
+
+ .set INARG,FP_SCR4
+
+ .set TWOTO63,L_SCR1
+ .set ENDFLAG,L_SCR2
+ .set N,L_SCR3
+
+ | xref t_frcinx
+ |xref t_extdnrm
+
+ .global stand
+stand:
+//--TAN(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm
+
+ .global stan
+stan:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ andil #0x7FFFFFFF,%d0
+
+ cmpil #0x3FD78000,%d0 // ...|X| >= 2**(-40)?
+ bges TANOK1
+ bra TANSM
+TANOK1:
+ cmpil #0x4004BC7E,%d0 // ...|X| < 15 PI?
+ blts TANMAIN
+ bra REDUCEX
+
+
+TANMAIN:
+//--THIS IS THE USUAL CASE, |X| <= 15 PI.
+//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+ fmovex %fp0,%fp1
+ fmuld TWOBYPI,%fp1 // ...X*2/PI
+
+//--HIDE THE NEXT TWO INSTRUCTIONS
+ leal PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
+
+//--FP1 IS NOW READY
+ fmovel %fp1,%d0 // ...CONVERT TO INTEGER
+
+ asll #4,%d0
+ addal %d0,%a1 // ...ADDRESS N*PIBY2 IN Y1, Y2
+
+ fsubx (%a1)+,%fp0 // ...X-Y1
+//--HIDE THE NEXT ONE
+
+ fsubs (%a1),%fp0 // ...FP0 IS R = (X-Y1)-Y2
+
+ rorl #5,%d0
+ andil #0x80000000,%d0 // ...D0 WAS ODD IFF D0 < 0
+
+TANCONT:
+
+ cmpil #0,%d0
+ blt NODD
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...S = R*R
+
+ fmoved TANQ4,%fp3
+ fmoved TANP3,%fp2
+
+ fmulx %fp1,%fp3 // ...SQ4
+ fmulx %fp1,%fp2 // ...SP3
+
+ faddd TANQ3,%fp3 // ...Q3+SQ4
+ faddx TANP2,%fp2 // ...P2+SP3
+
+ fmulx %fp1,%fp3 // ...S(Q3+SQ4)
+ fmulx %fp1,%fp2 // ...S(P2+SP3)
+
+ faddx TANQ2,%fp3 // ...Q2+S(Q3+SQ4)
+ faddx TANP1,%fp2 // ...P1+S(P2+SP3)
+
+ fmulx %fp1,%fp3 // ...S(Q2+S(Q3+SQ4))
+ fmulx %fp1,%fp2 // ...S(P1+S(P2+SP3))
+
+ faddx TANQ1,%fp3 // ...Q1+S(Q2+S(Q3+SQ4))
+ fmulx %fp0,%fp2 // ...RS(P1+S(P2+SP3))
+
+ fmulx %fp3,%fp1 // ...S(Q1+S(Q2+S(Q3+SQ4)))
+
+
+ faddx %fp2,%fp0 // ...R+RS(P1+S(P2+SP3))
+
+
+ fadds #0x3F800000,%fp1 // ...1+S(Q1+...)
+
+ fmovel %d1,%fpcr //restore users exceptions
+ fdivx %fp1,%fp0 //last inst - possible exception set
+
+ bra t_frcinx
+
+NODD:
+ fmovex %fp0,%fp1
+ fmulx %fp0,%fp0 // ...S = R*R
+
+ fmoved TANQ4,%fp3
+ fmoved TANP3,%fp2
+
+ fmulx %fp0,%fp3 // ...SQ4
+ fmulx %fp0,%fp2 // ...SP3
+
+ faddd TANQ3,%fp3 // ...Q3+SQ4
+ faddx TANP2,%fp2 // ...P2+SP3
+
+ fmulx %fp0,%fp3 // ...S(Q3+SQ4)
+ fmulx %fp0,%fp2 // ...S(P2+SP3)
+
+ faddx TANQ2,%fp3 // ...Q2+S(Q3+SQ4)
+ faddx TANP1,%fp2 // ...P1+S(P2+SP3)
+
+ fmulx %fp0,%fp3 // ...S(Q2+S(Q3+SQ4))
+ fmulx %fp0,%fp2 // ...S(P1+S(P2+SP3))
+
+ faddx TANQ1,%fp3 // ...Q1+S(Q2+S(Q3+SQ4))
+ fmulx %fp1,%fp2 // ...RS(P1+S(P2+SP3))
+
+ fmulx %fp3,%fp0 // ...S(Q1+S(Q2+S(Q3+SQ4)))
+
+
+ faddx %fp2,%fp1 // ...R+RS(P1+S(P2+SP3))
+ fadds #0x3F800000,%fp0 // ...1+S(Q1+...)
+
+
+ fmovex %fp1,-(%sp)
+ eoril #0x80000000,(%sp)
+
+ fmovel %d1,%fpcr //restore users exceptions
+ fdivx (%sp)+,%fp0 //last inst - possible exception set
+
+ bra t_frcinx
+
+TANBORS:
+//--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
+//--IF |X| < 2**(-40), RETURN X OR 1.
+ cmpil #0x3FFF8000,%d0
+ bgts REDUCEX
+
+TANSM:
+
+ fmovex %fp0,-(%sp)
+ fmovel %d1,%fpcr //restore users exceptions
+ fmovex (%sp)+,%fp0 //last inst - possible exception set
+
+ bra t_frcinx
+
+
+REDUCEX:
+//--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
+//--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
+//--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
+
+ fmovemx %fp2-%fp5,-(%a7) // ...save FP2 through FP5
+ movel %d2,-(%a7)
+ fmoves #0x00000000,%fp1
+
+//--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
+//--there is a danger of unwanted overflow in first LOOP iteration. In this
+//--case, reduce argument by one remainder step to make subsequent reduction
+//--safe.
+ cmpil #0x7ffeffff,%d0 //is argument dangerously large?
+ bnes LOOP
+ movel #0x7ffe0000,FP_SCR2(%a6) //yes
+// ;create 2**16383*PI/2
+ movel #0xc90fdaa2,FP_SCR2+4(%a6)
+ clrl FP_SCR2+8(%a6)
+ ftstx %fp0 //test sign of argument
+ movel #0x7fdc0000,FP_SCR3(%a6) //create low half of 2**16383*
+// ;PI/2 at FP_SCR3
+ movel #0x85a308d3,FP_SCR3+4(%a6)
+ clrl FP_SCR3+8(%a6)
+ fblt red_neg
+ orw #0x8000,FP_SCR2(%a6) //positive arg
+ orw #0x8000,FP_SCR3(%a6)
+red_neg:
+ faddx FP_SCR2(%a6),%fp0 //high part of reduction is exact
+ fmovex %fp0,%fp1 //save high result in fp1
+ faddx FP_SCR3(%a6),%fp0 //low part of reduction
+ fsubx %fp0,%fp1 //determine low component of result
+ faddx FP_SCR3(%a6),%fp1 //fp0/fp1 are reduced argument.
+
+//--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
+//--integer quotient will be stored in N
+//--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
+
+LOOP:
+ fmovex %fp0,INARG(%a6) // ...+-2**K * F, 1 <= F < 2
+ movew INARG(%a6),%d0
+ movel %d0,%a1 // ...save a copy of D0
+ andil #0x00007FFF,%d0
+ subil #0x00003FFF,%d0 // ...D0 IS K
+ cmpil #28,%d0
+ bles LASTLOOP
+CONTLOOP:
+ subil #27,%d0 // ...D0 IS L := K-27
+ movel #0,ENDFLAG(%a6)
+ bras WORK
+LASTLOOP:
+ clrl %d0 // ...D0 IS L := 0
+ movel #1,ENDFLAG(%a6)
+
+WORK:
+//--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
+//--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
+
+//--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
+//--2**L * (PIby2_1), 2**L * (PIby2_2)
+
+ movel #0x00003FFE,%d2 // ...BIASED EXPO OF 2/PI
+ subl %d0,%d2 // ...BIASED EXPO OF 2**(-L)*(2/PI)
+
+ movel #0xA2F9836E,FP_SCR1+4(%a6)
+ movel #0x4E44152A,FP_SCR1+8(%a6)
+ movew %d2,FP_SCR1(%a6) // ...FP_SCR1 is 2**(-L)*(2/PI)
+
+ fmovex %fp0,%fp2
+ fmulx FP_SCR1(%a6),%fp2
+//--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
+//--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
+//--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
+//--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
+//--US THE DESIRED VALUE IN FLOATING POINT.
+
+//--HIDE SIX CYCLES OF INSTRUCTION
+ movel %a1,%d2
+ swap %d2
+ andil #0x80000000,%d2
+ oril #0x5F000000,%d2 // ...D2 IS SIGN(INARG)*2**63 IN SGL
+ movel %d2,TWOTO63(%a6)
+
+ movel %d0,%d2
+ addil #0x00003FFF,%d2 // ...BIASED EXPO OF 2**L * (PI/2)
+
+//--FP2 IS READY
+ fadds TWOTO63(%a6),%fp2 // ...THE FRACTIONAL PART OF FP1 IS ROUNDED
+
+//--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
+ movew %d2,FP_SCR2(%a6)
+ clrw FP_SCR2+2(%a6)
+ movel #0xC90FDAA2,FP_SCR2+4(%a6)
+ clrl FP_SCR2+8(%a6) // ...FP_SCR2 is 2**(L) * Piby2_1
+
+//--FP2 IS READY
+ fsubs TWOTO63(%a6),%fp2 // ...FP2 is N
+
+ addil #0x00003FDD,%d0
+ movew %d0,FP_SCR3(%a6)
+ clrw FP_SCR3+2(%a6)
+ movel #0x85A308D3,FP_SCR3+4(%a6)
+ clrl FP_SCR3+8(%a6) // ...FP_SCR3 is 2**(L) * Piby2_2
+
+ movel ENDFLAG(%a6),%d0
+
+//--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
+//--P2 = 2**(L) * Piby2_2
+ fmovex %fp2,%fp4
+ fmulx FP_SCR2(%a6),%fp4 // ...W = N*P1
+ fmovex %fp2,%fp5
+ fmulx FP_SCR3(%a6),%fp5 // ...w = N*P2
+ fmovex %fp4,%fp3
+//--we want P+p = W+w but |p| <= half ulp of P
+//--Then, we need to compute A := R-P and a := r-p
+ faddx %fp5,%fp3 // ...FP3 is P
+ fsubx %fp3,%fp4 // ...W-P
+
+ fsubx %fp3,%fp0 // ...FP0 is A := R - P
+ faddx %fp5,%fp4 // ...FP4 is p = (W-P)+w
+
+ fmovex %fp0,%fp3 // ...FP3 A
+ fsubx %fp4,%fp1 // ...FP1 is a := r - p
+
+//--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
+//--|r| <= half ulp of R.
+ faddx %fp1,%fp0 // ...FP0 is R := A+a
+//--No need to calculate r if this is the last loop
+ cmpil #0,%d0
+ bgt RESTORE
+
+//--Need to calculate r
+ fsubx %fp0,%fp3 // ...A-R
+ faddx %fp3,%fp1 // ...FP1 is r := (A-R)+a
+ bra LOOP
+
+RESTORE:
+ fmovel %fp2,N(%a6)
+ movel (%a7)+,%d2
+ fmovemx (%a7)+,%fp2-%fp5
+
+
+ movel N(%a6),%d0
+ rorl #1,%d0
+
+
+ bra TANCONT
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/stanh.s b/c/src/lib/libcpu/m68k/m68040/fpsp/stanh.s
new file mode 100644
index 0000000000..d2601cafa7
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/stanh.s
@@ -0,0 +1,185 @@
+//
+// stanh.sa 3.1 12/10/90
+//
+// The entry point sTanh computes the hyperbolic tangent of
+// an input argument; sTanhd does the same except for denormalized
+// input.
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The value tanh(X) returned in floating-point register Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 3 ulps in
+//	64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program stanh takes approximately 270 cycles.
+//
+// Algorithm:
+//
+// TANH
+// 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
+//
+// 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
+// sgn := sign(X), y := 2|X|, z := expm1(Y), and
+// tanh(X) = sgn*( z/(2+z) ).
+// Exit.
+//
+// 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
+// go to 7.
+//
+// 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
+//
+// 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
+// sgn := sign(X), y := 2|X|, z := exp(Y),
+// tanh(X) = sgn - [ sgn*2/(1+z) ].
+// Exit.
+//
+// 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
+// calculate Tanh(X) by
+// sgn := sign(X), Tiny := 2**(-126),
+// tanh(X) := sgn - sgn*Tiny.
+// Exit.
+//
+// 7. (|X| < 2**(-40)). Tanh(X) = X. Exit.
+//
+
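
A minimal C sketch of the four ranges above, with expm1()/exp() standing in for the setoxm1/setox kernels; the function name is illustrative.

    #include <math.h>

    double tanh_sketch(double x)
    {
        double ax = fabs(x), sgn = copysign(1.0, x);

        if (ax <= 0x1p-40)                       /* Step 7: tanh(X) = X */
            return x;
        if (ax < 2.5 * M_LN2) {                  /* Step 2: z = expm1(2|X|) */
            double z = expm1(2.0 * ax);
            return sgn * (z / (2.0 + z));
        }
        if (ax < 50.0 * M_LN2)                   /* Step 5: sgn - sgn*2/(1 + exp(2|X|)) */
            return sgn - sgn * 2.0 / (1.0 + exp(2.0 * ax));
        return sgn - sgn * 0x1p-126;             /* Step 6: rounds to +-1, raises inexact */
    }
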
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//STANH idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ .set X,FP_SCR5
+ .set XDCARE,X+2
+ .set XFRAC,X+4
+
+ .set SGN,L_SCR3
+
+ .set V,FP_SCR6
+
+BOUNDS1: .long 0x3FD78000,0x3FFFDDCE // ... 2^(-40), (5/2)LOG2
+
+ |xref t_frcinx
+ |xref t_extdnrm
+ |xref setox
+ |xref setoxm1
+
+ .global stanhd
+stanhd:
+//--TANH(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm
+
+ .global stanh
+stanh:
+ fmovex (%a0),%fp0 // ...LOAD INPUT
+
+ fmovex %fp0,X(%a6)
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ movel %d0,X(%a6)
+ andl #0x7FFFFFFF,%d0
+ cmp2l BOUNDS1(%pc),%d0 // ...2**(-40) < |X| < (5/2)LOG2 ?
+ bcss TANHBORS
+
+//--THIS IS THE USUAL CASE
+//--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
+
+ movel X(%a6),%d0
+ movel %d0,SGN(%a6)
+ andl #0x7FFF0000,%d0
+ addl #0x00010000,%d0 // ...EXPONENT OF 2|X|
+ movel %d0,X(%a6)
+ andl #0x80000000,SGN(%a6)
+ fmovex X(%a6),%fp0 // ...FP0 IS Y = 2|X|
+
+ movel %d1,-(%a7)
+ clrl %d1
+ fmovemx %fp0-%fp0,(%a0)
+ bsr setoxm1 // ...FP0 IS Z = EXPM1(Y)
+ movel (%a7)+,%d1
+
+ fmovex %fp0,%fp1
+ fadds #0x40000000,%fp1 // ...Z+2
+ movel SGN(%a6),%d0
+ fmovex %fp1,V(%a6)
+ eorl %d0,V(%a6)
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fdivx V(%a6),%fp0
+ bra t_frcinx
+
+TANHBORS:
+ cmpl #0x3FFF8000,%d0
+ blt TANHSM
+
+ cmpl #0x40048AA1,%d0
+ bgt TANHHUGE
+
+//-- (5/2) LOG2 < |X| < 50 LOG2,
+//--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
+//--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
+
+ movel X(%a6),%d0
+ movel %d0,SGN(%a6)
+ andl #0x7FFF0000,%d0
+ addl #0x00010000,%d0 // ...EXPO OF 2|X|
+ movel %d0,X(%a6) // ...Y = 2|X|
+ andl #0x80000000,SGN(%a6)
+ movel SGN(%a6),%d0
+ fmovex X(%a6),%fp0 // ...Y = 2|X|
+
+ movel %d1,-(%a7)
+ clrl %d1
+ fmovemx %fp0-%fp0,(%a0)
+ bsr setox // ...FP0 IS EXP(Y)
+ movel (%a7)+,%d1
+ movel SGN(%a6),%d0
+ fadds #0x3F800000,%fp0 // ...EXP(Y)+1
+
+ eorl #0xC0000000,%d0 // ...-SIGN(X)*2
+ fmoves %d0,%fp1 // ...-SIGN(X)*2 IN SGL FMT
+ fdivx %fp0,%fp1 // ...-SIGN(X)2 / [EXP(Y)+1 ]
+
+ movel SGN(%a6),%d0
+ orl #0x3F800000,%d0 // ...SGN
+ fmoves %d0,%fp0 // ...SGN IN SGL FMT
+
+ fmovel %d1,%FPCR //restore users exceptions
+ faddx %fp1,%fp0
+
+ bra t_frcinx
+
+TANHSM:
+ movew #0x0000,XDCARE(%a6)
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fmovex X(%a6),%fp0 //last inst - possible exception set
+
+ bra t_frcinx
+
+TANHHUGE:
+//---RETURN SGN(X) - SGN(X)EPS
+ movel X(%a6),%d0
+ andl #0x80000000,%d0
+ orl #0x3F800000,%d0
+ fmoves %d0,%fp0
+ andl #0x80000000,%d0
+ eorl #0x80800000,%d0 // ...-SIGN(X)*EPS
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fadds %d0,%fp0
+
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/sto_res.s b/c/src/lib/libcpu/m68k/m68040/fpsp/sto_res.s
new file mode 100644
index 0000000000..fcf98a66ed
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sto_res.s
@@ -0,0 +1,98 @@
+//
+// sto_res.sa 3.1 12/10/90
+//
+//	Takes the result and puts it where the user expects it.
+//	Library functions return the result in fp0. If fp0 is not the
+//	user's destination register, then fp0 is moved to the
+//	correct floating-point destination register. fp0 and fp1
+//	are then restored to the original contents.
+//
+// Input: result in fp0,fp1
+//
+// d2 & a0 should be kept unmodified
+//
+// Output: moves the result to the true destination reg or mem
+//
+// Modifies: destination floating point register
+//
+
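
A small C illustration of the dynamic FMOVEM register mask built below for destination registers fp4-fp7 (fp0-fp3 are written back through the saved USER_FPn frame slots instead); the function name is illustrative.

    #include <stdio.h>

    static unsigned fmovem_mask(unsigned dest)   /* dest = 4..7 */
    {
        return 1u << (7 - dest);                 /* d0 = bit (7 - reg no.), as in the code */
    }

    int main(void)
    {
        for (unsigned r = 4; r <= 7; r++)        /* fp4 -> 0x08 ... fp7 -> 0x01 */
            printf("fp%u -> mask 0x%02x\n", r, fmovem_mask(r));
        return 0;
    }
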
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+STO_RES: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ .global sto_cos
+sto_cos:
+ bfextu CMDREG1B(%a6){#13:#3},%d0 //extract cos destination
+ cmpib #3,%d0 //check for fp0/fp1 cases
+ bles c_fp0123
+ fmovemx %fp1-%fp1,-(%a7)
+ moveql #7,%d1
+ subl %d0,%d1 //d1 = 7- (dest. reg. no.)
+ clrl %d0
+ bsetl %d1,%d0 //d0 is dynamic register mask
+ fmovemx (%a7)+,%d0
+ rts
+c_fp0123:
+ cmpib #0,%d0
+ beqs c_is_fp0
+ cmpib #1,%d0
+ beqs c_is_fp1
+ cmpib #2,%d0
+ beqs c_is_fp2
+c_is_fp3:
+ fmovemx %fp1-%fp1,USER_FP3(%a6)
+ rts
+c_is_fp2:
+ fmovemx %fp1-%fp1,USER_FP2(%a6)
+ rts
+c_is_fp1:
+ fmovemx %fp1-%fp1,USER_FP1(%a6)
+ rts
+c_is_fp0:
+ fmovemx %fp1-%fp1,USER_FP0(%a6)
+ rts
+
+
+ .global sto_res
+sto_res:
+ bfextu CMDREG1B(%a6){#6:#3},%d0 //extract destination register
+ cmpib #3,%d0 //check for fp0/fp1 cases
+ bles fp0123
+ fmovemx %fp0-%fp0,-(%a7)
+ moveql #7,%d1
+ subl %d0,%d1 //d1 = 7- (dest. reg. no.)
+ clrl %d0
+ bsetl %d1,%d0 //d0 is dynamic register mask
+ fmovemx (%a7)+,%d0
+ rts
+fp0123:
+ cmpib #0,%d0
+ beqs is_fp0
+ cmpib #1,%d0
+ beqs is_fp1
+ cmpib #2,%d0
+ beqs is_fp2
+is_fp3:
+ fmovemx %fp0-%fp0,USER_FP3(%a6)
+ rts
+is_fp2:
+ fmovemx %fp0-%fp0,USER_FP2(%a6)
+ rts
+is_fp1:
+ fmovemx %fp0-%fp0,USER_FP1(%a6)
+ rts
+is_fp0:
+ fmovemx %fp0-%fp0,USER_FP0(%a6)
+ rts
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/stwotox.s b/c/src/lib/libcpu/m68k/m68040/fpsp/stwotox.s
new file mode 100644
index 0000000000..e4db1c9765
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/stwotox.s
@@ -0,0 +1,427 @@
+//
+// stwotox.sa 3.1 12/10/90
+//
+// stwotox --- 2**X
+// stwotoxd --- 2**X for denormalized X
+// stentox --- 10**X
+// stentoxd --- 10**X for denormalized X
+//
+// Input: Double-extended number X in location pointed to
+// by address register a0.
+//
+// Output: The function values are returned in Fp0.
+//
+// Accuracy and Monotonicity: The returned result is within 2 ulps in
+//	64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+// result is subsequently rounded to double precision. The
+// result is provably monotonic in double precision.
+//
+// Speed: The program stwotox takes approximately 190 cycles and the
+// program stentox takes approximately 200 cycles.
+//
+// Algorithm:
+//
+// twotox
+// 1. If |X| > 16480, go to ExpBig.
+//
+// 2. If |X| < 2**(-70), go to ExpSm.
+//
+// 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
+// decompose N as
+// N = 64(M + M') + j, j = 0,1,2,...,63.
+//
+// 4. Overwrite r := r * log2. Then
+// 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+// Go to expr to compute that expression.
+//
+// tentox
+// 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
+//
+// 2. If |X| < 2**(-70), go to ExpSm.
+//
+// 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
+// N := round-to-int(y). Decompose N as
+// N = 64(M + M') + j, j = 0,1,2,...,63.
+//
+// 4. Define r as
+// r := ((X - N*L1)-N*L2) * L10
+// where L1, L2 are the leading and trailing parts of log_10(2)/64
+// and L10 is the natural log of 10. Then
+// 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+// Go to expr to compute that expression.
+//
+// expr
+// 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
+//
+// 2. Overwrite Fact1 and Fact2 by
+// Fact1 := 2**(M) * Fact1
+// Fact2 := 2**(M) * Fact2
+// Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
+//
+// 3. Calculate P where 1 + P approximates exp(r):
+// P = r + r*r*(A1+r*(A2+...+r*A5)).
+//
+// 4. Let AdjFact := 2**(M'). Return
+// AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
+// Exit.
+//
+// ExpBig
+// 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
+// underflow by Tiny * Tiny.
+//
+// ExpSm
+// 1. Return 1 + X.
+//
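+//
+// For illustration only, the twotox decomposition in C (a rough sketch;
+// the name twotox_sketch and the use of libm's exp/exp2 in place of the
+// EXPTBL entries and the EXPA1..EXPA5 polynomial are not part of the FPSP):
+//
+//      #include <math.h>
+//
+//      static double twotox_sketch(double x)
+//      {
+//          long n  = lrint(64.0 * x);     /* N  = round-to-int(64*X)     */
+//          long j  = n & 63;              /* table index j, 0..63        */
+//          long l  = n >> 6;              /* assumes arithmetic shift,   */
+//          long m  = l >> 1;              /*   as the asr's below; M     */
+//          long mp = l - m;               /* M', so N = 64*(M+M') + j    */
+//          double r = (x - n / 64.0) * 0.6931471805599453; /* r*log(2)   */
+//          /* 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r)               */
+//          return ldexp(ldexp(exp2(j / 64.0) * exp(r), (int)m), (int)mp);
+//      }
+//
+// The FPSP splits 2**(j/64) into leading and trailing parts (FACT1/FACT2)
+// and folds 2**(M) into their exponent words; the sketch collapses that
+// into ldexp.  The tentox path differs only in how N and r are formed.
+//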
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+BOUNDS1: .long 0x3FB98000,0x400D80C0 // ... 2^(-70),16480
+BOUNDS2: .long 0x3FB98000,0x400B9B07 // ... 2^(-70),16480 LOG2/LOG10
+
+L2TEN64: .long 0x406A934F,0x0979A371 // ... 64LOG10/LOG2
+L10TWO1: .long 0x3F734413,0x509F8000 // ... LOG2/64LOG10
+
+L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
+
+LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
+
+LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
+
+EXPA5: .long 0x3F56C16D,0x6F7BD0B2
+EXPA4: .long 0x3F811112,0x302C712C
+EXPA3: .long 0x3FA55555,0x55554CC1
+EXPA2: .long 0x3FC55555,0x55554A54
+EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000
+
+HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+
+EXPTBL:
+ .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
+ .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
+ .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
+ .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
+ .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
+ .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
+ .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
+ .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
+ .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
+ .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
+ .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
+ .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
+ .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
+ .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
+ .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
+ .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
+ .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
+ .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
+ .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
+ .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
+ .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
+ .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
+ .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
+ .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
+ .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
+ .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
+ .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
+ .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
+ .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
+ .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
+ .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
+ .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
+ .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
+ .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
+ .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
+ .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
+ .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
+ .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
+ .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
+ .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
+ .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
+ .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
+ .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
+ .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
+ .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
+ .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
+ .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
+ .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
+ .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
+ .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
+ .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
+ .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
+ .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
+ .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
+ .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
+ .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
+ .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
+ .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
+ .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
+ .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
+ .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
+ .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
+ .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
+ .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
+
+ .set N,L_SCR1
+
+ .set X,FP_SCR1
+ .set XDCARE,X+2
+ .set XFRAC,X+4
+
+ .set ADJFACT,FP_SCR2
+
+ .set FACT1,FP_SCR3
+ .set FACT1HI,FACT1+4
+ .set FACT1LOW,FACT1+8
+
+ .set FACT2,FP_SCR4
+ .set FACT2HI,FACT2+4
+ .set FACT2LOW,FACT2+8
+
+ | xref t_unfl
+ |xref t_ovfl
+ |xref t_frcinx
+
+ .global stwotoxd
+stwotoxd:
+//--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
+
+ fmovel %d1,%fpcr // ...set user's rounding mode/precision
+ fmoves #0x3F800000,%fp0 // ...RETURN 1 + X
+ movel (%a0),%d0
+ orl #0x00800001,%d0
+ fadds %d0,%fp0
+ bra t_frcinx
+
+ .global stwotox
+stwotox:
+//--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+ fmovemx (%a0),%fp0-%fp0 // ...LOAD INPUT, do not set cc's
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ fmovex %fp0,X(%a6)
+ andil #0x7FFFFFFF,%d0
+
+ cmpil #0x3FB98000,%d0 // ...|X| >= 2**(-70)?
+ bges TWOOK1
+ bra EXPBORS
+
+TWOOK1:
+ cmpil #0x400D80C0,%d0 // ...|X| > 16480?
+ bles TWOMAIN
+ bra EXPBORS
+
+
+TWOMAIN:
+//--USUAL CASE, 2^(-70) <= |X| <= 16480
+
+ fmovex %fp0,%fp1
+ fmuls #0x42800000,%fp1 // ...64 * X
+
+ fmovel %fp1,N(%a6) // ...N = ROUND-TO-INT(64 X)
+ movel %d2,-(%sp)
+ lea EXPTBL,%a1 // ...LOAD ADDRESS OF TABLE OF 2^(J/64)
+ fmovel N(%a6),%fp1 // ...N --> FLOATING FMT
+ movel N(%a6),%d0
+ movel %d0,%d2
+ andil #0x3F,%d0 // ...D0 IS J
+ asll #4,%d0 // ...DISPLACEMENT FOR 2^(J/64)
+ addal %d0,%a1 // ...ADDRESS FOR 2^(J/64)
+ asrl #6,%d2 // ...d2 IS L, N = 64L + J
+ movel %d2,%d0
+ asrl #1,%d0 // ...D0 IS M
+ subl %d0,%d2 // ...d2 IS M', N = 64(M+M') + J
+ addil #0x3FFF,%d2
+ movew %d2,ADJFACT(%a6) // ...ADJFACT IS 2^(M')
+ movel (%sp)+,%d2
+//--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+//--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+//--ADJFACT = 2^(M').
+//--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
+
+ fmuls #0x3C800000,%fp1 // ...(1/64)*N
+ movel (%a1)+,FACT1(%a6)
+ movel (%a1)+,FACT1HI(%a6)
+ movel (%a1)+,FACT1LOW(%a6)
+ movew (%a1)+,FACT2(%a6)
+ clrw FACT2+2(%a6)
+
+ fsubx %fp1,%fp0 // ...X - (1/64)*INT(64 X)
+
+ movew (%a1)+,FACT2HI(%a6)
+ clrw FACT2HI+2(%a6)
+ clrl FACT2LOW(%a6)
+ addw %d0,FACT1(%a6)
+
+ fmulx LOG2,%fp0 // ...FP0 IS R
+ addw %d0,FACT2(%a6)
+
+ bra expr
+
+EXPBORS:
+//--FPCR, D0 SAVED
+ cmpil #0x3FFF8000,%d0
+ bgts EXPBIG
+
+EXPSM:
+//--|X| IS SMALL, RETURN 1 + X
+
+ fmovel %d1,%FPCR //restore users exceptions
+ fadds #0x3F800000,%fp0 // ...RETURN 1 + X
+
+ bra t_frcinx
+
+EXPBIG:
+//--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
+//--REGISTERS SAVED SO FAR ARE FPCR AND D0
+ movel X(%a6),%d0
+ cmpil #0,%d0
+ blts EXPNEG
+
+ bclrb #7,(%a0) //t_ovfl expects positive value
+ bra t_ovfl
+
+EXPNEG:
+ bclrb #7,(%a0) //t_unfl expects positive value
+ bra t_unfl
+
+ .global stentoxd
+stentoxd:
+//--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
+
+ fmovel %d1,%fpcr // ...set user's rounding mode/precision
+ fmoves #0x3F800000,%fp0 // ...RETURN 1 + X
+ movel (%a0),%d0
+ orl #0x00800001,%d0
+ fadds %d0,%fp0
+ bra t_frcinx
+
+ .global stentox
+stentox:
+//--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+ fmovemx (%a0),%fp0-%fp0 // ...LOAD INPUT, do not set cc's
+
+ movel (%a0),%d0
+ movew 4(%a0),%d0
+ fmovex %fp0,X(%a6)
+ andil #0x7FFFFFFF,%d0
+
+ cmpil #0x3FB98000,%d0 // ...|X| >= 2**(-70)?
+ bges TENOK1
+ bra EXPBORS
+
+TENOK1:
+ cmpil #0x400B9B07,%d0 // ...|X| <= 16480*log2/log10 ?
+ bles TENMAIN
+ bra EXPBORS
+
+TENMAIN:
+//--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
+
+ fmovex %fp0,%fp1
+ fmuld L2TEN64,%fp1 // ...X*64*LOG10/LOG2
+
+ fmovel %fp1,N(%a6) // ...N=INT(X*64*LOG10/LOG2)
+ movel %d2,-(%sp)
+ lea EXPTBL,%a1 // ...LOAD ADDRESS OF TABLE OF 2^(J/64)
+ fmovel N(%a6),%fp1 // ...N --> FLOATING FMT
+ movel N(%a6),%d0
+ movel %d0,%d2
+ andil #0x3F,%d0 // ...D0 IS J
+ asll #4,%d0 // ...DISPLACEMENT FOR 2^(J/64)
+ addal %d0,%a1 // ...ADDRESS FOR 2^(J/64)
+ asrl #6,%d2 // ...d2 IS L, N = 64L + J
+ movel %d2,%d0
+ asrl #1,%d0 // ...D0 IS M
+ subl %d0,%d2 // ...d2 IS M', N = 64(M+M') + J
+ addil #0x3FFF,%d2
+ movew %d2,ADJFACT(%a6) // ...ADJFACT IS 2^(M')
+ movel (%sp)+,%d2
+
+//--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+//--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+//--ADJFACT = 2^(M').
+//--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
+
+ fmovex %fp1,%fp2
+
+ fmuld L10TWO1,%fp1 // ...N*(LOG2/64LOG10)_LEAD
+ movel (%a1)+,FACT1(%a6)
+
+ fmulx L10TWO2,%fp2 // ...N*(LOG2/64LOG10)_TRAIL
+
+ movel (%a1)+,FACT1HI(%a6)
+ movel (%a1)+,FACT1LOW(%a6)
+ fsubx %fp1,%fp0 // ...X - N L_LEAD
+ movew (%a1)+,FACT2(%a6)
+
+ fsubx %fp2,%fp0 // ...X - N L_TRAIL
+
+ clrw FACT2+2(%a6)
+ movew (%a1)+,FACT2HI(%a6)
+ clrw FACT2HI+2(%a6)
+ clrl FACT2LOW(%a6)
+
+ fmulx LOG10,%fp0 // ...FP0 IS R
+
+ addw %d0,FACT1(%a6)
+ addw %d0,FACT2(%a6)
+
+expr:
+//--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
+//--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
+//--FP0 IS R. THE FOLLOWING CODE COMPUTES
+//-- 2**(M'+M) * 2**(J/64) * EXP(R)
+
+ fmovex %fp0,%fp1
+ fmulx %fp1,%fp1 // ...FP1 IS S = R*R
+
+ fmoved EXPA5,%fp2 // ...FP2 IS A5
+ fmoved EXPA4,%fp3 // ...FP3 IS A4
+
+ fmulx %fp1,%fp2 // ...FP2 IS S*A5
+ fmulx %fp1,%fp3 // ...FP3 IS S*A4
+
+ faddd EXPA3,%fp2 // ...FP2 IS A3+S*A5
+ faddd EXPA2,%fp3 // ...FP3 IS A2+S*A4
+
+ fmulx %fp1,%fp2 // ...FP2 IS S*(A3+S*A5)
+ fmulx %fp1,%fp3 // ...FP3 IS S*(A2+S*A4)
+
+ faddd EXPA1,%fp2 // ...FP2 IS A1+S*(A3+S*A5)
+ fmulx %fp0,%fp3 // ...FP3 IS R*S*(A2+S*A4)
+
+ fmulx %fp1,%fp2 // ...FP2 IS S*(A1+S*(A3+S*A5))
+ faddx %fp3,%fp0 // ...FP0 IS R+R*S*(A2+S*A4)
+
+ faddx %fp2,%fp0 // ...FP0 IS EXP(R) - 1
+
+
+//--FINAL RECONSTRUCTION PROCESS
+//--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
+
+ fmulx FACT1(%a6),%fp0
+ faddx FACT2(%a6),%fp0
+ faddx FACT1(%a6),%fp0
+
+ fmovel %d1,%FPCR //restore users exceptions
+ clrw ADJFACT+2(%a6)
+ movel #0x80000000,ADJFACT+4(%a6)
+ clrl ADJFACT+8(%a6)
+ fmulx ADJFACT(%a6),%fp0 // ...FINAL ADJUSTMENT
+
+ bra t_frcinx
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/tbldo.s b/c/src/lib/libcpu/m68k/m68040/fpsp/tbldo.s
new file mode 100644
index 0000000000..7c64dae3e0
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/tbldo.s
@@ -0,0 +1,554 @@
+//
+// tbldo.sa 3.1 12/10/90
+//
+// Modified:
+// 8/16/90 chinds The table was constructed to use only one level
+// of indirection in do_func for monadic
+// functions. Dyadic functions require two
+// levels, and the tables are still contained
+//		in do_func. The table is indexed by a
+//		10-bit value: the first 7 bits are the
+//		opcode and the remaining 3 are the stag.
+//		For dyadic functions, all
+// valid addresses are to the generic entry
+// point.
+//
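+//
+// A rough C sketch of the lookup (illustrative only; the names
+// fpsp_handler and lookup are hypothetical, not part of the FPSP):
+//
+//      typedef void (*fpsp_handler)(void);
+//      extern fpsp_handler tblpre[];          /* 8 entries per opcode */
+//
+//      static fpsp_handler lookup(unsigned opcode, unsigned stag)
+//      {
+//              /* 7-bit opcode, 3-bit stag -> 10-bit table index */
+//              return tblpre[(opcode << 3) | (stag & 7)];
+//      }
+//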
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//TBLDO idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ |xref ld_pinf,ld_pone,ld_ppi2
+ |xref t_dz2,t_operr
+ |xref serror,sone,szero,sinf,snzrinx
+ |xref sopr_inf,spi_2,src_nan,szr_inf
+
+ |xref smovcr
+ |xref pmod,prem,pscale
+ |xref satanh,satanhd
+ |xref sacos,sacosd,sasin,sasind,satan,satand
+ |xref setox,setoxd,setoxm1,setoxm1d,setoxm1i
+ |xref sgetexp,sgetexpd,sgetman,sgetmand
+ |xref sint,sintd,sintrz
+ |xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz
+ |xref scos,scosd,ssin,ssind,stan,stand
+ |xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd
+ |xref sslog10,sslog2,sslogn,sslognp1
+ |xref sslog10d,sslog2d,sslognd,slognp1d
+ |xref stentox,stentoxd,stwotox,stwotoxd
+
+// instruction ;opcode-stag Notes
+ .global tblpre
+tblpre:
+ .long smovcr //$00-0 fmovecr all
+ .long smovcr //$00-1 fmovecr all
+ .long smovcr //$00-2 fmovecr all
+ .long smovcr //$00-3 fmovecr all
+ .long smovcr //$00-4 fmovecr all
+ .long smovcr //$00-5 fmovecr all
+ .long smovcr //$00-6 fmovecr all
+ .long smovcr //$00-7 fmovecr all
+
+ .long sint //$01-0 fint norm
+ .long szero //$01-1 fint zero
+ .long sinf //$01-2 fint inf
+ .long src_nan //$01-3 fint nan
+ .long sintd //$01-4 fint denorm inx
+ .long serror //$01-5 fint ERROR
+ .long serror //$01-6 fint ERROR
+ .long serror //$01-7 fint ERROR
+
+ .long ssinh //$02-0 fsinh norm
+ .long szero //$02-1 fsinh zero
+ .long sinf //$02-2 fsinh inf
+ .long src_nan //$02-3 fsinh nan
+ .long ssinhd //$02-4 fsinh denorm
+ .long serror //$02-5 fsinh ERROR
+ .long serror //$02-6 fsinh ERROR
+ .long serror //$02-7 fsinh ERROR
+
+ .long sintrz //$03-0 fintrz norm
+ .long szero //$03-1 fintrz zero
+ .long sinf //$03-2 fintrz inf
+ .long src_nan //$03-3 fintrz nan
+ .long snzrinx //$03-4 fintrz denorm inx
+ .long serror //$03-5 fintrz ERROR
+ .long serror //$03-6 fintrz ERROR
+ .long serror //$03-7 fintrz ERROR
+
+ .long serror //$04-0 ERROR - illegal extension
+ .long serror //$04-1 ERROR - illegal extension
+ .long serror //$04-2 ERROR - illegal extension
+ .long serror //$04-3 ERROR - illegal extension
+ .long serror //$04-4 ERROR - illegal extension
+ .long serror //$04-5 ERROR - illegal extension
+ .long serror //$04-6 ERROR - illegal extension
+ .long serror //$04-7 ERROR - illegal extension
+
+ .long serror //$05-0 ERROR - illegal extension
+ .long serror //$05-1 ERROR - illegal extension
+ .long serror //$05-2 ERROR - illegal extension
+ .long serror //$05-3 ERROR - illegal extension
+ .long serror //$05-4 ERROR - illegal extension
+ .long serror //$05-5 ERROR - illegal extension
+ .long serror //$05-6 ERROR - illegal extension
+ .long serror //$05-7 ERROR - illegal extension
+
+ .long sslognp1 //$06-0 flognp1 norm
+ .long szero //$06-1 flognp1 zero
+ .long sopr_inf //$06-2 flognp1 inf
+ .long src_nan //$06-3 flognp1 nan
+ .long slognp1d //$06-4 flognp1 denorm
+ .long serror //$06-5 flognp1 ERROR
+ .long serror //$06-6 flognp1 ERROR
+ .long serror //$06-7 flognp1 ERROR
+
+ .long serror //$07-0 ERROR - illegal extension
+ .long serror //$07-1 ERROR - illegal extension
+ .long serror //$07-2 ERROR - illegal extension
+ .long serror //$07-3 ERROR - illegal extension
+ .long serror //$07-4 ERROR - illegal extension
+ .long serror //$07-5 ERROR - illegal extension
+ .long serror //$07-6 ERROR - illegal extension
+ .long serror //$07-7 ERROR - illegal extension
+
+ .long setoxm1 //$08-0 fetoxm1 norm
+ .long szero //$08-1 fetoxm1 zero
+ .long setoxm1i //$08-2 fetoxm1 inf
+ .long src_nan //$08-3 fetoxm1 nan
+ .long setoxm1d //$08-4 fetoxm1 denorm
+ .long serror //$08-5 fetoxm1 ERROR
+ .long serror //$08-6 fetoxm1 ERROR
+ .long serror //$08-7 fetoxm1 ERROR
+
+ .long stanh //$09-0 ftanh norm
+ .long szero //$09-1 ftanh zero
+ .long sone //$09-2 ftanh inf
+ .long src_nan //$09-3 ftanh nan
+ .long stanhd //$09-4 ftanh denorm
+ .long serror //$09-5 ftanh ERROR
+ .long serror //$09-6 ftanh ERROR
+ .long serror //$09-7 ftanh ERROR
+
+ .long satan //$0a-0 fatan norm
+ .long szero //$0a-1 fatan zero
+ .long spi_2 //$0a-2 fatan inf
+ .long src_nan //$0a-3 fatan nan
+ .long satand //$0a-4 fatan denorm
+ .long serror //$0a-5 fatan ERROR
+ .long serror //$0a-6 fatan ERROR
+ .long serror //$0a-7 fatan ERROR
+
+ .long serror //$0b-0 ERROR - illegal extension
+ .long serror //$0b-1 ERROR - illegal extension
+ .long serror //$0b-2 ERROR - illegal extension
+ .long serror //$0b-3 ERROR - illegal extension
+ .long serror //$0b-4 ERROR - illegal extension
+ .long serror //$0b-5 ERROR - illegal extension
+ .long serror //$0b-6 ERROR - illegal extension
+ .long serror //$0b-7 ERROR - illegal extension
+
+ .long sasin //$0c-0 fasin norm
+ .long szero //$0c-1 fasin zero
+ .long t_operr //$0c-2 fasin inf
+ .long src_nan //$0c-3 fasin nan
+ .long sasind //$0c-4 fasin denorm
+ .long serror //$0c-5 fasin ERROR
+ .long serror //$0c-6 fasin ERROR
+ .long serror //$0c-7 fasin ERROR
+
+ .long satanh //$0d-0 fatanh norm
+ .long szero //$0d-1 fatanh zero
+ .long t_operr //$0d-2 fatanh inf
+ .long src_nan //$0d-3 fatanh nan
+ .long satanhd //$0d-4 fatanh denorm
+ .long serror //$0d-5 fatanh ERROR
+ .long serror //$0d-6 fatanh ERROR
+ .long serror //$0d-7 fatanh ERROR
+
+ .long ssin //$0e-0 fsin norm
+ .long szero //$0e-1 fsin zero
+ .long t_operr //$0e-2 fsin inf
+ .long src_nan //$0e-3 fsin nan
+ .long ssind //$0e-4 fsin denorm
+ .long serror //$0e-5 fsin ERROR
+ .long serror //$0e-6 fsin ERROR
+ .long serror //$0e-7 fsin ERROR
+
+ .long stan //$0f-0 ftan norm
+ .long szero //$0f-1 ftan zero
+ .long t_operr //$0f-2 ftan inf
+ .long src_nan //$0f-3 ftan nan
+ .long stand //$0f-4 ftan denorm
+ .long serror //$0f-5 ftan ERROR
+ .long serror //$0f-6 ftan ERROR
+ .long serror //$0f-7 ftan ERROR
+
+ .long setox //$10-0 fetox norm
+ .long ld_pone //$10-1 fetox zero
+ .long szr_inf //$10-2 fetox inf
+ .long src_nan //$10-3 fetox nan
+ .long setoxd //$10-4 fetox denorm
+ .long serror //$10-5 fetox ERROR
+ .long serror //$10-6 fetox ERROR
+ .long serror //$10-7 fetox ERROR
+
+ .long stwotox //$11-0 ftwotox norm
+ .long ld_pone //$11-1 ftwotox zero
+ .long szr_inf //$11-2 ftwotox inf
+ .long src_nan //$11-3 ftwotox nan
+ .long stwotoxd //$11-4 ftwotox denorm
+ .long serror //$11-5 ftwotox ERROR
+ .long serror //$11-6 ftwotox ERROR
+ .long serror //$11-7 ftwotox ERROR
+
+ .long stentox //$12-0 ftentox norm
+ .long ld_pone //$12-1 ftentox zero
+ .long szr_inf //$12-2 ftentox inf
+ .long src_nan //$12-3 ftentox nan
+ .long stentoxd //$12-4 ftentox denorm
+ .long serror //$12-5 ftentox ERROR
+ .long serror //$12-6 ftentox ERROR
+ .long serror //$12-7 ftentox ERROR
+
+ .long serror //$13-0 ERROR - illegal extension
+ .long serror //$13-1 ERROR - illegal extension
+ .long serror //$13-2 ERROR - illegal extension
+ .long serror //$13-3 ERROR - illegal extension
+ .long serror //$13-4 ERROR - illegal extension
+ .long serror //$13-5 ERROR - illegal extension
+ .long serror //$13-6 ERROR - illegal extension
+ .long serror //$13-7 ERROR - illegal extension
+
+ .long sslogn //$14-0 flogn norm
+ .long t_dz2 //$14-1 flogn zero
+ .long sopr_inf //$14-2 flogn inf
+ .long src_nan //$14-3 flogn nan
+ .long sslognd //$14-4 flogn denorm
+ .long serror //$14-5 flogn ERROR
+ .long serror //$14-6 flogn ERROR
+ .long serror //$14-7 flogn ERROR
+
+ .long sslog10 //$15-0 flog10 norm
+ .long t_dz2 //$15-1 flog10 zero
+ .long sopr_inf //$15-2 flog10 inf
+ .long src_nan //$15-3 flog10 nan
+ .long sslog10d //$15-4 flog10 denorm
+ .long serror //$15-5 flog10 ERROR
+ .long serror //$15-6 flog10 ERROR
+ .long serror //$15-7 flog10 ERROR
+
+ .long sslog2 //$16-0 flog2 norm
+ .long t_dz2 //$16-1 flog2 zero
+ .long sopr_inf //$16-2 flog2 inf
+ .long src_nan //$16-3 flog2 nan
+ .long sslog2d //$16-4 flog2 denorm
+ .long serror //$16-5 flog2 ERROR
+ .long serror //$16-6 flog2 ERROR
+ .long serror //$16-7 flog2 ERROR
+
+ .long serror //$17-0 ERROR - illegal extension
+ .long serror //$17-1 ERROR - illegal extension
+ .long serror //$17-2 ERROR - illegal extension
+ .long serror //$17-3 ERROR - illegal extension
+ .long serror //$17-4 ERROR - illegal extension
+ .long serror //$17-5 ERROR - illegal extension
+ .long serror //$17-6 ERROR - illegal extension
+ .long serror //$17-7 ERROR - illegal extension
+
+ .long serror //$18-0 ERROR - illegal extension
+ .long serror //$18-1 ERROR - illegal extension
+ .long serror //$18-2 ERROR - illegal extension
+ .long serror //$18-3 ERROR - illegal extension
+ .long serror //$18-4 ERROR - illegal extension
+ .long serror //$18-5 ERROR - illegal extension
+ .long serror //$18-6 ERROR - illegal extension
+ .long serror //$18-7 ERROR - illegal extension
+
+ .long scosh //$19-0 fcosh norm
+ .long ld_pone //$19-1 fcosh zero
+ .long ld_pinf //$19-2 fcosh inf
+ .long src_nan //$19-3 fcosh nan
+ .long scoshd //$19-4 fcosh denorm
+ .long serror //$19-5 fcosh ERROR
+ .long serror //$19-6 fcosh ERROR
+ .long serror //$19-7 fcosh ERROR
+
+ .long serror //$1a-0 ERROR - illegal extension
+ .long serror //$1a-1 ERROR - illegal extension
+ .long serror //$1a-2 ERROR - illegal extension
+ .long serror //$1a-3 ERROR - illegal extension
+ .long serror //$1a-4 ERROR - illegal extension
+ .long serror //$1a-5 ERROR - illegal extension
+ .long serror //$1a-6 ERROR - illegal extension
+ .long serror //$1a-7 ERROR - illegal extension
+
+ .long serror //$1b-0 ERROR - illegal extension
+ .long serror //$1b-1 ERROR - illegal extension
+ .long serror //$1b-2 ERROR - illegal extension
+ .long serror //$1b-3 ERROR - illegal extension
+ .long serror //$1b-4 ERROR - illegal extension
+ .long serror //$1b-5 ERROR - illegal extension
+ .long serror //$1b-6 ERROR - illegal extension
+ .long serror //$1b-7 ERROR - illegal extension
+
+ .long sacos //$1c-0 facos norm
+ .long ld_ppi2 //$1c-1 facos zero
+ .long t_operr //$1c-2 facos inf
+ .long src_nan //$1c-3 facos nan
+ .long sacosd //$1c-4 facos denorm
+ .long serror //$1c-5 facos ERROR
+ .long serror //$1c-6 facos ERROR
+ .long serror //$1c-7 facos ERROR
+
+ .long scos //$1d-0 fcos norm
+ .long ld_pone //$1d-1 fcos zero
+ .long t_operr //$1d-2 fcos inf
+ .long src_nan //$1d-3 fcos nan
+ .long scosd //$1d-4 fcos denorm
+ .long serror //$1d-5 fcos ERROR
+ .long serror //$1d-6 fcos ERROR
+ .long serror //$1d-7 fcos ERROR
+
+ .long sgetexp //$1e-0 fgetexp norm
+ .long szero //$1e-1 fgetexp zero
+ .long t_operr //$1e-2 fgetexp inf
+ .long src_nan //$1e-3 fgetexp nan
+ .long sgetexpd //$1e-4 fgetexp denorm
+ .long serror //$1e-5 fgetexp ERROR
+ .long serror //$1e-6 fgetexp ERROR
+ .long serror //$1e-7 fgetexp ERROR
+
+ .long sgetman //$1f-0 fgetman norm
+ .long szero //$1f-1 fgetman zero
+ .long t_operr //$1f-2 fgetman inf
+ .long src_nan //$1f-3 fgetman nan
+ .long sgetmand //$1f-4 fgetman denorm
+ .long serror //$1f-5 fgetman ERROR
+ .long serror //$1f-6 fgetman ERROR
+ .long serror //$1f-7 fgetman ERROR
+
+ .long serror //$20-0 ERROR - illegal extension
+ .long serror //$20-1 ERROR - illegal extension
+ .long serror //$20-2 ERROR - illegal extension
+ .long serror //$20-3 ERROR - illegal extension
+ .long serror //$20-4 ERROR - illegal extension
+ .long serror //$20-5 ERROR - illegal extension
+ .long serror //$20-6 ERROR - illegal extension
+ .long serror //$20-7 ERROR - illegal extension
+
+ .long pmod //$21-0 fmod all
+ .long pmod //$21-1 fmod all
+ .long pmod //$21-2 fmod all
+ .long pmod //$21-3 fmod all
+ .long pmod //$21-4 fmod all
+ .long serror //$21-5 fmod ERROR
+ .long serror //$21-6 fmod ERROR
+ .long serror //$21-7 fmod ERROR
+
+ .long serror //$22-0 ERROR - illegal extension
+ .long serror //$22-1 ERROR - illegal extension
+ .long serror //$22-2 ERROR - illegal extension
+ .long serror //$22-3 ERROR - illegal extension
+ .long serror //$22-4 ERROR - illegal extension
+ .long serror //$22-5 ERROR - illegal extension
+ .long serror //$22-6 ERROR - illegal extension
+ .long serror //$22-7 ERROR - illegal extension
+
+ .long serror //$23-0 ERROR - illegal extension
+ .long serror //$23-1 ERROR - illegal extension
+ .long serror //$23-2 ERROR - illegal extension
+ .long serror //$23-3 ERROR - illegal extension
+ .long serror //$23-4 ERROR - illegal extension
+ .long serror //$23-5 ERROR - illegal extension
+ .long serror //$23-6 ERROR - illegal extension
+ .long serror //$23-7 ERROR - illegal extension
+
+ .long serror //$24-0 ERROR - illegal extension
+ .long serror //$24-1 ERROR - illegal extension
+ .long serror //$24-2 ERROR - illegal extension
+ .long serror //$24-3 ERROR - illegal extension
+ .long serror //$24-4 ERROR - illegal extension
+ .long serror //$24-5 ERROR - illegal extension
+ .long serror //$24-6 ERROR - illegal extension
+ .long serror //$24-7 ERROR - illegal extension
+
+ .long prem //$25-0 frem all
+ .long prem //$25-1 frem all
+ .long prem //$25-2 frem all
+ .long prem //$25-3 frem all
+ .long prem //$25-4 frem all
+ .long serror //$25-5 frem ERROR
+ .long serror //$25-6 frem ERROR
+ .long serror //$25-7 frem ERROR
+
+ .long pscale //$26-0 fscale all
+ .long pscale //$26-1 fscale all
+ .long pscale //$26-2 fscale all
+ .long pscale //$26-3 fscale all
+ .long pscale //$26-4 fscale all
+ .long serror //$26-5 fscale ERROR
+ .long serror //$26-6 fscale ERROR
+ .long serror //$26-7 fscale ERROR
+
+ .long serror //$27-0 ERROR - illegal extension
+ .long serror //$27-1 ERROR - illegal extension
+ .long serror //$27-2 ERROR - illegal extension
+ .long serror //$27-3 ERROR - illegal extension
+ .long serror //$27-4 ERROR - illegal extension
+ .long serror //$27-5 ERROR - illegal extension
+ .long serror //$27-6 ERROR - illegal extension
+ .long serror //$27-7 ERROR - illegal extension
+
+ .long serror //$28-0 ERROR - illegal extension
+ .long serror //$28-1 ERROR - illegal extension
+ .long serror //$28-2 ERROR - illegal extension
+ .long serror //$28-3 ERROR - illegal extension
+ .long serror //$28-4 ERROR - illegal extension
+ .long serror //$28-5 ERROR - illegal extension
+ .long serror //$28-6 ERROR - illegal extension
+ .long serror //$28-7 ERROR - illegal extension
+
+ .long serror //$29-0 ERROR - illegal extension
+ .long serror //$29-1 ERROR - illegal extension
+ .long serror //$29-2 ERROR - illegal extension
+ .long serror //$29-3 ERROR - illegal extension
+ .long serror //$29-4 ERROR - illegal extension
+ .long serror //$29-5 ERROR - illegal extension
+ .long serror //$29-6 ERROR - illegal extension
+ .long serror //$29-7 ERROR - illegal extension
+
+ .long serror //$2a-0 ERROR - illegal extension
+ .long serror //$2a-1 ERROR - illegal extension
+ .long serror //$2a-2 ERROR - illegal extension
+ .long serror //$2a-3 ERROR - illegal extension
+ .long serror //$2a-4 ERROR - illegal extension
+ .long serror //$2a-5 ERROR - illegal extension
+ .long serror //$2a-6 ERROR - illegal extension
+ .long serror //$2a-7 ERROR - illegal extension
+
+ .long serror //$2b-0 ERROR - illegal extension
+ .long serror //$2b-1 ERROR - illegal extension
+ .long serror //$2b-2 ERROR - illegal extension
+ .long serror //$2b-3 ERROR - illegal extension
+ .long serror //$2b-4 ERROR - illegal extension
+ .long serror //$2b-5 ERROR - illegal extension
+ .long serror //$2b-6 ERROR - illegal extension
+ .long serror //$2b-7 ERROR - illegal extension
+
+ .long serror //$2c-0 ERROR - illegal extension
+ .long serror //$2c-1 ERROR - illegal extension
+ .long serror //$2c-2 ERROR - illegal extension
+ .long serror //$2c-3 ERROR - illegal extension
+ .long serror //$2c-4 ERROR - illegal extension
+ .long serror //$2c-5 ERROR - illegal extension
+ .long serror //$2c-6 ERROR - illegal extension
+ .long serror //$2c-7 ERROR - illegal extension
+
+ .long serror //$2d-0 ERROR - illegal extension
+ .long serror //$2d-1 ERROR - illegal extension
+ .long serror //$2d-2 ERROR - illegal extension
+ .long serror //$2d-3 ERROR - illegal extension
+ .long serror //$2d-4 ERROR - illegal extension
+ .long serror //$2d-5 ERROR - illegal extension
+ .long serror //$2d-6 ERROR - illegal extension
+ .long serror //$2d-7 ERROR - illegal extension
+
+ .long serror //$2e-0 ERROR - illegal extension
+ .long serror //$2e-1 ERROR - illegal extension
+ .long serror //$2e-2 ERROR - illegal extension
+ .long serror //$2e-3 ERROR - illegal extension
+ .long serror //$2e-4 ERROR - illegal extension
+ .long serror //$2e-5 ERROR - illegal extension
+ .long serror //$2e-6 ERROR - illegal extension
+ .long serror //$2e-7 ERROR - illegal extension
+
+ .long serror //$2f-0 ERROR - illegal extension
+ .long serror //$2f-1 ERROR - illegal extension
+ .long serror //$2f-2 ERROR - illegal extension
+ .long serror //$2f-3 ERROR - illegal extension
+ .long serror //$2f-4 ERROR - illegal extension
+ .long serror //$2f-5 ERROR - illegal extension
+ .long serror //$2f-6 ERROR - illegal extension
+ .long serror //$2f-7 ERROR - illegal extension
+
+ .long ssincos //$30-0 fsincos norm
+ .long ssincosz //$30-1 fsincos zero
+ .long ssincosi //$30-2 fsincos inf
+ .long ssincosnan //$30-3 fsincos nan
+ .long ssincosd //$30-4 fsincos denorm
+ .long serror //$30-5 fsincos ERROR
+ .long serror //$30-6 fsincos ERROR
+ .long serror //$30-7 fsincos ERROR
+
+ .long ssincos //$31-0 fsincos norm
+ .long ssincosz //$31-1 fsincos zero
+ .long ssincosi //$31-2 fsincos inf
+ .long ssincosnan //$31-3 fsincos nan
+ .long ssincosd //$31-4 fsincos denorm
+ .long serror //$31-5 fsincos ERROR
+ .long serror //$31-6 fsincos ERROR
+ .long serror //$31-7 fsincos ERROR
+
+ .long ssincos //$32-0 fsincos norm
+ .long ssincosz //$32-1 fsincos zero
+ .long ssincosi //$32-2 fsincos inf
+ .long ssincosnan //$32-3 fsincos nan
+ .long ssincosd //$32-4 fsincos denorm
+ .long serror //$32-5 fsincos ERROR
+ .long serror //$32-6 fsincos ERROR
+ .long serror //$32-7 fsincos ERROR
+
+ .long ssincos //$33-0 fsincos norm
+ .long ssincosz //$33-1 fsincos zero
+ .long ssincosi //$33-2 fsincos inf
+ .long ssincosnan //$33-3 fsincos nan
+ .long ssincosd //$33-4 fsincos denorm
+ .long serror //$33-5 fsincos ERROR
+ .long serror //$33-6 fsincos ERROR
+ .long serror //$33-7 fsincos ERROR
+
+ .long ssincos //$34-0 fsincos norm
+ .long ssincosz //$34-1 fsincos zero
+ .long ssincosi //$34-2 fsincos inf
+ .long ssincosnan //$34-3 fsincos nan
+ .long ssincosd //$34-4 fsincos denorm
+ .long serror //$34-5 fsincos ERROR
+ .long serror //$34-6 fsincos ERROR
+ .long serror //$34-7 fsincos ERROR
+
+ .long ssincos //$35-0 fsincos norm
+ .long ssincosz //$35-1 fsincos zero
+ .long ssincosi //$35-2 fsincos inf
+ .long ssincosnan //$35-3 fsincos nan
+ .long ssincosd //$35-4 fsincos denorm
+ .long serror //$35-5 fsincos ERROR
+ .long serror //$35-6 fsincos ERROR
+ .long serror //$35-7 fsincos ERROR
+
+ .long ssincos //$36-0 fsincos norm
+ .long ssincosz //$36-1 fsincos zero
+ .long ssincosi //$36-2 fsincos inf
+ .long ssincosnan //$36-3 fsincos nan
+ .long ssincosd //$36-4 fsincos denorm
+ .long serror //$36-5 fsincos ERROR
+ .long serror //$36-6 fsincos ERROR
+ .long serror //$36-7 fsincos ERROR
+
+ .long ssincos //$37-0 fsincos norm
+ .long ssincosz //$37-1 fsincos zero
+ .long ssincosi //$37-2 fsincos inf
+ .long ssincosnan //$37-3 fsincos nan
+ .long ssincosd //$37-4 fsincos denorm
+ .long serror //$37-5 fsincos ERROR
+ .long serror //$37-6 fsincos ERROR
+ .long serror //$37-7 fsincos ERROR
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/util.s b/c/src/lib/libcpu/m68k/m68040/fpsp/util.s
new file mode 100644
index 0000000000..c6f6570437
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/util.s
@@ -0,0 +1,748 @@
+//
+// util.sa 3.7 7/29/91
+//
+// This file contains routines used by other programs.
+//
+// ovf_res: used by overflow to force the correct
+// result. ovf_r_k, ovf_r_x2, ovf_r_x3 are
+// derivatives of this routine.
+// get_fline: get user's opcode word
+// g_dfmtou: returns the destination format.
+// g_opcls: returns the opclass of the float instruction.
+// g_rndpr: returns the rounding precision.
+// reg_dest: write byte, word, or long data to Dn
+//
+//
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+//UTIL idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref mem_read
+
+ .global g_dfmtou
+ .global g_opcls
+ .global g_rndpr
+ .global get_fline
+ .global reg_dest
+
+//
+// Final result table for ovf_res. Note that the negative counterparts
+// are unnecessary as ovf_res always returns the sign separately from
+// the exponent.
+// ;+inf
+EXT_PINF: .long 0x7fff0000,0x00000000,0x00000000,0x00000000
+// ;largest +ext
+EXT_PLRG: .long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000
+// ;largest magnitude +sgl in ext
+SGL_PLRG: .long 0x407e0000,0xffffff00,0x00000000,0x00000000
+// ;largest magnitude +dbl in ext
+DBL_PLRG: .long 0x43fe0000,0xffffffff,0xfffff800,0x00000000
+// ;largest -ext
+
+tblovfl:
+ .long EXT_RN
+ .long EXT_RZ
+ .long EXT_RM
+ .long EXT_RP
+ .long SGL_RN
+ .long SGL_RZ
+ .long SGL_RM
+ .long SGL_RP
+ .long DBL_RN
+ .long DBL_RZ
+ .long DBL_RM
+ .long DBL_RP
+ .long error
+ .long error
+ .long error
+ .long error
+
+
+//
+// ovf_r_k --- overflow result calculation
+//
+// This entry point is used by kernel_ex.
+//
+// This forces the destination precision to be extended
+//
+// Input: operand in ETEMP
+// Output: a result is in ETEMP (internal extended format)
+//
+ .global ovf_r_k
+ovf_r_k:
+ lea ETEMP(%a6),%a0 //a0 points to source operand
+ bclrb #sign_bit,ETEMP_EX(%a6)
+ sne ETEMP_SGN(%a6) //convert to internal IEEE format
+
+//
+// ovf_r_x2 --- overflow result calculation
+//
+// This entry point is used by x_ovfl (opclass 0 and 2).
+//
+// Input a0 points to an operand in the internal extended format
+// Output a0 points to the result in the internal extended format
+//
+// This sets the round precision according to the user's FPCR unless the
+// instruction is fsgldiv or fsglmul or fsadd, fdadd, fsub, fdsub, fsmul,
+// fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg.
+// If the instruction is fsgldiv or fsglmul, the rounding precision must be
+// extended. If the instruction is not fsgldiv or fsglmul but a force-
+// precision instruction, the rounding precision is then set to the force
+// precision.
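+//
+// In outline (a C sketch only; ovf_round_prec is a hypothetical name, and
+// 0/1/2 stand for extended/single/double as in the code below):
+//
+//      /* forced: 0=none, 1=sgl, 2=dbl (from the force-precision bits) */
+//      static unsigned ovf_round_prec(int is_fsgl, int forced,
+//                                     unsigned fpcr_prec)
+//      {
+//              if (is_fsgl)            /* fsglmul/fsgldiv: always extended */
+//                      return 0;
+//              if (forced)             /* fsadd/fdadd/...: forced sgl/dbl  */
+//                      return forced;
+//              return fpcr_prec;       /* otherwise use the user's FPCR    */
+//      }
+//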
+
+ .global ovf_r_x2
+ovf_r_x2:
+ btstb #E3,E_BYTE(%a6) //check for nu exception
+ beql ovf_e1_exc //it is cu exception
+ovf_e3_exc:
+ movew CMDREG3B(%a6),%d0 //get the command word
+ andiw #0x00000060,%d0 //clear all bits except 6 and 5
+ cmpil #0x00000040,%d0
+ beql ovff_sgl //force precision is single
+ cmpil #0x00000060,%d0
+ beql ovff_dbl //force precision is double
+ movew CMDREG3B(%a6),%d0 //get the command word again
+ andil #0x7f,%d0 //clear all except operation
+ cmpil #0x33,%d0
+ beql ovf_fsgl //fsglmul or fsgldiv
+ cmpil #0x30,%d0
+ beql ovf_fsgl
+ bra ovf_fpcr //instruction is none of the above
+// ;use FPCR
+ovf_e1_exc:
+ movew CMDREG1B(%a6),%d0 //get command word
+ andil #0x00000044,%d0 //clear all bits except 6 and 2
+ cmpil #0x00000040,%d0
+ beql ovff_sgl //the instruction is force single
+ cmpil #0x00000044,%d0
+ beql ovff_dbl //the instruction is force double
+ movew CMDREG1B(%a6),%d0 //again get the command word
+ andil #0x0000007f,%d0 //clear all except the op code
+ cmpil #0x00000027,%d0
+ beql ovf_fsgl //fsglmul
+ cmpil #0x00000024,%d0
+ beql ovf_fsgl //fsgldiv
+ bra ovf_fpcr //none of the above, use FPCR
+//
+//
+// Inst is either fsgldiv or fsglmul. Force extended precision.
+//
+ovf_fsgl:
+ clrl %d0
+ bras ovf_res
+
+ovff_sgl:
+ movel #0x00000001,%d0 //set single
+ bras ovf_res
+ovff_dbl:
+ movel #0x00000002,%d0 //set double
+ bras ovf_res
+//
+// The precision is in the fpcr.
+//
+ovf_fpcr:
+ bfextu FPCR_MODE(%a6){#0:#2},%d0 //set round precision
+ bras ovf_res
+
+//
+//
+// ovf_r_x3 --- overflow result calculation
+//
+// This entry point is used by x_ovfl (opclass 3 only).
+//
+// Input a0 points to an operand in the internal extended format
+// Output a0 points to the result in the internal extended format
+//
+// This sets the round precision according to the destination size.
+//
+ .global ovf_r_x3
+ovf_r_x3:
+ bsr g_dfmtou //get dest fmt in d0{1:0}
+// ;for fmovout, the destination format
+// ;is the rounding precision
+
+//
+// ovf_res --- overflow result calculation
+//
+// Input:
+// a0 points to operand in internal extended format
+// Output:
+// a0 points to result in internal extended format
+//
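+//
+// The tblovfl entries above reduce to a choice between +/-infinity and the
+// largest finite number of the destination precision.  As a C sketch
+// (ovf_rounds_to_inf is a hypothetical name, not part of the FPSP):
+//
+//      /* mode: 0=RN, 1=RZ, 2=RM, 3=RP; negative = sign of the result */
+//      static int ovf_rounds_to_inf(unsigned mode, int negative)
+//      {
+//              switch (mode) {
+//              case 0:  return 1;              /* RN: always infinity   */
+//              case 1:  return 0;              /* RZ: largest magnitude */
+//              case 2:  return negative;       /* RM: -inf or +largest  */
+//              default: return !negative;      /* RP: +inf or -largest  */
+//              }
+//      }
+//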
+ .global ovf_res
+ovf_res:
+ lsll #2,%d0 //move round precision to d0{3:2}
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //set round mode
+ orl %d1,%d0 //index is fmt:mode in d0{3:0}
+ leal tblovfl,%a1 //load a1 with table address
+ movel %a1@(%d0:l:4),%a1 //use d0 as index to the table
+ jmp (%a1) //go to the correct routine
+//
+//case DEST_FMT = EXT
+//
+EXT_RN:
+ leal EXT_PINF,%a1 //answer is +/- infinity
+ bsetb #inf_bit,FPSR_CC(%a6)
+ bra set_sign //now go set the sign
+EXT_RZ:
+ leal EXT_PLRG,%a1 //answer is +/- large number
+ bra set_sign //now go set the sign
+EXT_RM:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs e_rm_pos
+e_rm_neg:
+ leal EXT_PINF,%a1 //answer is negative infinity
+ orl #neginf_mask,USER_FPSR(%a6)
+ bra end_ovfr
+e_rm_pos:
+ leal EXT_PLRG,%a1 //answer is large positive number
+ bra end_ovfr
+EXT_RP:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs e_rp_pos
+e_rp_neg:
+ leal EXT_PLRG,%a1 //answer is large negative number
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bra end_ovfr
+e_rp_pos:
+ leal EXT_PINF,%a1 //answer is positive infinity
+ bsetb #inf_bit,FPSR_CC(%a6)
+ bra end_ovfr
+//
+//case DEST_FMT = DBL
+//
+DBL_RN:
+ leal EXT_PINF,%a1 //answer is +/- infinity
+ bsetb #inf_bit,FPSR_CC(%a6)
+ bra set_sign
+DBL_RZ:
+ leal DBL_PLRG,%a1 //answer is +/- large number
+ bra set_sign //now go set the sign
+DBL_RM:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs d_rm_pos
+d_rm_neg:
+ leal EXT_PINF,%a1 //answer is negative infinity
+ orl #neginf_mask,USER_FPSR(%a6)
+ bra end_ovfr //inf is same for all precisions (ext,dbl,sgl)
+d_rm_pos:
+ leal DBL_PLRG,%a1 //answer is large positive number
+ bra end_ovfr
+DBL_RP:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs d_rp_pos
+d_rp_neg:
+ leal DBL_PLRG,%a1 //answer is large negative number
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bra end_ovfr
+d_rp_pos:
+ leal EXT_PINF,%a1 //answer is positive infinity
+ bsetb #inf_bit,FPSR_CC(%a6)
+ bra end_ovfr
+//
+//case DEST_FMT = SGL
+//
+SGL_RN:
+ leal EXT_PINF,%a1 //answer is +/- infinity
+ bsetb #inf_bit,FPSR_CC(%a6)
+ bras set_sign
+SGL_RZ:
+ leal SGL_PLRG,%a1 //answer is +/- large number
+ bras set_sign
+SGL_RM:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs s_rm_pos
+s_rm_neg:
+ leal EXT_PINF,%a1 //answer is negative infinity
+ orl #neginf_mask,USER_FPSR(%a6)
+ bras end_ovfr
+s_rm_pos:
+ leal SGL_PLRG,%a1 //answer is large positive number
+ bras end_ovfr
+SGL_RP:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs s_rp_pos
+s_rp_neg:
+ leal SGL_PLRG,%a1 //answer is large negative number
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bras end_ovfr
+s_rp_pos:
+ leal EXT_PINF,%a1 //answer is positive infinity
+ bsetb #inf_bit,FPSR_CC(%a6)
+ bras end_ovfr
+
+set_sign:
+ tstb LOCAL_SGN(%a0) //if negative overflow
+ beqs end_ovfr
+neg_sign:
+ bsetb #neg_bit,FPSR_CC(%a6)
+
+end_ovfr:
+ movew LOCAL_EX(%a1),LOCAL_EX(%a0) //do not overwrite sign
+ movel LOCAL_HI(%a1),LOCAL_HI(%a0)
+ movel LOCAL_LO(%a1),LOCAL_LO(%a0)
+ rts
+
+
+//
+// ERROR
+//
+error:
+ rts
+//
+// get_fline --- get f-line opcode of interrupted instruction
+//
+// Returns opcode in the low word of d0.
+//
+get_fline:
+ movel USER_FPIAR(%a6),%a0 //opcode address
+	movel	#0,-(%a7)	//reserve a long on the stack
+ leal 2(%a7),%a1 //point to low word of temporary
+ movel #2,%d0 //count
+ bsrl mem_read
+ movel (%a7)+,%d0
+ rts
+//
+// g_rndpr --- put rounding precision in d0{1:0}
+//
+// valid return codes are:
+// 00 - extended
+// 01 - single
+// 10 - double
+//
+// begin
+// get rounding precision (cmdreg3b{6:5})
+// begin
+// case opclass = 011 (move out)
+//	get destination format - this is also the rounding precision
+//
+// case opclass = 0x0
+// if E3
+//	*case RndPr (from cmdreg3b{6:5}) = 11 then RND_PREC = DBL
+//	*case RndPr (from cmdreg3b{6:5}) = 10 then RND_PREC = SGL
+//	case RndPr (from cmdreg3b{6:5}) = 00 | 01
+// use precision from FPCR{7:6}
+// case 00 then RND_PREC = EXT
+// case 01 then RND_PREC = SGL
+// case 10 then RND_PREC = DBL
+// else E1
+// use precision in FPCR{7:6}
+// case 00 then RND_PREC = EXT
+// case 01 then RND_PREC = SGL
+// case 10 then RND_PREC = DBL
+// end
+//
+g_rndpr:
+ bsr g_opcls //get opclass in d0{2:0}
+ cmpw #0x0003,%d0 //check for opclass 011
+ bnes op_0x0
+
+//
+// For move out instructions (opclass 011) the destination format
+// is the same as the rounding precision. Pass results from g_dfmtou.
+//
+ bsr g_dfmtou
+ rts
+op_0x0:
+ btstb #E3,E_BYTE(%a6)
+ beql unf_e1_exc //branch to e1 underflow
+unf_e3_exc:
+ movel CMDREG3B(%a6),%d0 //rounding precision in d0{10:9}
+ bfextu %d0{#9:#2},%d0 //move the rounding prec bits to d0{1:0}
+ cmpil #0x2,%d0
+ beql unff_sgl //force precision is single
+ cmpil #0x3,%d0 //force precision is double
+ beql unff_dbl
+ movew CMDREG3B(%a6),%d0 //get the command word again
+ andil #0x7f,%d0 //clear all except operation
+ cmpil #0x33,%d0
+ beql unf_fsgl //fsglmul or fsgldiv
+ cmpil #0x30,%d0
+ beql unf_fsgl //fsgldiv or fsglmul
+ bra unf_fpcr
+unf_e1_exc:
+ movel CMDREG1B(%a6),%d0 //get 32 bits off the stack, 1st 16 bits
+// ;are the command word
+ andil #0x00440000,%d0 //clear all bits except bits 6 and 2
+ cmpil #0x00400000,%d0
+ beql unff_sgl //force single
+ cmpil #0x00440000,%d0 //force double
+ beql unff_dbl
+ movel CMDREG1B(%a6),%d0 //get the command word again
+ andil #0x007f0000,%d0 //clear all bits except the operation
+ cmpil #0x00270000,%d0
+ beql unf_fsgl //fsglmul
+ cmpil #0x00240000,%d0
+ beql unf_fsgl //fsgldiv
+ bra unf_fpcr
+
+//
+// Convert to return format. The values from cmdreg3b and the return
+// values are:
+// cmdreg3b return precision
+// -------- ------ ---------
+// 00,01 0 ext
+// 10 1 sgl
+// 11 2 dbl
+// Force single
+//
+unff_sgl:
+ movel #1,%d0 //return 1
+ rts
+//
+// Force double
+//
+unff_dbl:
+ movel #2,%d0 //return 2
+ rts
+//
+// Force extended
+//
+unf_fsgl:
+ movel #0,%d0
+ rts
+//
+// Get rounding precision set in FPCR{7:6}.
+//
+unf_fpcr:
+ movel USER_FPCR(%a6),%d0 //rounding precision bits in d0{7:6}
+ bfextu %d0{#24:#2},%d0 //move the rounding prec bits to d0{1:0}
+ rts
+//
+// g_opcls --- put opclass in d0{2:0}
+//
+g_opcls:
+ btstb #E3,E_BYTE(%a6)
+	beqs	opc_1b		//if E3 is clear (E1 case), go to cmdreg1b
+opc_3b:
+ clrl %d0 //if E3, only opclass 0x0 is possible
+ rts
+opc_1b:
+ movel CMDREG1B(%a6),%d0
+ bfextu %d0{#0:#3},%d0 //shift opclass bits d0{31:29} to d0{2:0}
+ rts
+//
+// g_dfmtou --- put destination format in d0{1:0}
+//
+// If E1, the format is from cmdreg1b{12:10}
+// If E3, the format is extended.
+//
+// Dest. Fmt.
+// extended 010 -> 00
+// single 001 -> 01
+// double 101 -> 10
+//
+g_dfmtou:
+ btstb #E3,E_BYTE(%a6)
+ beqs op011
+	clrl	%d0		//if E3, size is always ext
+ rts
+op011:
+ movel CMDREG1B(%a6),%d0
+ bfextu %d0{#3:#3},%d0 //dest fmt from cmdreg1b{12:10}
+ cmpb #1,%d0 //check for single
+ bnes not_sgl
+ movel #1,%d0
+ rts
+not_sgl:
+ cmpb #5,%d0 //check for double
+ bnes not_dbl
+ movel #2,%d0
+ rts
+not_dbl:
+ clrl %d0 //must be extended
+ rts
+
+//
+//
+// Final result table for unf_sub. Note that the negative counterparts
+// are unnecessary as unf_sub always returns the sign separately from
+// the exponent.
+// ;+zero
+EXT_PZRO: .long 0x00000000,0x00000000,0x00000000,0x00000000
+// ;+zero
+SGL_PZRO: .long 0x3f810000,0x00000000,0x00000000,0x00000000
+// ;+zero
+DBL_PZRO: .long 0x3c010000,0x00000000,0x00000000,0x00000000
+// ;smallest +ext denorm
+EXT_PSML: .long 0x00000000,0x00000000,0x00000001,0x00000000
+// ;smallest +sgl denorm
+SGL_PSML: .long 0x3f810000,0x00000100,0x00000000,0x00000000
+// ;smallest +dbl denorm
+DBL_PSML: .long 0x3c010000,0x00000000,0x00000800,0x00000000
+//
+// UNF_SUB --- underflow result calculation
+//
+// Input:
+// d0 contains round precision
+// a0 points to input operand in the internal extended format
+//
+// Output:
+// a0 points to correct internal extended precision result.
+//
+
+tblunf:
+ .long uEXT_RN
+ .long uEXT_RZ
+ .long uEXT_RM
+ .long uEXT_RP
+ .long uSGL_RN
+ .long uSGL_RZ
+ .long uSGL_RM
+ .long uSGL_RP
+ .long uDBL_RN
+ .long uDBL_RZ
+ .long uDBL_RM
+ .long uDBL_RP
+ .long uDBL_RN
+ .long uDBL_RZ
+ .long uDBL_RM
+ .long uDBL_RP
+
+ .global unf_sub
+unf_sub:
+ lsll #2,%d0 //move round precision to d0{3:2}
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //set round mode
+ orl %d1,%d0 //index is fmt:mode in d0{3:0}
+ leal tblunf,%a1 //load a1 with table address
+ movel %a1@(%d0:l:4),%a1 //use d0 as index to the table
+ jmp (%a1) //go to the correct routine
+//
+//case DEST_FMT = EXT
+//
+uEXT_RN:
+ leal EXT_PZRO,%a1 //answer is +/- zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bra uset_sign //now go set the sign
+uEXT_RZ:
+ leal EXT_PZRO,%a1 //answer is +/- zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bra uset_sign //now go set the sign
+uEXT_RM:
+ tstb LOCAL_SGN(%a0) //if negative underflow
+ beqs ue_rm_pos
+ue_rm_neg:
+ leal EXT_PSML,%a1 //answer is negative smallest denorm
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bra end_unfr
+ue_rm_pos:
+ leal EXT_PZRO,%a1 //answer is positive zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bra end_unfr
+uEXT_RP:
+ tstb LOCAL_SGN(%a0) //if negative underflow
+ beqs ue_rp_pos
+ue_rp_neg:
+ leal EXT_PZRO,%a1 //answer is negative zero
+ oril #negz_mask,USER_FPSR(%a6)
+ bra end_unfr
+ue_rp_pos:
+ leal EXT_PSML,%a1 //answer is positive smallest denorm
+ bra end_unfr
+//
+//case DEST_FMT = DBL
+//
+uDBL_RN:
+ leal DBL_PZRO,%a1 //answer is +/- zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bra uset_sign
+uDBL_RZ:
+ leal DBL_PZRO,%a1 //answer is +/- zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bra uset_sign //now go set the sign
+uDBL_RM:
+	tstb	LOCAL_SGN(%a0)		//if negative underflow
+ beqs ud_rm_pos
+ud_rm_neg:
+ leal DBL_PSML,%a1 //answer is smallest denormalized negative
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bra end_unfr
+ud_rm_pos:
+ leal DBL_PZRO,%a1 //answer is positive zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bra end_unfr
+uDBL_RP:
+	tstb	LOCAL_SGN(%a0)		//if negative underflow
+ beqs ud_rp_pos
+ud_rp_neg:
+ leal DBL_PZRO,%a1 //answer is negative zero
+ oril #negz_mask,USER_FPSR(%a6)
+ bra end_unfr
+ud_rp_pos:
+	leal	DBL_PSML,%a1	//answer is smallest denormalized positive
+ bra end_unfr
+//
+//case DEST_FMT = SGL
+//
+uSGL_RN:
+ leal SGL_PZRO,%a1 //answer is +/- zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bras uset_sign
+uSGL_RZ:
+ leal SGL_PZRO,%a1 //answer is +/- zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bras uset_sign
+uSGL_RM:
+	tstb	LOCAL_SGN(%a0)		//if negative underflow
+ beqs us_rm_pos
+us_rm_neg:
+ leal SGL_PSML,%a1 //answer is smallest denormalized negative
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bras end_unfr
+us_rm_pos:
+ leal SGL_PZRO,%a1 //answer is positive zero
+ bsetb #z_bit,FPSR_CC(%a6)
+ bras end_unfr
+uSGL_RP:
+	tstb	LOCAL_SGN(%a0)		//if negative underflow
+ beqs us_rp_pos
+us_rp_neg:
+ leal SGL_PZRO,%a1 //answer is negative zero
+ oril #negz_mask,USER_FPSR(%a6)
+ bras end_unfr
+us_rp_pos:
+ leal SGL_PSML,%a1 //answer is smallest denormalized positive
+ bras end_unfr
+
+uset_sign:
+	tstb	LOCAL_SGN(%a0)		//if negative underflow
+ beqs end_unfr
+uneg_sign:
+ bsetb #neg_bit,FPSR_CC(%a6)
+
+end_unfr:
+ movew LOCAL_EX(%a1),LOCAL_EX(%a0) //be careful not to overwrite sign
+ movel LOCAL_HI(%a1),LOCAL_HI(%a0)
+ movel LOCAL_LO(%a1),LOCAL_LO(%a0)
+ rts
+//
+// reg_dest --- write byte, word, or long data to Dn
+//
+//
+// Input:
+// L_SCR1: Data
+// d1: data size and dest register number formatted as:
+//
+//	31            5    4     3    2     1     0
+// -----------------------------------------------
+// | 0 | Size | Dest Reg # |
+// -----------------------------------------------
+//
+// Size is:
+// 0 - Byte
+// 1 - Word
+// 2 - Long/Single
+//
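+//
+// The pregdst table below is indexed directly by d1, i.e. (a C sketch;
+// pregdst_index is a hypothetical name, not part of the FPSP):
+//
+//      /* size: 0=byte, 1=word, 2=long;  reg: 0..7 (Dn) */
+//      static unsigned pregdst_index(unsigned size, unsigned reg)
+//      {
+//              return (size << 3) | (reg & 7);
+//      }
+//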
+pregdst:
+ .long byte_d0
+ .long byte_d1
+ .long byte_d2
+ .long byte_d3
+ .long byte_d4
+ .long byte_d5
+ .long byte_d6
+ .long byte_d7
+ .long word_d0
+ .long word_d1
+ .long word_d2
+ .long word_d3
+ .long word_d4
+ .long word_d5
+ .long word_d6
+ .long word_d7
+ .long long_d0
+ .long long_d1
+ .long long_d2
+ .long long_d3
+ .long long_d4
+ .long long_d5
+ .long long_d6
+ .long long_d7
+
+reg_dest:
+ leal pregdst,%a0
+ movel %a0@(%d1:l:4),%a0
+ jmp (%a0)
+
+byte_d0:
+ moveb L_SCR1(%a6),USER_D0+3(%a6)
+ rts
+byte_d1:
+ moveb L_SCR1(%a6),USER_D1+3(%a6)
+ rts
+byte_d2:
+ moveb L_SCR1(%a6),%d2
+ rts
+byte_d3:
+ moveb L_SCR1(%a6),%d3
+ rts
+byte_d4:
+ moveb L_SCR1(%a6),%d4
+ rts
+byte_d5:
+ moveb L_SCR1(%a6),%d5
+ rts
+byte_d6:
+ moveb L_SCR1(%a6),%d6
+ rts
+byte_d7:
+ moveb L_SCR1(%a6),%d7
+ rts
+word_d0:
+ movew L_SCR1(%a6),USER_D0+2(%a6)
+ rts
+word_d1:
+ movew L_SCR1(%a6),USER_D1+2(%a6)
+ rts
+word_d2:
+ movew L_SCR1(%a6),%d2
+ rts
+word_d3:
+ movew L_SCR1(%a6),%d3
+ rts
+word_d4:
+ movew L_SCR1(%a6),%d4
+ rts
+word_d5:
+ movew L_SCR1(%a6),%d5
+ rts
+word_d6:
+ movew L_SCR1(%a6),%d6
+ rts
+word_d7:
+ movew L_SCR1(%a6),%d7
+ rts
+long_d0:
+ movel L_SCR1(%a6),USER_D0(%a6)
+ rts
+long_d1:
+ movel L_SCR1(%a6),USER_D1(%a6)
+ rts
+long_d2:
+ movel L_SCR1(%a6),%d2
+ rts
+long_d3:
+ movel L_SCR1(%a6),%d3
+ rts
+long_d4:
+ movel L_SCR1(%a6),%d4
+ rts
+long_d5:
+ movel L_SCR1(%a6),%d5
+ rts
+long_d6:
+ movel L_SCR1(%a6),%d6
+ rts
+long_d7:
+ movel L_SCR1(%a6),%d7
+ rts
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_bsun.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_bsun.s
new file mode 100644
index 0000000000..c5094f0159
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_bsun.s
@@ -0,0 +1,47 @@
+//
+// x_bsun.sa 3.3 7/1/91
+//
+// fpsp_bsun --- FPSP handler for branch/set on unordered exception
+//
+// Copy the PC to FPIAR to maintain 881/882 compatibility
+//
+// The real_bsun handler will need to perform further corrective
+// measures as outlined in the 040 User's Manual on pages
+// 9-41f, section 9.8.3.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_BSUN: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref real_bsun
+
+ .global fpsp_bsun
+fpsp_bsun:
+//
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+//
+ movel EXC_PC(%a6),USER_FPIAR(%a6)
+//
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_bsun
+//
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_fline.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_fline.s
new file mode 100644
index 0000000000..100a86a083
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_fline.s
@@ -0,0 +1,104 @@
+//
+// x_fline.sa 3.3 1/10/91
+//
+// fpsp_fline --- FPSP handler for fline exception
+//
+// First determine if the exception is one of the unimplemented
+// floating point instructions. If so, let fpsp_unimp handle it.
+// Next, determine if the instruction is an fmovecr with a non-zero
+// <ea> field. If so, handle here and return. Otherwise, it
+// must be a real F-line exception.
+//
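+//
+// The decision sequence, as a rough C sketch (classify_fline and the enum
+// names are hypothetical, not part of the FPSP; the shifts mirror the
+// bfextu field extractions in the code below):
+//
+//      enum fline_action { FLINE_UNIMP, FLINE_FMOVECR, FLINE_REAL };
+//
+//      static enum fline_action classify_fline(int is_unimp_vector,
+//                                              unsigned opword,
+//                                              unsigned cmdword)
+//      {
+//              if (is_unimp_vector)            /* stacked vector is UNIMP_VEC */
+//                      return FLINE_UNIMP;     /* let fpsp_unimp handle it    */
+//              if (((opword >> 9) & 7) == 1 && /* coprocessor id field is 1   */
+//                  (cmdword >> 10) == 0x17)    /* fmovecr encoding            */
+//                      return FLINE_FMOVECR;   /* fix the frame, go to uni_2  */
+//              return FLINE_REAL;              /* genuine F-line -> real_fline */
+//      }
+//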
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_FLINE: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref real_fline
+ |xref fpsp_unimp
+ |xref uni_2
+ |xref mem_read
+ |xref fpsp_fmt_error
+
+ .global fpsp_fline
+fpsp_fline:
+//
+// check for unimplemented vector first. Use EXC_VEC-4 because
+// the equate is valid only after a 'link a6' has pushed one more
+// long onto the stack.
+//
+ cmpw #UNIMP_VEC,EXC_VEC-4(%a7)
+ beql fpsp_unimp
+
+//
+// fmovecr with non-zero <ea> handling here
+//
+ subl #4,%a7 //4 accounts for 2-word difference
+// ;between six word frame (unimp) and
+// ;four word frame
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ moveal EXC_PC+4(%a6),%a0 //get address of fline instruction
+ leal L_SCR1(%a6),%a1 //use L_SCR1 as scratch
+ movel #4,%d0
+ addl #4,%a6 //to offset the sub.l #4,a7 above so that
+// ;a6 can point correctly to the stack frame
+// ;before branching to mem_read
+ bsrl mem_read
+ subl #4,%a6
+ movel L_SCR1(%a6),%d0 //d0 contains the fline and command word
+ bfextu %d0{#4:#3},%d1 //extract coprocessor id
+ cmpib #1,%d1 //check if cpid=1
+ bne not_mvcr //exit if not
+ bfextu %d0{#16:#6},%d1
+ cmpib #0x17,%d1 //check if it is an FMOVECR encoding
+ bne not_mvcr
+// ;if an FMOVECR instruction, fix stack
+// ;and go to FPSP_UNIMP
+fix_stack:
+ cmpib #VER_40,(%a7) //test for orig unimp frame
+ bnes ck_rev
+ subl #UNIMP_40_SIZE-4,%a7 //emulate an orig fsave
+ moveb #VER_40,(%a7)
+ moveb #UNIMP_40_SIZE-4,1(%a7)
+ clrw 2(%a7)
+ bras fix_con
+ck_rev:
+ cmpib #VER_41,(%a7) //test for rev unimp frame
+ bnel fpsp_fmt_error //if not $40 or $41, exit with error
+ subl #UNIMP_41_SIZE-4,%a7 //emulate a rev fsave
+ moveb #VER_41,(%a7)
+ moveb #UNIMP_41_SIZE-4,1(%a7)
+ clrw 2(%a7)
+fix_con:
+ movew EXC_SR+4(%a6),EXC_SR(%a6) //move stacked sr to new position
+ movel EXC_PC+4(%a6),EXC_PC(%a6) //move stacked pc to new position
+ fmovel EXC_PC(%a6),%FPIAR //point FPIAR to fline inst
+ movel #4,%d1
+ addl %d1,EXC_PC(%a6) //increment stacked pc value to next inst
+ movew #0x202c,EXC_VEC(%a6) //reformat vector to unimp
+ clrl EXC_EA(%a6) //clear the EXC_EA field
+ movew %d0,CMDREG1B(%a6) //move the lower word into CMDREG1B
+ clrl E_BYTE(%a6)
+ bsetb #UFLAG,T_BYTE(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1 //restore data registers
+ bral uni_2
+
+not_mvcr:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1 //restore data registers
+ frestore (%a7)+
+ unlk %a6
+ addl #4,%a7
+ bral real_fline
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_operr.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_operr.s
new file mode 100644
index 0000000000..7d63af6dd9
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_operr.s
@@ -0,0 +1,356 @@
+//
+// x_operr.sa 3.5 7/1/91
+//
+// fpsp_operr --- FPSP handler for operand error exception
+//
+// See 68040 User's Manual pp. 9-44f
+//
+// Note 1: For trap disabled 040 does the following:
+// If the dest is a fp reg, then an extended precision non_signaling
+// NAN is stored in the dest reg. If the dest format is b, w, or l and
+// the source op is a NAN, then garbage is stored as the result (actually
+// the upper 32 bits of the mantissa are sent to the integer unit). If
+// the dest format is integer (b, w, l) and the operr is caused by
+// integer overflow, or the source op is inf, then the result stored is
+// garbage.
+// There are three cases in which operr is incorrectly signaled on the
+// 040. This occurs for move_out of format b, w, or l for the largest
+// negative integer (-2^7 for b, -2^15 for w, -2^31 for l).
+//
+// On opclass = 011 fmove.(b,w,l) that causes a conversion
+// overflow -> OPERR, the exponent in wbte (and fpte) is:
+// byte 56 - (62 - exp)
+// word 48 - (62 - exp)
+// long 32 - (62 - exp)
+//
+// where exp = (true exp) - 1
+//
+// So, wbtemp and fptemp will contain the following on erroneously
+// signalled operr:
+// fpts = 1
+// fpte = $4000 (15 bit externally)
+// byte fptm = $ffffffff ffffff80
+// word fptm = $ffffffff ffff8000
+// long fptm = $ffffffff 80000000
+//
+// Note 2: For trap enabled 040 does the following:
+// If the inst is move_out, then same as Note 1.
+// If the inst is not move_out, the dest is not modified.
+// The exceptional operand is not defined for integer overflow
+// during a move_out.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_OPERR: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref mem_write
+ |xref real_operr
+ |xref real_inex
+ |xref get_fline
+ |xref fpsp_done
+ |xref reg_dest
+
+ .global fpsp_operr
+fpsp_operr:
+//
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+//
+// Check if this is an opclass 3 instruction.
+// If so, fall through, else branch to operr_end
+//
+ btstb #TFLAG,T_BYTE(%a6)
+ beqs operr_end
+
+//
+// If the destination size is B,W,or L, the operr must be
+// handled here.
+//
+ movel CMDREG1B(%a6),%d0
+ bfextu %d0{#3:#3},%d0 //0=long, 4=word, 6=byte
+ cmpib #0,%d0 //determine size; check long
+ beq operr_long
+ cmpib #4,%d0 //check word
+ beq operr_word
+ cmpib #6,%d0 //check byte
+ beq operr_byte
+
+//
+// The size is not B,W,or L, so the operr is handled by the
+// kernel handler. Set the operr bits and clean up, leaving
+// only the integer exception frame on the stack, and the
+// fpu in the original exceptional state.
+//
+operr_end:
+ bsetb #operr_bit,FPSR_EXCEPT(%a6)
+ bsetb #aiop_bit,FPSR_AEXCEPT(%a6)
+
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_operr
+
+operr_long:
+ moveql #4,%d1 //write size to d1
+ moveb STAG(%a6),%d0 //test stag for nan
+ andib #0xe0,%d0 //clr all but tag
+ cmpib #0x60,%d0 //check for nan
+ beq operr_nan
+ cmpil #0x80000000,FPTEMP_LO(%a6) //test if ls lword is special
+ bnes chklerr //if not equal, check for incorrect operr
+ bsr check_upper //check if exp and ms mant are special
+ tstl %d0
+ bnes chklerr //if d0 is true, check for incorrect operr
+ movel #0x80000000,%d0 //store special case result
+ bsr operr_store
+ bra not_enabled //clean and exit
+//
+// CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+//
+chklerr:
+ movew FPTEMP_EX(%a6),%d0
+ andw #0x7FFF,%d0 //ignore sign bit
+ cmpw #0x3FFE,%d0 //this is the only possible exponent value
+ bnes chklerr2
+fixlong:
+ movel FPTEMP_LO(%a6),%d0
+ bsr operr_store
+ bra not_enabled
+chklerr2:
+ movew FPTEMP_EX(%a6),%d0
+ andw #0x7FFF,%d0 //ignore sign bit
+ cmpw #0x4000,%d0
+ bcc store_max //exponent out of range
+
+ movel FPTEMP_LO(%a6),%d0
+ andl #0x7FFF0000,%d0 //look for all 1's on bits 30-16
+ cmpl #0x7FFF0000,%d0
+ beqs fixlong
+
+ tstl FPTEMP_LO(%a6)
+ bpls chklepos
+ cmpl #0xFFFFFFFF,FPTEMP_HI(%a6)
+ beqs fixlong
+ bra store_max
+chklepos:
+ tstl FPTEMP_HI(%a6)
+ beqs fixlong
+ bra store_max
+
+operr_word:
+ moveql #2,%d1 //write size to d1
+ moveb STAG(%a6),%d0 //test stag for nan
+ andib #0xe0,%d0 //clr all but tag
+ cmpib #0x60,%d0 //check for nan
+ beq operr_nan
+ cmpil #0xffff8000,FPTEMP_LO(%a6) //test if ls lword is special
+ bnes chkwerr //if not equal, check for incorrect operr
+ bsr check_upper //check if exp and ms mant are special
+ tstl %d0
+ bnes chkwerr //if d0 is true, check for incorrect operr
+ movel #0x80000000,%d0 //store special case result
+ bsr operr_store
+ bra not_enabled //clean and exit
+//
+// CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+//
+chkwerr:
+ movew FPTEMP_EX(%a6),%d0
+ andw #0x7FFF,%d0 //ignore sign bit
+ cmpw #0x3FFE,%d0 //this is the only possible exponent value
+ bnes store_max
+ movel FPTEMP_LO(%a6),%d0
+ swap %d0
+ bsr operr_store
+ bra not_enabled
+
+operr_byte:
+ moveql #1,%d1 //write size to d1
+ moveb STAG(%a6),%d0 //test stag for nan
+ andib #0xe0,%d0 //clr all but tag
+ cmpib #0x60,%d0 //check for nan
+ beqs operr_nan
+ cmpil #0xffffff80,FPTEMP_LO(%a6) //test if ls lword is special
+ bnes chkberr //if not equal, check for incorrect operr
+ bsr check_upper //check if exp and ms mant are special
+ tstl %d0
+ bnes chkberr //if d0 is true, check for incorrect operr
+ movel #0x80000000,%d0 //store special case result
+ bsr operr_store
+ bra not_enabled //clean and exit
+//
+// CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+//
+chkberr:
+ movew FPTEMP_EX(%a6),%d0
+ andw #0x7FFF,%d0 //ignore sign bit
+ cmpw #0x3FFE,%d0 //this is the only possible exponent value
+ bnes store_max
+ movel FPTEMP_LO(%a6),%d0
+ asll #8,%d0
+ swap %d0
+ bsr operr_store
+ bra not_enabled
+
+//
+// This operr condition is not of the special case. Set operr
+// and aiop and write the portion of the nan to memory for the
+// given size.
+//
+operr_nan:
+ orl #opaop_mask,USER_FPSR(%a6) //set operr & aiop
+
+ movel ETEMP_HI(%a6),%d0 //output will be from upper 32 bits
+ bsr operr_store
+ bra end_operr
+//
+// Store_max loads the max pos or negative for the size, sets
+// the operr and aiop bits, and clears inex and ainex, incorrectly
+// set by the 040.
+//
+store_max:
+ orl #opaop_mask,USER_FPSR(%a6) //set operr & aiop
+ bclrb #inex2_bit,FPSR_EXCEPT(%a6)
+ bclrb #ainex_bit,FPSR_AEXCEPT(%a6)
+ fmovel #0,%FPSR
+
+ tstw FPTEMP_EX(%a6) //check sign
+ blts load_neg
+ movel #0x7fffffff,%d0
+ bsr operr_store
+ bra end_operr
+load_neg:
+ movel #0x80000000,%d0
+ bsr operr_store
+ bra end_operr
+
+//
+// This routine stores the data in d0, for the given size in d1,
+// to memory or data register as required. A read of the fline
+// is required to determine the destination.
+//
+operr_store:
+ movel %d0,L_SCR1(%a6) //move write data to L_SCR1
+ movel %d1,-(%a7) //save register size
+ bsrl get_fline //fline returned in d0
+ movel (%a7)+,%d1
+ bftst %d0{#26:#3} //if mode is zero, dest is Dn
+ bnes dest_mem
+//
+// Destination is Dn. Get register number from d0. Data is on
+// the stack at (a7). D1 has size: 1=byte,2=word,4=long/single
+//
+ andil #7,%d0 //isolate register number
+ cmpil #4,%d1
+ beqs op_long //the most frequent case
+ cmpil #2,%d1
+ bnes op_con
+ orl #8,%d0
+ bras op_con
+op_long:
+ orl #0x10,%d0
+op_con:
+ movel %d0,%d1 //format size:reg for reg_dest
+ bral reg_dest //call to reg_dest returns to caller
+// ;of operr_store
+//
+// Destination is memory. Get <ea> from integer exception frame
+// and call mem_write.
+//
+dest_mem:
+ leal L_SCR1(%a6),%a0 //put ptr to write data in a0
+ movel EXC_EA(%a6),%a1 //put user destination address in a1
+ movel %d1,%d0 //put size in d0
+ bsrl mem_write
+ rts
+//
+// Check the exponent for $c000 and the upper 32 bits of the
+// mantissa for $ffffffff. If both are true, return d0 clr
+// and store the lower n bits of the least lword of FPTEMP
+// to d0 for write out. If not, it is a real operr, and set d0.
+//
+check_upper:
+	cmpil	#0xffffffff,FPTEMP_HI(%a6) //check if upper 32 bits are all 1's
+ bnes true_operr //if not all 1's then was true operr
+ cmpiw #0xc000,FPTEMP_EX(%a6) //check if incorrectly signalled
+ beqs not_true_operr //branch if not true operr
+ cmpiw #0xbfff,FPTEMP_EX(%a6) //check if incorrectly signalled
+ beqs not_true_operr //branch if not true operr
+true_operr:
+ movel #1,%d0 //signal real operr
+ rts
+not_true_operr:
+ clrl %d0 //signal no real operr
+ rts
+
+//
+// End_operr tests for operr enabled. If not, it cleans up the stack
+// and does an rte. If enabled, it cleans up the stack and branches
+// to the kernel operr handler with only the integer exception
+// frame on the stack and the fpu in the original exceptional state
+// with correct data written to the destination.
+//
+end_operr:
+ btstb #operr_bit,FPCR_ENABLE(%a6)
+ beqs not_enabled
+enabled:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_operr
+
+not_enabled:
+//
+// It is possible to have either inex2 or inex1 exceptions with the
+// operr. If the inex enable bit is set in the FPCR, and either
+// inex2 or inex1 occurred, we must clean up and branch to the
+// real inex handler.
+//
+ck_inex:
+ moveb FPCR_ENABLE(%a6),%d0
+ andb FPSR_EXCEPT(%a6),%d0
+ andib #0x3,%d0
+ beq operr_exit
+//
+// Inexact enabled and reported, and we must take an inexact exception.
+//
+take_inex:
+ moveb #INEX_VEC,EXC_VEC+1(%a6)
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_inex
+//
+// Since operr is only an E1 exception, there is no need to frestore
+// any state back to the fpu.
+//
+operr_exit:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ unlk %a6
+ bral fpsp_done
+
+ |end
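For orientation only, and not code from the package itself: the three "incorrectly signalled" cases that fixlong/chkwerr/chkberr recover are exactly the most negative integer for each move-out size. A small C statement of that condition, with invented names:

#include <stdint.h>
#include <stdbool.h>

/* The 68040 flags OPERR for these three representable results even though
 * no error occurred; the handler above detects them and stores the value
 * instead of letting garbage through.
 */
static bool is_false_operr(int32_t result, int size_bytes)
{
    switch (size_bytes) {
    case 1:  return result == INT8_MIN;   /* -2^7  for byte */
    case 2:  return result == INT16_MIN;  /* -2^15 for word */
    case 4:  return result == INT32_MIN;  /* -2^31 for long */
    default: return false;
    }
}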
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_ovfl.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_ovfl.s
new file mode 100644
index 0000000000..29b2c1f06d
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_ovfl.s
@@ -0,0 +1,186 @@
+//
+// x_ovfl.sa 3.5 7/1/91
+//
+// fpsp_ovfl --- FPSP handler for overflow exception
+//
+// Overflow occurs when a floating-point intermediate result is
+// too large to be represented in a floating-point data register,
+// or when storing to memory, the contents of a floating-point
+// data register are too large to be represented in the
+// destination format.
+//
+// Trap disabled results
+//
+// If the instruction is move_out, then garbage is stored in the
+// destination. If the instruction is not move_out, then the
+// destination is not affected. For 68881 compatibility, the
+// following values should be stored at the destination, based
+// on the current rounding mode:
+//
+// RN Infinity with the sign of the intermediate result.
+// RZ Largest magnitude number, with the sign of the
+// intermediate result.
+// RM For pos overflow, the largest pos number. For neg overflow,
+// -infinity
+// RP For pos overflow, +infinity. For neg overflow, the largest
+// neg number
+//
+// Trap enabled results
+// All trap disabled code applies. In addition the exceptional
+// operand needs to be made available to the user's exception handler
+// with a bias of $6000 subtracted from the exponent.
+//
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_OVFL: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref ovf_r_x2
+ |xref ovf_r_x3
+ |xref store
+ |xref real_ovfl
+ |xref real_inex
+ |xref fpsp_done
+ |xref g_opcls
+ |xref b1238_fix
+
+ .global fpsp_ovfl
+fpsp_ovfl:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+//
+// The 040 doesn't set the AINEX bit in the FPSR; the following
+// line temporarily rectifies this error.
+//
+ bsetb #ainex_bit,FPSR_AEXCEPT(%a6)
+//
+ bsrl ovf_adj //denormalize, round & store interm op
+//
+// if overflow traps not enabled check for inexact exception
+//
+ btstb #ovfl_bit,FPCR_ENABLE(%a6)
+ beqs ck_inex
+//
+ btstb #E3,E_BYTE(%a6)
+ beqs no_e3_1
+ bfextu CMDREG3B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+no_e3_1:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_ovfl
+//
+// It is possible to have either inex2 or inex1 exceptions with the
+// ovfl. If the inex enable bit is set in the FPCR, and either
+// inex2 or inex1 occurred, we must clean up and branch to the
+// real inex handler.
+//
+ck_inex:
+// move.b FPCR_ENABLE(%a6),%d0
+// and.b FPSR_EXCEPT(%a6),%d0
+// andi.b #$3,%d0
+ btstb #inex2_bit,FPCR_ENABLE(%a6)
+ beqs ovfl_exit
+//
+// Inexact enabled and reported, and we must take an inexact exception.
+//
+take_inex:
+ btstb #E3,E_BYTE(%a6)
+ beqs no_e3_2
+ bfextu CMDREG3B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+no_e3_2:
+ moveb #INEX_VEC,EXC_VEC+1(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_inex
+
+ovfl_exit:
+ bclrb #E3,E_BYTE(%a6) //test and clear E3 bit
+ beqs e1_set
+//
+// Clear dirty bit on dest register in the frame before branching
+// to b1238_fix.
+//
+ bfextu CMDREG3B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix //test for bug1238 case
+
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral fpsp_done
+e1_set:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ unlk %a6
+ bral fpsp_done
+
+//
+// ovf_adj
+//
+ovf_adj:
+//
+// Have a0 point to the correct operand.
+//
+ btstb #E3,E_BYTE(%a6) //test E3 bit
+ beqs ovf_e1
+
+ lea WBTEMP(%a6),%a0
+ bras ovf_com
+ovf_e1:
+ lea ETEMP(%a6),%a0
+
+ovf_com:
+ bclrb #sign_bit,LOCAL_EX(%a0)
+ sne LOCAL_SGN(%a0)
+
+ bsrl g_opcls //returns opclass in d0
+ cmpiw #3,%d0 //check for opclass3
+ bnes not_opc011
+
+//
+// FPSR_CC is saved and restored because ovf_r_x3 affects it. The
+// CCs are defined to be 'not affected' for the opclass3 instruction.
+//
+ moveb FPSR_CC(%a6),L_SCR1(%a6)
+ bsrl ovf_r_x3 //returns a0 pointing to result
+ moveb L_SCR1(%a6),FPSR_CC(%a6)
+ bral store //stores to memory or register
+
+not_opc011:
+ bsrl ovf_r_x2 //returns a0 pointing to result
+ bral store //stores to memory or register
+
+ |end
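The rounding-mode table in the header above can be restated, purely as an illustration, in C for a double destination; the enum names are invented here:

#include <float.h>
#include <math.h>

enum rnd_mode { RND_RN, RND_RZ, RND_RM, RND_RP };  /* invented names */

/* 68881-compatible default result on a trap-disabled overflow. */
static double ovfl_default(enum rnd_mode mode, int negative)
{
    switch (mode) {
    case RND_RN: return negative ? -HUGE_VAL : HUGE_VAL; /* infinity, sign of result */
    case RND_RZ: return negative ? -DBL_MAX  : DBL_MAX;  /* largest magnitude        */
    case RND_RM: return negative ? -HUGE_VAL : DBL_MAX;  /* -inf, or largest pos     */
    case RND_RP: return negative ? -DBL_MAX  : HUGE_VAL; /* largest neg, or +inf     */
    }
    return 0.0;
}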
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_snan.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_snan.s
new file mode 100644
index 0000000000..a89369e3f3
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_snan.s
@@ -0,0 +1,277 @@
+//
+// x_snan.sa 3.3 7/1/91
+//
+// fpsp_snan --- FPSP handler for signalling NAN exception
+//
+// SNAN for float -> integer conversions (integer conversion of
+// an SNAN) is a non-maskable run-time exception.
+//
+// For trap disabled the 040 does the following:
+// If the dest data format is s, d, or x, then the SNAN bit in the NAN
+// is set to one and the resulting non-signaling NAN (truncated if
+// necessary) is transferred to the dest. If the dest format is b, w,
+// or l, then garbage is written to the dest (actually the upper 32 bits
+// of the mantissa are sent to the integer unit).
+//
+// For trap enabled the 040 does the following:
+// If the inst is move_out, then the results are the same as for trap
+// disabled with the exception posted. If the instruction is not
+// move_out, the dest. is not modified, and the exception is posted.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_SNAN: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref get_fline
+ |xref mem_write
+ |xref real_snan
+ |xref real_inex
+ |xref fpsp_done
+ |xref reg_dest
+
+ .global fpsp_snan
+fpsp_snan:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+//
+// Check if trap enabled
+//
+ btstb #snan_bit,FPCR_ENABLE(%a6)
+ bnes ena //If enabled, then branch
+
+ bsrl move_out //else SNAN disabled
+//
+// It is possible to have an inex1 exception with the
+// snan. If the inex enable bit is set in the FPCR, and either
+// inex2 or inex1 occurred, we must clean up and branch to the
+// real inex handler.
+//
+ck_inex:
+ moveb FPCR_ENABLE(%a6),%d0
+ andb FPSR_EXCEPT(%a6),%d0
+ andib #0x3,%d0
+ beq end_snan
+//
+// Inexact enabled and reported, and we must take an inexact exception.
+//
+take_inex:
+ moveb #INEX_VEC,EXC_VEC+1(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_inex
+//
+// SNAN is enabled. Check if inst is move_out.
+// Make any corrections to the 040 output as necessary.
+//
+ena:
+ btstb #5,CMDREG1B(%a6) //if set, inst is move out
+ beq not_out
+
+ bsrl move_out
+
+report_snan:
+ moveb (%a7),VER_TMP(%a6)
+ cmpib #VER_40,(%a7) //test for orig unimp frame
+ bnes ck_rev
+ moveql #13,%d0 //need to zero 14 lwords
+ bras rep_con
+ck_rev:
+ moveql #11,%d0 //need to zero 12 lwords
+rep_con:
+ clrl (%a7)
+loop1:
+ clrl -(%a7) //clear and dec a7
+ dbra %d0,loop1
+ moveb VER_TMP(%a6),(%a7) //format a busy frame
+ moveb #BUSY_SIZE-4,1(%a7)
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_snan
+//
+// Exit snan handler by expanding the unimp frame into a busy frame
+//
+end_snan:
+ bclrb #E1,E_BYTE(%a6)
+
+ moveb (%a7),VER_TMP(%a6)
+ cmpib #VER_40,(%a7) //test for orig unimp frame
+ bnes ck_rev2
+ moveql #13,%d0 //need to zero 14 lwords
+ bras rep_con2
+ck_rev2:
+ moveql #11,%d0 //need to zero 12 lwords
+rep_con2:
+ clrl (%a7)
+loop2:
+ clrl -(%a7) //clear and dec a7
+ dbra %d0,loop2
+ moveb VER_TMP(%a6),(%a7) //format a busy frame
+ moveb #BUSY_SIZE-4,1(%a7) //write busy size
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral fpsp_done
+
+//
+// Move_out
+//
+move_out:
+ movel EXC_EA(%a6),%a0 //get <ea> from exc frame
+
+ bfextu CMDREG1B(%a6){#3:#3},%d0 //move rx field to d0{2:0}
+ cmpil #0,%d0 //check for long
+ beqs sto_long //branch if move_out long
+
+ cmpil #4,%d0 //check for word
+ beqs sto_word //branch if move_out word
+
+ cmpil #6,%d0 //check for byte
+ beqs sto_byte //branch if move_out byte
+
+//
+// Not byte, word or long
+//
+ rts
+//
+// Get the 32 most significant bits of etemp mantissa
+//
+sto_long:
+ movel ETEMP_HI(%a6),%d1
+ movel #4,%d0 //load byte count
+//
+// Set signalling nan bit
+//
+ bsetl #30,%d1
+//
+// Store to the user's destination address
+//
+ tstl %a0 //check if <ea> is 0
+ beqs wrt_dn //destination is a data register
+
+ movel %d1,-(%a7) //move the snan onto the stack
+ movel %a0,%a1 //load dest addr into a1
+ movel %a7,%a0 //load src addr of snan into a0
+ bsrl mem_write //write snan to user memory
+ movel (%a7)+,%d1 //clear off stack
+ rts
+//
+// Get the 16 most significant bits of etemp mantissa
+//
+sto_word:
+ movel ETEMP_HI(%a6),%d1
+ movel #2,%d0 //load byte count
+//
+// Set signalling nan bit
+//
+ bsetl #30,%d1
+//
+// Store to the user's destination address
+//
+ tstl %a0 //check if <ea> is 0
+ beqs wrt_dn //destination is a data register
+
+ movel %d1,-(%a7) //move the snan onto the stack
+ movel %a0,%a1 //load dest addr into a1
+ movel %a7,%a0 //point to low word
+ bsrl mem_write //write snan to user memory
+ movel (%a7)+,%d1 //clear off stack
+ rts
+//
+// Get the 8 most significant bits of etemp mantissa
+//
+sto_byte:
+ movel ETEMP_HI(%a6),%d1
+ movel #1,%d0 //load byte count
+//
+// Set signalling nan bit
+//
+ bsetl #30,%d1
+//
+// Store to the user's destination address
+//
+ tstl %a0 //check if <ea> is 0
+ beqs wrt_dn //destination is a data register
+ movel %d1,-(%a7) //move the snan onto the stack
+ movel %a0,%a1 //load dest addr into a1
+ movel %a7,%a0 //point to source byte
+ bsrl mem_write //write snan to user memory
+ movel (%a7)+,%d1 //clear off stack
+ rts
+
+//
+// wrt_dn --- write to a data register
+//
+// We get here with D1 containing the data to write and D0 the
+// number of bytes to write: 1=byte,2=word,4=long.
+//
+wrt_dn:
+ movel %d1,L_SCR1(%a6) //data
+ movel %d0,-(%a7) //size
+ bsrl get_fline //returns fline word in d0
+ movel %d0,%d1
+ andil #0x7,%d1 //d1 now holds register number
+ movel (%sp)+,%d0 //get original size
+ cmpil #4,%d0
+ beqs wrt_long
+ cmpil #2,%d0
+ bnes wrt_byte
+wrt_word:
+ orl #0x8,%d1
+ bral reg_dest
+wrt_long:
+ orl #0x10,%d1
+ bral reg_dest
+wrt_byte:
+ bral reg_dest
+//
+// Check if it is a src nan or dst nan
+//
+not_out:
+ movel DTAG(%a6),%d0
+ bfextu %d0{#0:#3},%d0 //isolate dtag in lsbs
+
+ cmpib #3,%d0 //check for nan in destination
+ bnes issrc //destination nan has priority
+dst_nan:
+ btstb #6,FPTEMP_HI(%a6) //check if dest nan is an snan
+ bnes issrc //no, so check source for snan
+ movew FPTEMP_EX(%a6),%d0
+ bras cont
+issrc:
+ movew ETEMP_EX(%a6),%d0
+cont:
+ btstl #15,%d0 //test for sign of snan
+ beqs clr_neg
+ bsetb #neg_bit,FPSR_CC(%a6)
+ bra report_snan
+clr_neg:
+ bclrb #neg_bit,FPSR_CC(%a6)
+ bra report_snan
+
+ |end
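Purely for illustration (not part of the FPSP sources): each of the sto_long/sto_word/sto_byte paths above sets bit 30 of the upper mantissa longword before writing it out, which is what turns the signalling NaN into a non-signalling one. The same operation in C, with an invented helper name:

#include <stdint.h>

/* Same effect as the `bsetl #30,%d1` above: set the quiet bit in the
 * 32 most significant mantissa bits taken from ETEMP_HI.
 */
static uint32_t quiet_snan_upper(uint32_t etemp_hi)
{
    return etemp_hi | (1u << 30);
}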
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_store.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_store.s
new file mode 100644
index 0000000000..0dcb6798f7
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_store.s
@@ -0,0 +1,256 @@
+//
+// x_store.sa 3.2 1/24/91
+//
+// store --- store operand to memory or register
+//
+// Used by underflow and overflow handlers.
+//
+// a6 = points to fp value to be stored.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_STORE: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+fpreg_mask:
+ .byte 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01
+
+ .include "fpsp.defs"
+
+ |xref mem_write
+ |xref get_fline
+ |xref g_opcls
+ |xref g_dfmtou
+ |xref reg_dest
+
+ .global dest_ext
+ .global dest_dbl
+ .global dest_sgl
+
+ .global store
+store:
+ btstb #E3,E_BYTE(%a6)
+ beqs E1_sto
+E3_sto:
+ movel CMDREG3B(%a6),%d0
+ bfextu %d0{#6:#3},%d0 //isolate dest. reg from cmdreg3b
+sto_fp:
+ lea fpreg_mask,%a1
+ moveb (%a1,%d0.w),%d0 //convert reg# to dynamic register mask
+ tstb LOCAL_SGN(%a0)
+ beqs is_pos
+ bsetb #sign_bit,LOCAL_EX(%a0)
+is_pos:
+ fmovemx (%a0),%d0 //move to correct register
+//
+// if fp0-fp3 is being modified, we must put a copy
+// in the USER_FPn variable on the stack because all exception
+// handlers restore fp0-fp3 from there.
+//
+ cmpb #0x80,%d0
+ bnes not_fp0
+ fmovemx %fp0-%fp0,USER_FP0(%a6)
+ rts
+not_fp0:
+ cmpb #0x40,%d0
+ bnes not_fp1
+ fmovemx %fp1-%fp1,USER_FP1(%a6)
+ rts
+not_fp1:
+ cmpb #0x20,%d0
+ bnes not_fp2
+ fmovemx %fp2-%fp2,USER_FP2(%a6)
+ rts
+not_fp2:
+ cmpb #0x10,%d0
+ bnes not_fp3
+ fmovemx %fp3-%fp3,USER_FP3(%a6)
+ rts
+not_fp3:
+ rts
+
+E1_sto:
+ bsrl g_opcls //returns opclass in d0
+ cmpib #3,%d0
+ beq opc011 //branch if opclass 3
+ movel CMDREG1B(%a6),%d0
+ bfextu %d0{#6:#3},%d0 //extract destination register
+ bras sto_fp
+
+opc011:
+ bsrl g_dfmtou //returns dest format in d0
+// ;ext=00, sgl=01, dbl=10
+ movel %a0,%a1 //save source addr in a1
+ movel EXC_EA(%a6),%a0 //get the address
+ cmpil #0,%d0 //if dest format is extended
+ beq dest_ext //then branch
+ cmpil #1,%d0 //if dest format is single
+ beqs dest_sgl //then branch
+//
+// fall through to dest_dbl
+//
+
+//
+// dest_dbl --- write double precision value to user space
+//
+//Input
+// a0 -> destination address
+// a1 -> source in extended precision
+//Output
+// a0 -> destroyed
+// a1 -> destroyed
+// d0 -> 0
+//
+//Changes extended precision to double precision.
+// Note: no attempt is made to round the extended value to double.
+// dbl_sign = ext_sign
+// dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)
+// get rid of ext integer bit
+// dbl_mant = ext_mant{62:11}
+//
+// --------------- --------------- ---------------
+// extended -> |s| exp | |1| ms mant | | ls mant |
+// --------------- --------------- ---------------
+// 95 64 63 62 32 31 11 0
+// | |
+// | |
+// | |
+// v v
+// --------------- ---------------
+// double -> |s|exp| mant | | mant |
+// --------------- ---------------
+// 63 51 32 31 0
+//
+dest_dbl:
+ clrl %d0 //clear d0
+ movew LOCAL_EX(%a1),%d0 //get exponent
+ subw #0x3fff,%d0 //subtract extended precision bias
+ cmpw #0x4000,%d0 //check if inf
+ beqs inf //if so, special case
+ addw #0x3ff,%d0 //add double precision bias
+ swap %d0 //d0 now in upper word
+ lsll #4,%d0 //d0 now in proper place for dbl prec exp
+ tstb LOCAL_SGN(%a1)
+ beqs get_mant //if positive, go process mantissa
+ bsetl #31,%d0 //if negative, put in sign information
+// ; before continuing
+ bras get_mant //go process mantissa
+inf:
+ movel #0x7ff00000,%d0 //load dbl inf exponent
+ clrl LOCAL_HI(%a1) //clear msb
+ tstb LOCAL_SGN(%a1)
+ beqs dbl_inf //if positive, go ahead and write it
+ bsetl #31,%d0 //if negative put in sign information
+dbl_inf:
+ movel %d0,LOCAL_EX(%a1) //put the new exp back on the stack
+ bras dbl_wrt
+get_mant:
+ movel LOCAL_HI(%a1),%d1 //get ms mantissa
+ bfextu %d1{#1:#20},%d1 //get upper 20 bits of ms
+ orl %d1,%d0 //put these bits in ms word of double
+ movel %d0,LOCAL_EX(%a1) //put the new exp back on the stack
+ movel LOCAL_HI(%a1),%d1 //get ms mantissa
+ movel #21,%d0 //load shift count
+ lsll %d0,%d1 //put lower 11 bits in upper bits
+ movel %d1,LOCAL_HI(%a1) //build lower lword in memory
+ movel LOCAL_LO(%a1),%d1 //get ls mantissa
+ bfextu %d1{#0:#21},%d0 //get ls 21 bits of double
+ orl %d0,LOCAL_HI(%a1) //put them in double result
+dbl_wrt:
+ movel #0x8,%d0 //byte count for double precision number
+ exg %a0,%a1 //a0=supervisor source, a1=user dest
+ bsrl mem_write //move the number to the user's memory
+ rts
+//
+// dest_sgl --- write single precision value to user space
+//
+//Input
+// a0 -> destination address
+// a1 -> source in extended precision
+//
+//Output
+// a0 -> destroyed
+// a1 -> destroyed
+// d0 -> 0
+//
+//Changes extended precision to single precision.
+// sgl_sign = ext_sign
+// sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
+// get rid of ext integer bit
+// sgl_mant = ext_mant{62:40}
+//
+// --------------- --------------- ---------------
+// extended -> |s| exp | |1| ms mant | | ls mant |
+// --------------- --------------- ---------------
+// 95 64 63 62 40 32 31 12 0
+// | |
+// | |
+// | |
+// v v
+// ---------------
+// single -> |s|exp| mant |
+// ---------------
+// 31 22 0
+//
+dest_sgl:
+ clrl %d0
+ movew LOCAL_EX(%a1),%d0 //get exponent
+ subw #0x3fff,%d0 //subtract extended precision bias
+ cmpw #0x4000,%d0 //check if inf
+ beqs sinf //if so, special case
+ addw #0x7f,%d0 //add single precision bias
+ swap %d0 //put exp in upper word of d0
+ lsll #7,%d0 //shift it into single exp bits
+ tstb LOCAL_SGN(%a1)
+ beqs get_sman //if positive, continue
+ bsetl #31,%d0 //if negative, put in sign first
+ bras get_sman //get mantissa
+sinf:
+ movel #0x7f800000,%d0 //load single inf exp to d0
+ tstb LOCAL_SGN(%a1)
+ beqs sgl_wrt //if positive, continue
+ bsetl #31,%d0 //if negative, put in sign info
+ bras sgl_wrt
+
+get_sman:
+ movel LOCAL_HI(%a1),%d1 //get ms mantissa
+ bfextu %d1{#1:#23},%d1 //get upper 23 bits of ms
+ orl %d1,%d0 //put these bits in ms word of single
+
+sgl_wrt:
+ movel %d0,L_SCR1(%a6) //put the new exp back on the stack
+ movel #0x4,%d0 //byte count for single precision number
+ tstl %a0 //user's destination address
+ beqs sgl_Dn //destination is a data register
+ exg %a0,%a1 //a0=supervisor source, a1=user dest
+ leal L_SCR1(%a6),%a0 //point a0 to data
+ bsrl mem_write //move the number to the user's memory
+ rts
+sgl_Dn:
+ bsrl get_fline //returns fline word in d0
+ andw #0x7,%d0 //isolate register number
+ movel %d0,%d1 //d1 has size:reg formatted for reg_dest
+ orl #0x10,%d1 //reg_dest wants size added to reg#
+ bral reg_dest //size is X, rts in reg_dest will
+// ;return to caller of dest_sgl
+
+dest_ext:
+ tstb LOCAL_SGN(%a1) //put back sign into exponent word
+ beqs dstx_cont
+ bsetb #sign_bit,LOCAL_EX(%a1)
+dstx_cont:
+ clrb LOCAL_SGN(%a1) //clear out the sign byte
+
+ movel #0x0c,%d0 //byte count for extended number
+ exg %a0,%a1 //a0=supervisor source, a1=user dest
+ bsrl mem_write //move the number to the user's memory
+ rts
+
+ |end
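To make the bit shuffling easier to follow (again, not code from the package), dest_dbl above amounts to a rebias of the exponent plus a straight truncation of the fraction. A C sketch under the assumption of a normalized operand whose exponent fits the double range; the struct layout and names are invented, and the real code handles infinity separately while leaving overflow/underflow to other handlers:

#include <stdint.h>

struct ext96 {            /* extended operand as three stack longwords */
    uint16_t sign_exp;    /* sign bit + 15-bit biased exponent         */
    uint32_t mant_hi;     /* bit 31 is the explicit integer bit        */
    uint32_t mant_lo;
};

/* Truncating conversion matching dest_dbl: rebias $3fff -> $3ff and take
 * extended fraction bits 62..11 as the 52-bit double fraction.
 */
static uint64_t ext_to_dbl_bits(const struct ext96 *e)
{
    uint64_t sign = (uint64_t)(e->sign_exp >> 15) << 63;
    uint64_t dexp = (uint64_t)((e->sign_exp & 0x7fff) - 0x3fff + 0x3ff) << 52;
    uint64_t frac = ((uint64_t)(e->mant_hi & 0x7fffffffu) << 21)  /* bits 62..32 */
                  | (e->mant_lo >> 11);                           /* bits 31..11 */
    return sign | dexp | frac;
}

dest_sgl follows the same pattern with the $7f single bias and fraction bits 62..40.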
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_unfl.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unfl.s
new file mode 100644
index 0000000000..205f466e73
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unfl.s
@@ -0,0 +1,269 @@
+//
+// x_unfl.sa 3.4 7/1/91
+//
+// fpsp_unfl --- FPSP handler for underflow exception
+//
+// Trap disabled results
+// For 881/2 compatibility, sw must denormalize the intermediate
+// result, then store the result. Denormalization is accomplished
+// by taking the intermediate result (which is always normalized) and
+// shifting the mantissa right while incrementing the exponent until
+// it is equal to the denormalized exponent for the destination
+// format. After denormalization, the result is rounded to the
+// destination format.
+//
+// Trap enabled results
+// All trap disabled code applies. In addition the exceptional
+// operand needs to be made available to the user with a bias of $6000
+// added to the exponent.
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_UNFL: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref denorm
+ |xref round
+ |xref store
+ |xref g_rndpr
+ |xref g_opcls
+ |xref g_dfmtou
+ |xref real_unfl
+ |xref real_inex
+ |xref fpsp_done
+ |xref b1238_fix
+
+ .global fpsp_unfl
+fpsp_unfl:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+//
+ bsrl unf_res //denormalize, round & store interm op
+//
+// If underflow exceptions are not enabled, check for inexact
+// exception
+//
+ btstb #unfl_bit,FPCR_ENABLE(%a6)
+ beqs ck_inex
+
+ btstb #E3,E_BYTE(%a6)
+ beqs no_e3_1
+//
+// Clear dirty bit on dest register in the frame before branching
+// to b1238_fix.
+//
+ bfextu CMDREG3B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix //test for bug1238 case
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+no_e3_1:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_unfl
+//
+// It is possible to have either inex2 or inex1 exceptions with the
+// unfl. If the inex enable bit is set in the FPCR, and either
+// inex2 or inex1 occurred, we must clean up and branch to the
+// real inex handler.
+//
+ck_inex:
+ moveb FPCR_ENABLE(%a6),%d0
+ andb FPSR_EXCEPT(%a6),%d0
+ andib #0x3,%d0
+ beqs unfl_done
+
+//
+// Inexact enabled and reported, and we must take an inexact exception
+//
+take_inex:
+ btstb #E3,E_BYTE(%a6)
+ beqs no_e3_2
+//
+// Clear dirty bit on dest register in the frame before branching
+// to b1238_fix.
+//
+ bfextu CMDREG3B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix //test for bug1238 case
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+no_e3_2:
+ moveb #INEX_VEC,EXC_VEC+1(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral real_inex
+
+unfl_done:
+ bclrb #E3,E_BYTE(%a6)
+ beqs e1_set //if set then branch
+//
+// Clear dirty bit on dest register in the frame before branching
+// to b1238_fix.
+//
+ bfextu CMDREG3B(%a6){#6:#3},%d0 //get dest reg no
+ bclrb %d0,FPR_DIRTY_BITS(%a6) //clr dest dirty bit
+ bsrl b1238_fix //test for bug1238 case
+ movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
+ orl #sx_mask,E_BYTE(%a6)
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ frestore (%a7)+
+ unlk %a6
+ bral fpsp_done
+e1_set:
+ moveml USER_DA(%a6),%d0-%d1/%a0-%a1
+ fmovemx USER_FP0(%a6),%fp0-%fp3
+ fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+ unlk %a6
+ bral fpsp_done
+//
+// unf_res --- underflow result calculation
+//
+unf_res:
+ bsrl g_rndpr //returns RND_PREC in d0 0=ext,
+// ;1=sgl, 2=dbl
+// ;we need the RND_PREC in the
+// ;upper word for round
+ movew #0,-(%a7)
+ movew %d0,-(%a7) //copy RND_PREC to stack
+//
+//
+// If the exception bit set is E3, the exceptional operand from the
+// fpu is in WBTEMP; else it is in FPTEMP.
+//
+ btstb #E3,E_BYTE(%a6)
+ beqs unf_E1
+unf_E3:
+ lea WBTEMP(%a6),%a0 //a0 now points to operand
+//
+// Test for fsgldiv and fsglmul. If the inst was one of these, then
+// force the precision to extended for the denorm routine. Use
+// the user's precision for the round routine.
+//
+ movew CMDREG3B(%a6),%d1 //check for fsgldiv or fsglmul
+ andiw #0x7f,%d1
+ cmpiw #0x30,%d1 //check for sgldiv
+ beqs unf_sgl
+ cmpiw #0x33,%d1 //check for sglmul
+ bnes unf_cont //if not, use fpcr prec in round
+unf_sgl:
+ clrl %d0
+ movew #0x1,(%a7) //override g_rndpr precision
+// ;force single
+ bras unf_cont
+unf_E1:
+ lea FPTEMP(%a6),%a0 //a0 now points to operand
+unf_cont:
+ bclrb #sign_bit,LOCAL_EX(%a0) //clear sign bit
+ sne LOCAL_SGN(%a0) //store sign
+
+ bsrl denorm //returns denorm, a0 points to it
+//
+// WARNING:
+//		;d0 has guard, round, and sticky bits
+// ;make sure that it is not corrupted
+// ;before it reaches the round subroutine
+// ;also ensure that a0 isn't corrupted
+
+//
+// Set up d1 for the round subroutine; d1 contains the PREC/MODE
+// information in the upper/lower register halves respectively.
+//
+ bfextu FPCR_MODE(%a6){#2:#2},%d1 //get mode from FPCR
+// ;mode in lower d1
+ addl (%a7)+,%d1 //merge PREC/MODE
+//
+// WARNING: a0 and d0 are assumed to be intact between the denorm and
+// round subroutines. All code between these two subroutines
+// must not corrupt a0 and d0.
+//
+//
+// Perform Round
+// Input: a0 points to input operand
+// d0{31:29} has guard, round, sticky
+// d1{01:00} has rounding mode
+// d1{17:16} has rounding precision
+// Output: a0 points to rounded operand
+//
+
+ bsrl round //returns rounded denorm at (a0)
+//
+// Differentiate between store to memory vs. store to register
+//
+unf_store:
+ bsrl g_opcls //returns opclass in d0{2:0}
+ cmpib #0x3,%d0
+ bnes not_opc011
+//
+// At this point, a store to memory is pending
+//
+opc011:
+ bsrl g_dfmtou
+ tstb %d0
+ beqs ext_opc011 //If extended, do not subtract
+// ;If destination format is sgl/dbl,
+ tstb LOCAL_HI(%a0) //If rounded result is normal, don't
+// ;subtract
+ bmis ext_opc011
+ subqw #1,LOCAL_EX(%a0) //account for denorm bias vs.
+// ;normalized bias
+// ; normalized denormalized
+// ;single $7f $7e
+// ;double $3ff $3fe
+//
+ext_opc011:
+ bsrl store //stores to memory
+ bras unf_done //finish up
+
+//
+// At this point, a store to a float register is pending
+//
+not_opc011:
+ bsrl store //stores to float register
+// ;a0 is not corrupted on a store to a
+// ;float register.
+//
+// Set the condition codes according to result
+//
+ tstl LOCAL_HI(%a0) //check upper mantissa
+ bnes ck_sgn
+ tstl LOCAL_LO(%a0) //check lower mantissa
+ bnes ck_sgn
+ bsetb #z_bit,FPSR_CC(%a6) //set condition codes if zero
+ck_sgn:
+ btstb #sign_bit,LOCAL_EX(%a0) //check the sign bit
+ beqs unf_done
+ bsetb #neg_bit,FPSR_CC(%a6)
+
+//
+// Finish.
+//
+unf_done:
+ btstb #inex2_bit,FPSR_EXCEPT(%a6)
+ beqs no_aunfl
+ bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
+no_aunfl:
+ rts
+
+ |end
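The denormalization described in the header, shifting the mantissa right while incrementing the exponent until it reaches the destination's denormalized exponent, can be sketched in C; this is a simplification, not the FPSP denorm routine, since the real code also hands the shifted-out guard/round/sticky bits to the round routine:

#include <stdint.h>

/* Simplified denormalization loop; precision is lost one bit per step. */
static uint64_t denormalize(uint64_t mantissa, int exp, int denorm_exp)
{
    while (exp < denorm_exp && mantissa != 0) {
        mantissa >>= 1;
        exp++;
    }
    return mantissa;
}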
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_unimp.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unimp.s
new file mode 100644
index 0000000000..e8aecfd985
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unimp.s
@@ -0,0 +1,77 @@
+//
+// x_unimp.sa 3.3 7/1/91
+//
+// fpsp_unimp --- FPSP handler for unimplemented instruction
+// exception.
+//
+// Invoked when the user program encounters a floating-point
+// op-code that hardware does not support. Trap vector# 11
+// (See table 8-1 MC68030 User's Manual).
+//
+//
+// Note: An fsave for an unimplemented inst. will create a short
+// fsave stack.
+//
+// Input: 1. Six word stack frame for unimplemented inst, four word
+// for illegal
+// (See table 8-7 MC68030 User's Manual).
+// 2. Unimp (short) fsave state frame created here by fsave
+// instruction.
+//
+//
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_UNIMP: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref get_op
+ |xref do_func
+ |xref sto_res
+ |xref gen_except
+ |xref fpsp_fmt_error
+
+ .global fpsp_unimp
+ .global uni_2
+fpsp_unimp:
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+uni_2:
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+ moveb (%a7),%d0 //test for valid version num
+ andib #0xf0,%d0 //test for $4x
+ cmpib #VER_4,%d0 //must be $4x or exit
+ bnel fpsp_fmt_error
+//
+// Temporary D25B Fix
+// The following lines are used to ensure that the FPSR
+// exception byte and condition codes are clear before proceeding
+//
+ movel USER_FPSR(%a6),%d0
+ andl #0xFF00FF,%d0 //clear all but accrued exceptions
+ movel %d0,USER_FPSR(%a6)
+ fmovel #0,%FPSR //clear all user bits
+ fmovel #0,%FPCR //clear all user exceptions for FPSP
+
+ clrb UFLG_TMP(%a6) //clr flag for unsupp data
+
+ bsrl get_op //go get operand(s)
+ clrb STORE_FLG(%a6)
+ bsrl do_func //do the function
+ fsave -(%a7) //capture possible exc state
+ tstb STORE_FLG(%a6)
+ bnes no_store //if STORE_FLG is set, no store
+ bsrl sto_res //store the result in user space
+no_store:
+ bral gen_except //post any exceptions and return
+
+ |end
diff --git a/c/src/lib/libcpu/m68k/m68040/fpsp/x_unsupp.s b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unsupp.s
new file mode 100644
index 0000000000..b6e7e51b0a
--- /dev/null
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unsupp.s
@@ -0,0 +1,83 @@
+//
+// x_unsupp.sa 3.3 7/1/91
+//
+// fpsp_unsupp --- FPSP handler for unsupported data type exception
+//
+// Trap vector #55 (See table 8-1 Mc68030 User's manual).
+// Invoked when the user program encounters a data format (packed) that
+// hardware does not support or a data type (denormalized numbers or
+// unnormalized numbers).
+// Normalizes denorms and unnorms, unpacks packed numbers then stores
+// them back into the machine to let the 040 finish the operation.
+//
+// Unsupp calls two routines:
+// 1. get_op - gets the operand(s)
+// 2. res_func - restore the function back into the 040 or
+// if fmove.p fpm,<ea> then pack source (fpm)
+// and store in user's memory <ea>.
+//
+// Input: Long fsave stack frame
+//
+//
+
+// Copyright (C) Motorola, Inc. 1990
+// All Rights Reserved
+//
+// THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+// The copyright notice above does not evidence any
+// actual or intended publication of such source code.
+
+X_UNSUPP: //idnt 2,1 | Motorola 040 Floating Point Software Package
+
+ |section 8
+
+ .include "fpsp.defs"
+
+ |xref get_op
+ |xref res_func
+ |xref gen_except
+ |xref fpsp_fmt_error
+
+ .global fpsp_unsupp
+fpsp_unsupp:
+//
+ link %a6,#-LOCAL_SIZE
+ fsave -(%a7)
+ moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
+ fmovemx %fp0-%fp3,USER_FP0(%a6)
+ fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+
+ moveb (%a7),VER_TMP(%a6) //save version number
+ moveb (%a7),%d0 //test for valid version num
+ andib #0xf0,%d0 //test for $4x
+ cmpib #VER_4,%d0 //must be $4x or exit
+ bnel fpsp_fmt_error
+
+ fmovel #0,%FPSR //clear all user status bits
+ fmovel #0,%FPCR //clear all user control bits
+//
+// The following lines are used to ensure that the FPSR
+// exception byte and condition codes are clear before proceeding,
+// except in the case of fmove, which leaves the cc's intact.
+//
+unsupp_con:
+ movel USER_FPSR(%a6),%d1
+ btst #5,CMDREG1B(%a6) //looking for fmove out
+ bne fmove_con
+ andl #0xFF00FF,%d1 //clear all but aexcs and qbyte
+ bras end_fix
+fmove_con:
+ andl #0x0FFF40FF,%d1 //clear all but cc's, snan bit, aexcs, and qbyte
+end_fix:
+ movel %d1,USER_FPSR(%a6)
+
+ st UFLG_TMP(%a6) //set flag for unsupp data
+
+ bsrl get_op //everything okay, go get operand(s)
+ bsrl res_func //fix up stack frame so can restore it
+ clrl -(%a7)
+ moveb VER_TMP(%a6),(%a7) //move idle fmt word to top of stack
+ bral gen_except
+//
+ |end