summaryrefslogtreecommitdiff
path: root/benchmarks
diff options
context:
space:
mode:
authorJoel Sherrill <joel.sherrill@OARcorp.com>2011-03-17 13:24:17 +0000
committerJoel Sherrill <joel.sherrill@OARcorp.com>2011-03-17 13:24:17 +0000
commit6e5b282d86abe5e28c79638b1bc9e568b1142b2b (patch)
tree395e8e173d6f6ac188f21233d42b53f700b1dfcd /benchmarks
parenta1abfc8bed0382034b4a8cfbc055d3113c15f55c (diff)
2011-03-17 Joel Sherrill <joel.sherrill@oarcorp.com>
* ChangeLog, Makefile, nbench/Makefile, nbench/README, nbench/main.c, nbench/nbench-byte-2.2.3/COM.DAT, nbench/nbench-byte-2.2.3/Changes, nbench/nbench-byte-2.2.3/Makefile, nbench/nbench-byte-2.2.3/NNET.DAT, nbench/nbench-byte-2.2.3/README, nbench/nbench-byte-2.2.3/README.motorola, nbench/nbench-byte-2.2.3/README.nonlinux, nbench/nbench-byte-2.2.3/README.submit, nbench/nbench-byte-2.2.3/RESULTS, nbench/nbench-byte-2.2.3/bdoc.txt, nbench/nbench-byte-2.2.3/debugbit.good.gz, nbench/nbench-byte-2.2.3/emfloat.c, nbench/nbench-byte-2.2.3/emfloat.h, nbench/nbench-byte-2.2.3/hardware.c, nbench/nbench-byte-2.2.3/hardware.h, nbench/nbench-byte-2.2.3/hello.c, nbench/nbench-byte-2.2.3/misc.c, nbench/nbench-byte-2.2.3/misc.h, nbench/nbench-byte-2.2.3/nbench0.c, nbench/nbench-byte-2.2.3/nbench0.h, nbench/nbench-byte-2.2.3/nbench1.c, nbench/nbench-byte-2.2.3/nbench1.h, nbench/nbench-byte-2.2.3/nmglobal.h, nbench/nbench-byte-2.2.3/pointer.c, nbench/nbench-byte-2.2.3/pointer.h, nbench/nbench-byte-2.2.3/sysinfo.c, nbench/nbench-byte-2.2.3/sysinfo.c.example, nbench/nbench-byte-2.2.3/sysinfo.c.template, nbench/nbench-byte-2.2.3/sysinfo.sh, nbench/nbench-byte-2.2.3/sysinfoc.c, nbench/nbench-byte-2.2.3/sysinfoc.c.example, nbench/nbench-byte-2.2.3/sysinfoc.c.template, nbench/nbench-byte-2.2.3/sysspec.c, nbench/nbench-byte-2.2.3/sysspec.h, nbench/nbench-byte-2.2.3/wordcat.h: New files. Add first benchmark -- Byte nbench.
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/ChangeLog34
-rw-r--r--benchmarks/Makefile9
-rw-r--r--benchmarks/nbench/Makefile36
-rw-r--r--benchmarks/nbench/README9
-rw-r--r--benchmarks/nbench/main.c104
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/COM.DAT11
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/Changes42
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/Makefile153
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/NNET.DAT210
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/README66
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/README.motorola29
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/README.nonlinux50
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/README.submit33
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/RESULTS138
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/bdoc.txt2109
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/debugbit.good.gzbin0 -> 1019 bytes
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/emfloat.c1343
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/emfloat.h154
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/hardware.c215
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/hardware.h2
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/hello.c2
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/misc.c120
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/misc.h41
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/nbench0.c1176
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/nbench0.h356
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/nbench1.c4449
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/nbench1.h428
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/nmglobal.h522
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/pointer.c6
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/pointer.h1
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c10
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.example10
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.template10
-rwxr-xr-xbenchmarks/nbench/nbench-byte-2.2.3/sysinfo.sh78
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c4
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.example4
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.template4
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysspec.c894
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/sysspec.h168
-rw-r--r--benchmarks/nbench/nbench-byte-2.2.3/wordcat.h81
40 files changed, 13111 insertions, 0 deletions
diff --git a/benchmarks/ChangeLog b/benchmarks/ChangeLog
new file mode 100644
index 0000000..f0a0c70
--- /dev/null
+++ b/benchmarks/ChangeLog
@@ -0,0 +1,34 @@
+2011-03-17 Joel Sherrill <joel.sherrill@oarcorp.com>
+
+ * ChangeLog, Makefile, nbench/Makefile, nbench/README, nbench/main.c,
+ nbench/nbench-byte-2.2.3/COM.DAT, nbench/nbench-byte-2.2.3/Changes,
+ nbench/nbench-byte-2.2.3/Makefile, nbench/nbench-byte-2.2.3/NNET.DAT,
+ nbench/nbench-byte-2.2.3/README,
+ nbench/nbench-byte-2.2.3/README.motorola,
+ nbench/nbench-byte-2.2.3/README.nonlinux,
+ nbench/nbench-byte-2.2.3/README.submit,
+ nbench/nbench-byte-2.2.3/RESULTS, nbench/nbench-byte-2.2.3/bdoc.txt,
+ nbench/nbench-byte-2.2.3/debugbit.good.gz,
+ nbench/nbench-byte-2.2.3/emfloat.c,
+ nbench/nbench-byte-2.2.3/emfloat.h,
+ nbench/nbench-byte-2.2.3/hardware.c,
+ nbench/nbench-byte-2.2.3/hardware.h,
+ nbench/nbench-byte-2.2.3/hello.c, nbench/nbench-byte-2.2.3/misc.c,
+ nbench/nbench-byte-2.2.3/misc.h, nbench/nbench-byte-2.2.3/nbench0.c,
+ nbench/nbench-byte-2.2.3/nbench0.h,
+ nbench/nbench-byte-2.2.3/nbench1.c,
+ nbench/nbench-byte-2.2.3/nbench1.h,
+ nbench/nbench-byte-2.2.3/nmglobal.h,
+ nbench/nbench-byte-2.2.3/pointer.c,
+ nbench/nbench-byte-2.2.3/pointer.h,
+ nbench/nbench-byte-2.2.3/sysinfo.c,
+ nbench/nbench-byte-2.2.3/sysinfo.c.example,
+ nbench/nbench-byte-2.2.3/sysinfo.c.template,
+ nbench/nbench-byte-2.2.3/sysinfo.sh,
+ nbench/nbench-byte-2.2.3/sysinfoc.c,
+ nbench/nbench-byte-2.2.3/sysinfoc.c.example,
+ nbench/nbench-byte-2.2.3/sysinfoc.c.template,
+ nbench/nbench-byte-2.2.3/sysspec.c,
+ nbench/nbench-byte-2.2.3/sysspec.h,
+ nbench/nbench-byte-2.2.3/wordcat.h: New files.
+ Add first benchmark -- Byte nbench.
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
new file mode 100644
index 0000000..ce5788c
--- /dev/null
+++ b/benchmarks/Makefile
@@ -0,0 +1,9 @@
+#
+# $Id$
+#
+# Directory-level Makefile for the benchmarks tree. It defines no targets
+# of its own: it pulls in the RTEMS make support files and names the
+# benchmark subdirectories.
+#
+
+# RTEMS_MAKEFILE_PATH is typically set in an environment variable
+include $(RTEMS_MAKEFILE_PATH)/Makefile.inc
+include $(RTEMS_CUSTOM)
+include $(RTEMS_ROOT)/make/directory.cfg
+
+# Subdirectories of this directory; presumably consumed by the rules in
+# directory.cfg -- confirm against the installed RTEMS make files.
+SUBDIRS=nbench
diff --git a/benchmarks/nbench/Makefile b/benchmarks/nbench/Makefile
new file mode 100644
index 0000000..47863a3
--- /dev/null
+++ b/benchmarks/nbench/Makefile
@@ -0,0 +1,36 @@
+#
+# $Id$
+#
+# Builds the nbench benchmark executable from main.c plus the sources
+# kept in the nbench-byte-2.2.3 subdirectory.
+#
+
+#
+# RTEMS_MAKEFILE_PATH is typically set in an environment variable
+#
+
+# Let make find the C sources both here and in nbench-byte-2.2.3.
+VPATH=.:nbench-byte-2.2.3
+PGM=${ARCH}/nbench.exe
+
+# optional managers required
+MANAGERS=all
+
+# C source names
+CSRCS = main.c \
+emfloat.c \
+hardware.c \
+misc.c \
+nbench0.c \
+nbench1.c \
+sysspec.c
+
+# One object per C source, placed in the ${ARCH} directory.
+COBJS = $(CSRCS:%.c=${ARCH}/%.o)
+
+include $(RTEMS_MAKEFILE_PATH)/Makefile.inc
+include $(RTEMS_CUSTOM)
+include $(PROJECT_ROOT)/make/leaf.cfg
+
+# The benchmark is linked against the math library.
+LINK_LIBS += -lm
+# CXXOBJS and ASOBJS are not set anywhere in this file.
+OBJS= $(COBJS) $(CXXOBJS) $(ASOBJS)
+
+all: ${ARCH} $(PGM)
+
+# make-exe is not defined in this file; presumably it comes from the
+# included RTEMS make files -- confirm.
+$(PGM): $(OBJS)
+ $(make-exe)
diff --git a/benchmarks/nbench/README b/benchmarks/nbench/README
new file mode 100644
index 0000000..82bcec2
--- /dev/null
+++ b/benchmarks/nbench/README
@@ -0,0 +1,9 @@
+#
+# $Id$
+#
+
+Byte nbench Benchmark
+
+SOURCE: http://www.tux.org/~mayer/linux/bmark.html
+
+Ported to RTEMS by Petri Rokka (petri.rokka@tut.fi).
diff --git a/benchmarks/nbench/main.c b/benchmarks/nbench/main.c
new file mode 100644
index 0000000..81d93cb
--- /dev/null
+++ b/benchmarks/nbench/main.c
@@ -0,0 +1,104 @@
+/*
+ * Written by Petri Rokka <petri.rokka@tut.fi> based
+ * upon numerous RTEMS examples.
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.com/license/LICENSE.
+ *
+ * $Id$
+ */
+
+/*
+ * RTEMS application configuration.
+ *
+ * Every CONFIGURE_* macro below has to be defined before the header that
+ * consumes it is included: <rtems/shellconfig.h> for the shell command
+ * selection and <rtems/confdefs.h> for everything else.
+ */
+#define CONFIGURE_MINIMUM_TASK_STACK_SIZE (4*1024)
+#define CONFIGURE_UNIFIED_WORK_AREAS
+
+#define CONFIGURE_APPLICATION
+#define CONFIGURE_MEMORY_OVERHEAD (2560)
+
+/* Device drivers and I/O support requested by the application. */
+#define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER
+#define CONFIGURE_APPLICATION_NEEDS_CONSOLE_DRIVER
+#define CONFIGURE_APPLICATION_NEEDS_LIBBLOCK
+
+/* Clock tick length and timeslice quantum. */
+#define CONFIGURE_MICROSECONDS_PER_TICK 1000
+#define CONFIGURE_TICKS_PER_TIMESLICE 5
+
+#define CONFIGURE_LIBIO_MAXIMUM_FILE_DESCRIPTORS 50
+#define CONFIGURE_USE_IMFS_AS_BASE_FILESYSTEM
+
+/* Object limits. */
+#define CONFIGURE_SWAPOUT_TASK_PRIORITY 220
+#define CONFIGURE_MAXIMUM_SEMAPHORES 2
+#define CONFIGURE_MAXIMUM_PTYS 6
+#define CONFIGURE_MAXIMUM_TASKS 5
+
+/* Properties of the Init task defined later in this file. */
+#define CONFIGURE_INIT_TASK_INITIAL_MODES (RTEMS_NO_PREEMPT | \
+ RTEMS_NO_TIMESLICE | \
+ RTEMS_ASR | \
+ RTEMS_INTERRUPT_LEVEL(0))
+
+#define CONFIGURE_INIT_TASK_STACK_SIZE (RTEMS_MINIMUM_STACK_SIZE * 20)
+#define CONFIGURE_INIT_TASK_PRIORITY 50
+#define CONFIGURE_RTEMS_INIT_TASKS_TABLE
+
+#define CONFIGURE_EXTRA_TASK_STACKS (RTEMS_MINIMUM_STACK_SIZE * 3)
+
+#define CONFIGURE_MAXIMUM_USER_EXTENSIONS 10
+
+/*
+ * Must be spelled CONFIGURE_SHELL_COMMANDS_INIT: <rtems/shellconfig.h>
+ * only instantiates the shell command tables when this macro is defined.
+ */
+#define CONFIGURE_SHELL_COMMANDS_INIT
+
+#define CONFIGURE_NUMBER_OF_TERMIOS_PORTS 0
+
+#include <rtems/shell.h>
+
+/* Benchmark entry point; it is not defined in this file. */
+int nbench_main(int argc, char **argv);
+
+#define CONFIGURE_SHELL_COMMANDS_ALL
+
+#include <rtems/shellconfig.h>
+
+/* CONFIGURE_INIT makes confdefs.h emit the configuration tables here. */
+#define CONFIGURE_INIT
+#define CONFIGURE_INIT_TASK_ATTRIBUTES (RTEMS_FLOATING_POINT)
+#define CONFIGURE_STACK_CHECKER_ENABLED
+#include <rtems/confdefs.h>
+
+#include <stdio.h>
+#include <rtems.h>
+
+/*
+ * Body of the benchmark task: calls nbench_main() once with a one-element
+ * argv and then deletes the calling task, because a Classic API task body
+ * must not return to its caller.
+ *
+ * arg is the task argument supplied to rtems_task_start(); it is unused.
+ */
+rtems_task task_nbench(rtems_task_argument arg)
+{
+  /* argv is an array of char pointers, matching nbench_main(int, char **). */
+  char *args[1] = {"./nbench"};
+
+  (void) arg;
+
+  nbench_main(1, args);
+
+  rtems_task_delete( RTEMS_SELF );
+}
+
+/*
+ * Initialization task: creates and starts the NBM2 task that runs
+ * task_nbench(), prints the clock tick rate, and then runs the RTEMS shell
+ * main loop. If the shell loop returns, the task deletes itself.
+ *
+ * arg is the task argument; it is unused.
+ */
+rtems_task Init(rtems_task_argument arg)
+{
+  rtems_status_code sc;
+  rtems_id task_id_nbench;
+  rtems_interval tickspersec;
+
+  (void) arg;
+
+  /*
+   * Task uses about 22K on x86
+   */
+  sc = rtems_task_create(
+    rtems_build_name('N','B','M','2'),
+    51,
+    64*1024,
+    RTEMS_DEFAULT_MODES,
+    RTEMS_FLOATING_POINT,
+    &task_id_nbench);
+  if ( sc != RTEMS_SUCCESSFUL ) {
+    printf( "unable to create NBM2\n" );
+  } else {
+    /* task_id_nbench is only valid when the create call succeeded. */
+    sc = rtems_task_start( task_id_nbench, task_nbench, 0 );
+    if ( sc != RTEMS_SUCCESSFUL )
+      printf( "unable to start NBM2\n" );
+  }
+
+  tickspersec = rtems_clock_get_ticks_per_second();
+
+  /* Convert explicitly so the argument type matches the %ld conversion. */
+  printf("clocks per sec: %ld\n", (long) tickspersec);
+  printf("Starting Shell\n");
+
+  rtems_shell_main_loop(NULL);
+
+  rtems_task_delete( RTEMS_SELF );
+}
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/COM.DAT b/benchmarks/nbench/nbench-byte-2.2.3/COM.DAT
new file mode 100644
index 0000000..8dee49c
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/COM.DAT
@@ -0,0 +1,11 @@
+ALLSTATS=T
+DONUMSORT=T
+DOSTRINGSORT=T
+DOBITFIELD=T
+DOEMF=T
+DOFOUR=T
+DOASSIGN=T
+DOIDEA=T
+DOHUFF=T
+DONNET=T
+DOLU=T
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/Changes b/benchmarks/nbench/nbench-byte-2.2.3/Changes
new file mode 100644
index 0000000..111d8bd
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/Changes
@@ -0,0 +1,42 @@
+This is about BYTE's beta version of the native-algorithm benchmark
+
+December 16, 1996:
+
+The source for DOS is obtainable at http://www.byte.com/bmark/bmark.htm
+Linux adaptation written by Uwe F. Mayer <mayer@tux.org>
+
+February 7, 1997:
+
+added -DSOLARIS flag to support solaris
+
+November 11, 1997:
+
+added index split suggested by Andrew D. Balsa
+re-baselined to a Linux machine
+added checking of CPU-type at run-time (cpuinfo.c)
+increased maximal number of loops in some tests
+removed -DSOLARIS flag, works now automatically (this also removed the
+ compiler warnings about redefined types and leads to a 20% faster
+ code for "Bitfield" if compiled with -funroll-loops!)
+
+November 13-19, 1997:
+
+changed debugging information
+changed random number generator to be always 32 bits even on 64 bit OSs
+added data resets to Bitfield and Huffman
+created this Changes file
+added debug code for Bitfield
+
+December 6, 1997:
+
+got rid of cpuinfo.c
+added a RESULTS file
+
+December 7, 1997:
+
+fixed the statistical analysis used to compute the confidence coefficient
+fixed a bug in the DEBUG routine of "Assignment"
+
+December 11, 1997:
+added some entries to RESULTS
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/Makefile b/benchmarks/nbench/nbench-byte-2.2.3/Makefile
new file mode 100644
index 0000000..630fb8c
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/Makefile
@@ -0,0 +1,153 @@
+# Makefile for nbench, December 11, 1997, Uwe F. Mayer <mayer@tux.org>
+# Updated February 18, 2003
+
+default: nbench
+
+##########################################################################
+# If you are using gcc-2.7.2.3 or earlier:
+# The optimizer of gcc has a bug and in general you should not specify
+# -funroll-loops together with -O (or -O2, -O3, etc.)
+# This bug is supposed to be fixed with release 2.8 of gcc.
+#
+# This bug does NOT seem to have an effect on the correct compilation
+# of this benchmark suite on my Linux box. However, it leads to
+# the dreaded "internal compiler error" message on our alpha
+# running DEC Unix 4.0b. The Linux-binary that was used to obtain
+# the baseline results was nevertheless compiled with
+# CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
+#
+# You should leave -static in the CFLAGS so that your sysinfo can be
+# compiled into the executable.
+
+CC = gcc
+
+# generic options for gcc
+#CFLAGS = -s -static -Wall -O3
+
+# if your gcc lets you do it, then try this one
+#CFLAGS = -s -static -Wall -O3 -fomit-frame-pointer -funroll-loops
+
+# for gcc on an older Pentium type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -m486 \
+# -fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+# -falign-jumps=2 -funroll-loops
+
+# for a newer gcc on a newer Pentium type processor you can try the following
+CFLAGS = -s -O3 -fomit-frame-pointer -Wall -march=native \
+ -fforce-addr -falign-loops=2 -falign-functions=2 \
+ -falign-jumps=2 -funroll-loops
+
+# for a newer gcc on an Athlon XP type processor you can try the following
+#CFLAGS = -s -static -O3 -fomit-frame-pointer -Wall -march=athlon-xp \
+# -fforce-addr -fforce-mem -falign-loops=2 -falign-functions=2 \
+# -falign-jumps=2 -funroll-loops
+
+# For debugging using gcc
+#CFLAGS = -g -O3 -Wall -DDEBUG
+
+##########################################################################
+# For Linux machines with more than one binary format.
+# The default binary format (elf or aout) depends on your system.
+MACHINE=
+# a.out code for linux on an elf machine
+#MACHINE= -bi486-linuxaout
+# elf code for linux on an a.out machine
+#MACHINE= -bi486-linuxelf
+# if you want a different compiler version and different binaries, for example
+#MACHINE= -V2.7.2 -bi486-linuxaout
+
+##########################################################################
+# Read the file README.nonlinux if you are not using Linux
+
+# for DEC Unix using cc you can try
+#CC = cc
+#CFLAGS = -O3
+#LINKFLAGS = -s -non_shared
+
+# for SunOS using cc
+#CC = cc
+#CFLAGS = -O3 -s
+
+# for DEC Ultrix using cc
+#CC = cc
+#CFLAGS = -O2
+#LINKFLAGS = -s
+
+# for a Mac with OsX and the Darwin environment
+#CC = cc
+#CFLAGS = -O3 -DOSX
+
+# For debugging using cc
+#CC = cc
+#CFLAGS = -g -DDEBUG
+
+##########################################################################
+# If your system does not understand the system command "uname -s -r"
+# then comment this out
+
+# NO_UNAME= -DNO_UNAME
+
+##########################################################################
+# For any Unix flavor you need -DLINUX
+# You also need -DLINUX to get the new indices
+
+DEFINES= -DLINUX $(NO_UNAME)
+
+##########################################################################
+# For LINUX-like systems with gcc
+# sysinfoc.c and sysinfo.c are produced by the same sysinfo.sh invocation,
+# which is passed the compiler and the flags used for the build. Both are
+# regenerated whenever this Makefile changes.
+sysinfoc.c: Makefile
+ ./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+
+sysinfo.c: Makefile
+ ./sysinfo.sh $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)
+
+##########################################################################
+# For non-LINUX systems
+# Edit the files sysinfo.c and sysinfoc.c to include your system information
+# and take sysinfo.c and sysinfoc.c out of the dependencies for nbench0.o
+
+# Every object lists Makefile as a prerequisite, so changing any flag in
+# this file rebuilds all of them.
+hardware.o: hardware.c hardware.h Makefile
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -c hardware.c
+
+# nbench0.o also depends on the generated sysinfo.c and sysinfoc.c
+# (presumably because nbench0.c includes them -- confirm).
+nbench0.o: nbench0.h nbench0.c nmglobal.h pointer.h hardware.h\
+ Makefile sysinfo.c sysinfoc.c
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -c nbench0.c
+
+emfloat.o: emfloat.h emfloat.c nmglobal.h pointer.h Makefile
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -c emfloat.c
+
+# pointer.h is generated: the helper program "pointer" is compiled and run;
+# if it prints 4 an empty pointer.h is created, otherwise pointer.h
+# defines LONG64.
+pointer.h: pointer Makefile
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -o pointer pointer.c
+ rm -f pointer.h
+ if [ "4" = `./pointer` ] ; then touch pointer.h ;\
+ else echo "#define LONG64" >pointer.h ; fi
+
+misc.o: misc.h misc.c Makefile
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -c misc.c
+
+nbench1.o: nbench1.h nbench1.c wordcat.h nmglobal.h pointer.h Makefile
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -c nbench1.c
+
+sysspec.o: sysspec.h sysspec.c nmglobal.h pointer.h Makefile
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS)\
+ -c sysspec.c
+
+# Final link of the six objects into the nbench binary, with the math
+# library.
+nbench: emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o
+ $(CC) $(MACHINE) $(DEFINES) $(CFLAGS) $(LINKFLAGS)\
+ emfloat.o misc.o nbench0.o nbench1.o sysspec.o hardware.o\
+ -o nbench -lm
+
+##########################################################################
+
+# clean removes objects, editor backups, the generated sysinfo sources and
+# the pointer helper with its generated header. The leading "-" makes make
+# ignore a failing rm.
+clean:
+ - /bin/rm -f *.o *~ \#* core a.out hello sysinfo.c sysinfoc.c \
+ bug pointer pointer.h debugbit.dat
+
+# mrproper does everything clean does and also removes the nbench binary.
+mrproper: clean
+ - /bin/rm -f nbench
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/NNET.DAT b/benchmarks/nbench/nbench-byte-2.2.3/NNET.DAT
new file mode 100644
index 0000000..5711730
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/NNET.DAT
@@ -0,0 +1,210 @@
+5 7 8
+26
+0 0 1 0 0
+0 1 0 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 0 0 0 0 0 1
+1 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 0
+0 1 0 0 0 0 1 0
+0 1 1 1 0
+1 0 0 0 1
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 1
+0 1 1 1 0
+0 1 0 0 0 0 1 1
+1 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 0
+0 1 0 0 0 1 0 0
+1 1 1 1 1
+1 0 0 0 0
+1 0 0 0 0
+1 1 1 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 1 1 1 1
+0 1 0 0 0 1 0 1
+1 1 1 1 1
+1 0 0 0 0
+1 0 0 0 0
+1 1 1 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+0 1 0 0 0 1 1 0
+0 1 1 1 0
+1 0 0 0 1
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 1 1
+1 0 0 0 1
+0 1 1 1 0
+0 1 0 0 0 1 1 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 0 0 1 0 0 0
+0 1 1 1 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 1 1 1 0
+0 1 0 0 1 0 0 1
+0 0 0 0 1
+0 0 0 0 1
+0 0 0 0 1
+0 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 1 1 0
+0 1 0 0 1 0 1 0
+1 0 0 0 1
+1 0 0 1 0
+1 0 1 0 0
+1 1 0 0 0
+1 0 1 0 0
+1 0 0 1 0
+1 0 0 0 1
+0 1 0 0 1 0 1 1
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+1 1 1 1 1
+0 1 0 0 1 1 0 0
+1 0 0 0 1
+1 1 0 1 1
+1 0 1 0 1
+1 0 1 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 0 0 1 1 0 1
+1 0 0 0 1
+1 1 0 0 1
+1 0 1 0 1
+1 0 1 0 1
+1 0 1 0 1
+1 0 0 1 1
+1 0 0 0 1
+0 1 0 0 1 1 1 0
+0 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 1 1 0
+0 1 0 0 1 1 1 1
+1 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 0
+1 0 0 0 0
+1 0 0 0 0
+1 0 0 0 0
+0 1 0 1 0 0 0 0
+0 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 1 0 1
+1 0 0 1 1
+0 1 1 1 1
+0 1 0 1 0 0 0 1
+1 1 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 1 1 1 0
+1 0 1 0 0
+1 0 0 1 0
+1 0 0 0 1
+0 1 0 1 0 0 1 0
+0 1 1 1 1
+1 0 0 0 0
+1 0 0 0 0
+0 1 1 1 0
+0 0 0 0 1
+0 0 0 0 1
+1 1 1 1 0
+0 1 0 1 0 0 1 1
+1 1 1 1 1
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 1 0 1 0 1 0 0
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 1 1 0
+0 1 0 1 0 1 0 1
+1 0 0 0 1
+1 0 0 0 1
+0 1 0 1 0
+0 1 0 1 0
+0 1 0 1 0
+0 1 0 1 0
+0 0 1 0 0
+0 1 0 1 0 1 1 0
+1 0 0 0 1
+1 0 0 0 1
+1 0 0 0 1
+1 0 1 0 1
+1 0 1 0 1
+1 0 1 0 1
+0 1 0 1 0
+0 1 0 1 0 1 1 1
+1 0 0 0 1
+0 1 0 1 0
+0 1 0 1 0
+0 0 1 0 0
+0 1 0 1 0
+0 1 0 1 0
+1 0 0 0 1
+0 1 0 1 1 0 0 0
+1 0 0 0 1
+0 1 0 1 0
+0 1 0 1 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 0 1 0 0
+0 1 0 1 1 0 0 1
+1 1 1 1 1
+0 0 0 1 0
+0 0 0 1 0
+0 0 1 0 0
+0 1 0 0 0
+0 1 0 0 0
+1 1 1 1 1
+0 1 0 1 1 0 1 0
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/README b/benchmarks/nbench/nbench-byte-2.2.3/README
new file mode 100644
index 0000000..6863d46
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/README
@@ -0,0 +1,66 @@
+February 18, 2003
+-----------------
+Bug-fix release.
+
+December 9, 1997
+----------------
+This release is based on beta release 2 of BYTE Magazine's BYTEmark
+benchmark program (previously known as BYTE's Native Mode
+Benchmarks). This document covers the Native Mode (a.k.a. Algorithm
+Level) tests; benchmarks designed to expose the capabilities of a
+system's CPU, FPU, and memory system.
+
+Running a "make" will create the binary if all goes well. It is called
+"nbench" and performs a suite of 10 tests and compares the results to
+a Dell Pentium 90 with 16 MB RAM and 256 KB L2 cache running MSDOS and
+compiling with the Watcom 10.0 C/C++ compiler. If you define -DLINUX
+during compilation (the default) then you also get a comparison to an
+AMD K6/233 with 32 MB RAM and 512 KB L2-cache running Linux 2.0.32 and
+using a binary which was compiled with GNU gcc version 2.7.2.3 and GNU
+libc-5.4.38.
+
+For more verbose output specify -v as an argument.
+
+The primary web site is: http://www.tux.org/~mayer/linux/bmark.html
+
+The port to Linux/Unix was done by Uwe F. Mayer <mayer@tux.org>.
+
+The index-split was done by Andrew D. Balsa, and reflects the
+realization that memory management is important in CPU design. The
+original tests have been left alone, however, the tests NUMERIC SORT,
+FP EMULATION, IDEA, and HUFFMAN now constitute the integer-arithmetic
+focused benchmark index, while the tests STRING SORT, BITFIELD, and
+ASSIGNMENT make up the new memory index.
+
+The algorithms were not changed from the source which was obtained
+from the BYTE web site at http://www.byte.com/bmark/bmark.htm on
+December 14, 1996. However, the source was modified to better work
+with 64-bit machines (in particular the random number generator was
+modified to always work with 32 bit, no matter what kind of hardware
+you run it on). Furthermore, for some of the algorithms additional
+resettings of the data were added to increase the consistency across
+different hardware. Some extra debugging code was added, which has no
+impact on normal runs.
+
+In case there is uneven system load due to other processes while this
+benchmark suite executes, it might take longer to run than on an
+unloaded system. This is because the benchmark does some statistical
+analysis to make sure that the reported results are statistically
+significant, and an increased variation in individual runs requires
+more runs to achieve the required statistical confidence.
+
+This is a single-threaded benchmark and is not designed to measure the
+performance gain on multi-processor machines.
+
+For details and customization read bdoc.txt.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/README.motorola b/benchmarks/nbench/nbench-byte-2.2.3/README.motorola
new file mode 100644
index 0000000..223001b
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/README.motorola
@@ -0,0 +1,29 @@
+The information in this file is old and no longer valid. It seems that
+the GNU C library has caught up with Motorola's libmoto, and now
+performance is just as good (or better) without libmoto. I'll include
+the old notice for historical reasons only. Currently libmoto is
+available at ftp://ftp.mcg.mot.com/pub/SPS/PowerPC/software/mklinux/libmoto/,
+but this is subject to change and not under my control.
+
+February 18, 2003
+Uwe F. Mayer
+
+---------------------------------------------------------------------------
+
+If you have a Motorola CPU or equivalent:
+
+When linked with the 'libmoto' (floating point library from Motorola)
+the results you obtain are much better. (FPU index of 0.896 versus
+1.910 in one example.)
+
+The Motorola math library is currently available at:
+http://www.mot.com/SPS/PowerPC/support/rsw_customer_support/mklinux/libmoto/libmoto_reg_mkdev.html
+
+If you have a Motorola CPU and you submit a result then please let me
+know whether you used libmoto or not. Please read the file README.submit.
+
+I do not have a Motorola CPU, and I can't help you with installing the
+library either.
+
+December 3, 1997
+Uwe F. Mayer \ No newline at end of file
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/README.nonlinux b/benchmarks/nbench/nbench-byte-2.2.3/README.nonlinux
new file mode 100644
index 0000000..641fe09
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/README.nonlinux
@@ -0,0 +1,50 @@
+December 3, 1997
+================
+
+DEC Unix 4.0 or DEC OSF1 and gcc
+--------------------------------
+Compiles cleanly if you don't use -funroll-loops with gcc-2.7.2.3 or earlier
+
+DEC UNIX 4.0 or DEC OSF1 and cc
+-------------------------------
+CC = cc
+CFLAGS = -O3
+LINKFLAGS = -s -non_shared
+
+Compiles cleanly.
+
+SunOS and gcc
+-------------
+Compiles cleanly
+
+SunOS and cc
+------------
+CC = cc
+CFLAGS = -O3 -s
+
+Compiles with one warning during compilation of nbench1.c
+
+"/usr/ucbinclude/strings.h", line 48: warning: identifier redeclared: strlen
+ current : function() returning int
+ previous: function() returning uint : "/usr/include/string.h", line 98
+
+HP-UX and gcc
+-------------
+Compiles with one warning during compilation of sysspec.c
+
+In file included from /usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/malloc.h:9,
+ from sysspec.h:37,
+ from sysspec.c:37:
+/usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/sys/types.h:117: warning: empty declaration
+/usr/local/lib/gcc-lib/hppa1.1-hp-hpux9.05/2.7.2.1/include/sys/types.h:118: warning: empty declaration
+
+DEC Ultrix and cc
+-----------------
+CC = cc
+CFLAGS = -O2
+LINKFLAGS = -s
+
+Compiles with a warning about the correct usage of cut when running sysinfo.sh
+cut: Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...
+cut: Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/README.submit b/benchmarks/nbench/nbench-byte-2.2.3/README.submit
new file mode 100644
index 0000000..0dd3138
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/README.submit
@@ -0,0 +1,33 @@
+I plan on posting a digest of results in case people mail me any.
+The URL will be linked to
+
+http://www.tux.org/~mayer/linux/bmark.html
+
+If you want to submit, then run the benchmark (use your own
+compilation, I don't care with what flags or compiler, but I want all
+numbers from a single benchmark run) and fill in the template as given
+in the example below:
+
+CPU : AMD 5x86P75 (486DX4/133MHz)
+L2 CACHE : 256 KB
+OS : Linux 2.0.32
+C COMPILER : gcc 2.7.2.3
+LIBC : libc-5.4.38
+Pentium 90 INTEGER INDEX : 1.051
+Pentium 90 FLOATING-POINT INDEX : 0.450
+AMD K6/233 MEMORY INDEX : 0.337
+AMD K6/233 INTEGER INDEX : 0.238
+AMD K6/233 FLOATING-POINT INDEX : 0.230
+
+Any other format is fine as long as it contains the same info (write
+"unknown" or "?" for data you don't know). For example, you could just
+cut the summary from the output of nbench and mail it together with
+cache, CPU, and OS info in case it is not already present. Please do
+not email me the complete output of nbench, or any other unnecessarily
+long email, as this just eats up my hard-disk space. However, long
+collections of results are of course welcome.
+
+Send your result to mayer@tux.org
+
+Uwe F. Mayer
+February 18, 2003
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/RESULTS b/benchmarks/nbench/nbench-byte-2.2.3/RESULTS
new file mode 100644
index 0000000..ccf2336
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/RESULTS
@@ -0,0 +1,138 @@
+December 7, 1997
+
+This file contains a few results so you may compare your machine.
+If you read this much after December 1997 then the results herein
+are probably obsolete.
+
+For a longer and hopefully more up-to-date list of results consult
+http://www.tux.org/~mayer/linux/bmark.html
+This web site, however, currently lists the old Pentium 90 indices!
+
+The indices below are with respect to the new AMD K6/233 baseline.
+
+OS : DEC Ultrix 4.4
+C compiler : cc
+libc : unknown version
+CPU : mips R6000
+L2 cache : ?
+MEMORY INDEX : 0.029
+INTEGER INDEX : 0.046
+FLOATING-POINT INDEX: 0.077
+
+OS : LINUX 2.0.31
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Intel 486DX2/66 MHz
+L2 cache : 256 KB
+MEMORY INDEX : 0.098
+INTEGER INDEX : 0.141
+FLOATING-POINT INDEX: 0.116
+
+OS : LINUX 2.0.32
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : AMD 5x86P75 (486DX4/133MHz)
+L2 cache : 256 KB
+MEMORY INDEX : 0.234
+INTEGER INDEX : 0.286
+FLOATING-POINT INDEX: 0.249
+
+OS : OSF1 V3.2 214
+C compiler : cc
+libc : unknown version
+CPU : 21064 alpha (DEC 3000 MODEL 300, year 1993)
+L2 cache : 256 KB
+MEMORY INDEX : 0.358
+INTEGER INDEX : 0.362
+FLOATING-POINT INDEX: 0.656
+
+OS : HP-UX A.09.05
+C compiler : gcc version 2.7.2.1
+libc : unknown version
+CPU : 9000/715
+L2 cache : ?
+MEMORY INDEX : 0.208
+INTEGER INDEX : 0.369
+FLOATING-POINT INDEX: 0.516
+
+OS : LINUX 2.0.31
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Intel Pentium 133 MHz
+L2 cache : 512 KB
+MEMORY INDEX : 0.383
+INTEGER INDEX : 0.444
+FLOATING-POINT INDEX: 0.632
+
+OS : SunOS 5.5.1
+C compiler : cc
+libc : unknown version
+CPU : SUN-Ultra-Enterprise-2 sparc
+L2 cache : ?
+MEMORY INDEX : 0.417
+INTEGER INDEX : 0.546
+FLOATING-POINT INDEX: 1.028
+
+OS : LINUX 2.0.29
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Cyrix 6x86L PR200+ (at 2 x 75 = 150 MHz)
+L2 cache : 256 KB
+MEMORY INDEX : 0.666
+INTEGER INDEX : 0.599
+FLOATING-POINT INDEX: 0.508
+
+OS : LINUX 2.0.31
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Intel Pentium MMX 200 MHz
+L2 cache : 512 KB
+MEMORY INDEX : 0.601
+INTEGER INDEX : 0.636
+FLOATING-POINT INDEX: 0.970
+
+OS : LINUX 2.0.31
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Intel 686 PentiumPro 200 MHz
+L2 cache : 256 KB (internal)
+MEMORY INDEX : 0.699
+INTEGER INDEX : 0.732
+FLOATING-POINT INDEX: 1.140
+
+OS : LINUX 2.0.29
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Cyrix 6x86MX PR233 (at 2.5 x 75 = 187.5 MHz)
+L2 cache : 512 KB
+MEMORY INDEX : 0.861
+INTEGER INDEX : 0.773
+FLOATING-POINT INDEX: 0.730
+
+OS : LINUX 2.0.32
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : AMD K6/233
+L2 cache : 512 KB
+MEMORY INDEX : 1.000
+INTEGER INDEX : 1.000
+FLOATING-POINT INDEX: 1.000
+
+OS : LINUX 2.0.31
+C compiler : gcc version 2.7.2.3
+libc : libc.so.5.4.38
+CPU : Intel 686 Pentium II 300 MHz
+L2 cache : 512 KB
+MEMORY INDEX : 1.255
+INTEGER INDEX : 1.093
+FLOATING-POINT INDEX: 1.842
+
+OS : DEC UNIX 4.0b 564
+C compiler : cc
+libc : unknown version
+CPU : 21164 Alpha 300 MHz (dual CPU)
+L2 cache : 96 KB
+L3 cache : 4 MB per CPU
+MEMORY INDEX : 0.973
+INTEGER INDEX : 1.124
+FLOATING-POINT INDEX: 3.237
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/bdoc.txt b/benchmarks/nbench/nbench-byte-2.2.3/bdoc.txt
new file mode 100644
index 0000000..e557bb0
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/bdoc.txt
@@ -0,0 +1,2109 @@
+http://www.byte.com/bmark/bmark.htm
+----------------------------------------------------------------------------
+
+BYTEmark
+
+----------------------------------------------------------------------------
+
+This is release 2 of BYTE Magazine's BYTEmark benchmark program (previously
+known as BYTE's Native Mode Benchmarks). This document covers the Native
+Mode (a.k.a. Algorithm Level) tests; benchmarks designed to expose the
+capabilities of a system's CPU, FPU, and memory system. Another group of
+benchmarks within the BYTEmark suite includes the Application Simulation
+Benchmarks. They are detailed in a separate document. [NOTE: The
+documentation for the Application simulation benchmarks should appear before
+the end of March, 95. -- RG].
+
+The Tests
+
+The Native Mode portion of the BYTEmark consists of a number of well-known
+algorithms; some BYTE has used before in earlier versions of the benchmark,
+others are new. The complete suite consists of 10 tests:
+
+Numeric sort - Sorts an array of 32-bit integers.
+
+String sort - Sorts an array of strings of arbitrary length.
+
+Bitfield - Executes a variety of bit manipulation functions.
+
+Emulated floating-point - A small software floating-point package.
+
+Fourier coefficients - A numerical analysis routine for calculating series
+approximations of waveforms.
+
+Assignment algorithm - A well-known task allocation algorithm.
+
+Huffman compression - A well-known text and graphics compression algorithm.
+
+IDEA encryption - A relatively new block cipher algorithm.
+
+Neural Net - A small but functional back-propagation network simulator.
+
+LU Decomposition - A robust algorithm for solving linear equations.
+
+A more complete description of each test can be found in later sections of
+this document.
+
+BYTE built the BYTEmark with the multiplatform world foremost in mind. There
+were, of course, other considerations that we kept high on the list:
+
+Real-world algorithms. The algorithms should actually do something. Previous
+benchmarks often moved gobs of bytes from one point to another, added or
+subtracted piles and piles of numbers, or (in some cases) actually executed
+NOP instructions. We should not belittle those tests of yesterday; they had
+their place. However, we think it better that tests be based on activities
+that are more complex in nature.
+
+Easy to port. All the benchmarks are written in "vanilla" ANSI C. This
+provides us with the best chance of moving them quickly and accurately to
+new processors and operating systems as they appear. It also simplifies
+maintenance.
+
+This means that as new 64-bit (and, perhaps, 128-bit) processors appear, the
+benchmarks can test them as soon as a compiler is available.
+
+Comprehensive. The algorithms were derived from a variety of sources. Some
+are routines that BYTE had been using for some time. Others are routines
+derived from well-known texts in the computer science world. Furthermore,
+the algorithms differ in structure. Some simply "walk" sequentially through
+one-dimensional arrays. Others build and manipulate two-dimensional arrays.
+Finally, some benchmarks are "integer" tests, while others exercise the
+floating-point coprocessor (if one is available).
+
+Scalable. We wanted these benchmarks to be useful across as wide a variety
+of systems as possible. We also wanted to give them a lifetime beyond the
+next wave of new processors.
+
+To that end, we incorporated "dynamic workload adjustment." A complete
+description of this appears in a later section. In a nutshell, this allows
+the tests to "expand or contract" depending on the capabilities of the
+system under test, all the while providing consistent results so that fair
+and accurate comparisons are possible.
+
+Honesty In Advertising
+
+We'd be lying if we said that the BYTEmark was all the benchmarking that
+anyone would ever need to run on a system. It would be equally inaccurate to
+suggest that the tests are completely free of inadequacies. There are many
+things the tests do not do, there are shortcomings, and there are problems.
+
+BYTE will continue to improve the BYTEmark. The source code is freely
+available, and we encourage vendors and users to examine the routines and
+provide us with their feedback. In this way, we assure fairness,
+comprehensiveness, and accuracy.
+
+Still, as we mentioned, there are some shortcomings. Here are those we
+consider the most significant. Keep them in mind as you examine the results
+of the benchmarks now and in the future.
+
+At the mercy of C compilers. Being written in ANSI C, the benchmark program
+is highly portable. This is a reflection of the "world we live in." If this
+were a one-processor world, we might stand a chance at hand-crafting a
+benchmark in assembly language. (At one time, that's exactly what BYTE did.)
+Not today, no way.
+
+The upshot is that the benchmarks must be compiled. For broadest coverage,
+we selected ANSI C. And when they're compiled, the resulting executable's
+performance can be highly dependent on the capabilities of the C compiler.
+Today's benchmark results can be blown out of the water tomorrow if someone
+new enters the scene with an optimizing strategy that outperforms existing
+competition.
+
+This concern is not easily waved off. It will require you to keep careful
+track of compiler version and optimization switches. As BYTE builds its
+database of benchmark results, version number and switch setting will become
+an integral part of that data. This will be true for published information
+as well, so that you can make comparisons fairly and accurately. BYTE will
+control the distribution of test results so that all relevant compiler
+information is attached to the data.
+
+As a faint justification -- for those who think this situation results in
+"polluted" tests -- we should point out that we are in the same boat as all
+the other developers (at least, all those using C compilers -- and that's
+quite a sizeable group). If the only C compilers for a given system happen
+to be poor ones, everyone suffers. It's a fact that a given platform's
+ultimate potential depends as much on the development software available as
+on the technical achievements of the hardware design.
+
+It's just CPU and FPU. It's very tempting to try to capture the performance
+of a machine in a single number. That has never been possible -- though it's
+been tried a lot -- and the gap between that ideal and reality will forever
+widen.
+
+These benchmarks are meant to expose the theoretical upper limit of the CPU,
+FPU, and memory architecture of a system. They cannot measure video, disk,
+or network throughput (those are the domains of a different set of
+benchmarks). You should, therefore, use the results of these tests as part,
+not all, of any evaluation of a system.
+
+Single threaded. Currently, each benchmark test uses only a single execution
+thread. It's unlikely that you'll find any modern operating system that does
+not have some multitasking component. How a system "scales" as more tasks
+are run simultaneously is an effect that the current benchmarks cannot
+explore.
+
+BYTE is working on a future version of the tests that will solve this
+problem.
+
+The tests are synthetic. This quite reasonable argument is based on the fact
+that people don't run benchmarks for a living, they run applications.
+Consequently, the only true measure of a system is how well it performs
+whatever applications you will be running. This, in fact, is the philosophy
+behind the BAPCo benchmarks.
+
+This is not a point with which we would disagree. BYTE regularly makes use
+of a variety of application benchmarks. None of this suggests, however, that
+the BYTEmark benchmarks serve no purpose.
+
+BYTEmark's results should be used as predictors. They can be moved to a new
+platform long before native applications will be ported. The BYTEmark
+benchmarks will therefore provide an early look at the potential of the
+machine. Additionally, the BYTEmark permits you to "home in" on an aspect of
+the overall architecture. How well does the system perform when executing
+floating-point computations? Does its memory architecture help or hinder the
+management of memory buffers that may fall on arbitrary address boundaries?
+How does the cache work with a program whose memory access favors moving
+randomly through memory as opposed to moving sequentially through memory?
+
+The answers to these questions can give you a good idea of how well a system
+would support a particular class of applications. Only a synthetic benchmark
+can give the narrow view necessary to find the answers.
+
+Dynamic Workloads
+
+Our long history of benchmarking has taught us one thing above all others:
+Tomorrow's system will go faster than today's by an amount exceeding your
+wildest guess -- and then some. Dealing with this can become an unending
+race.
+
+It goes like this: You design a benchmark algorithm, you specify its
+parameters (how big the array is, how many loops, etc.), you run it on
+today's latest super-microcomputer, collect your data, and go home. A new
+machine arrives the next day, you run your benchmark, and discover that the
+test executes so quickly that the resolution of the clock routine you're
+using can't keep up with it (i.e., the test is over and done before the
+system clock even has a chance to tick).
+
+If you modify your routine, the figures you collected yesterday are no good.
+If you create a better clock routine by sneaking down into the system
+hardware, you can kiss portability goodbye.
+
+The BYTEmark benchmarks solve this problem by a process we'll refer to as
+"dynamic workload adjustment." In principle, it simply means that if the
+test runs so fast that the system clock can't time it, the benchmark
+increases the test workload -- and keeps increasing it -- until enough time
+is consumed to gather reliable test results.
+
+Here's an example.
+
+The BYTEmark benchmarks perform timing using a "stopwatch" paradigm. The
+routine StartStopwatch() begins timing; StopStopwatch() ends timing and
+reports the elapsed time in clock ticks. Now, "clock ticks" is a value that
+varies from system to system. We'll presume that our test system provides
+1000 clock ticks per second. (We'll also presume that the system actually
+updates its clock 1000 times per second. Surprisingly, some systems don't do
+that. One we know of will tell you that the clock provides 100 ticks per
+second, but updates the clock in 5- or 6-tick increments. The resolution is
+no better than somewhere around 1/18th of a second.) Here, when we say
+"system" we mean not only the computer system, but the environment provided
+by the C compiler. Interestingly, different C compilers for the same system
+will report different clock ticks per second.
+
+Built into the benchmarks is a global variable called GLOBALMINTICKS. This
+variable is the minimum number of clock ticks that the benchmark will allow
+StopStopwatch() to report.
+
+Suppose you run the Numeric Sort benchmark. The benchmark program will
+construct an array filled with random numbers, call StartStopwatch(), sort
+the array, and call StopStopwatch(). If the time reported in StopStopwatch()
+is less than GLOBALMINTICKS, then the benchmark will build two arrays, and
+try again. If sorting two arrays took less time than GLOBALMINTICKS, the
+process repeats with more arrays.
+
+This goes on until the benchmark makes enough work so that an interval
+between StartStopwatch() and StopStopwatch() exceeds GLOBALMINTICKS. Once
+that happens, the test is actually run, and scores are calculated.
+
+Notice that the benchmark didn't make bigger arrays, it made more arrays.
+That's because the time taken by the sort test does not increase linearly as
+the array grows; it increases in proportion to N*log(N) (where N is the size
+of the array).
+
+This principle is applied to all the benchmark tests. A machine with a less
+accurate clock may be forced to sort more arrays at a time, but the results
+are given in arrays per second. In this way fast machines, slow machines,
+machines with accurate clocks, machines with less accurate clocks, can all
+be tested with the same code.
+
+Confidence Intervals
+
+Another built-in feature of the BYTEmark is a set of statistical-analysis
+routines. Running benchmarks is one thing; the question arises as to how
+many times a test should be run until you know you have a good sampling.
+Also, can you determine whether the test is stable (i.e., do results vary
+widely from one execution of the benchmark to the next)?
+
+The BYTEmark keeps score as follows: Each test (a test being a numeric
+sort, a string sort, etc.) is run five times. These five scores are
+averaged, the standard deviation is determined, and a 95% confidence
+half-interval for the mean is calculated (using Student's t
+distribution). This tells us that the true average lies -- with a 95%
+probability -- within plus or minus the confidence half-interval of
+the calculated average. If this half-interval is within 5% of the
+calculated average, the benchmarking stops. Otherwise, a new test is
+run and the calculations are repeated with all of the runs done so
+far, including the new one. The benchmark proceeds this way up to a
+total of 30 runs. If the length of the half-interval is still bigger
+than 5% of the calculated average then a warning is issued that the
+results might not be statistically certain before the average is
+displayed.
+
+** Fixed a statistical bug here. Uwe F. Mayer
+
+The upshot is that, for each benchmark test, the true average is -- with a
+95% level of confidence -- within 5% of the average reported. Here, the
+"true average" is the average we would get were we able to run the tests
+over and over again an infinite number of times.
+
+This specification ensures that the calculation of results is controlled;
+that someone running the tests in California will use the same technique for
+determining benchmark results as someone running the tests in New York.
+
+In case there is uneven system load due to other processes while this
+benchmark suite executes, it might take longer to run the benchmark suite
+as compared to a run on an unloaded system. This is because the benchmark does
+some statistical analysis to make sure that the reported results are
+statistically significant (as explained above), and a high variation in
+individual runs requires more runs to achieve the required statistical
+confidence.
+
+*** added the last paragraph, Uwe F. Mayer
+
+Interpreting Results
+
+Of course, running the benchmarks can present you with a boatload of data.
+It can get mystifying, and some of the more esoteric statistical information
+is valuable only to a limited audience. The big question is: What does it
+all mean?
+
+First, we should point out that the BYTEmark reports both "raw" and indexed
+scores for each test. The raw score for a particular test amounts to the
+"iterations per second" of that test. For example, the numeric sort test
+reports as its raw score the number of arrays it was able to sort per
+second.
+
+The indexed score is the raw score of the system under test divided by the
+raw score obtained on the baseline machine. As of this release, the
+baseline machine is a DELL 90 MHz Pentium XPS/90 with 16 MB of RAM and 256K
+of external processor cache. (The compiler used was the Watcom C/C++ 10.0
+compiler; optimizations set to "fastest possible code", 4-byte structure
+alignment, Pentium code generation with Pentium register-based calling. The
+operating system was MSDOS.) The indexed score serves to "normalize" the
+raw scores, reducing their dynamic range and making them easier to
+grasp. Simply put, if your machine has an index score of 2.0 on the numeric
+sort test, it performed that test twice as fast as this 90 MHz Pentium.
+
+If you run all the tests (as you'll see, it is possible to perform "custom
+runs", which execute only a subset of the tests) the BYTEmark will also
+produce two overall index figures: Integer index and Floating-point index.
+The Integer index is the geometric mean of those tests that involve only
+integer processing -- numeric sort, string sort, bitfield, emulated
+floating-point, assignment, Huffman, and IDEA -- while the Floating-point
+index is the geometric mean of those tests that require the floating-point
+coprocessor -- Fourier, neural net, and LU decomposition. You can use these
+scores to get a general feel for the performance of the machine under test
+as compared to the baseline 90 MHz Pentium.
+
+The Linux/Unix port has a second baseline machine: an AMD K6/233 with
+32 MB RAM and 512 KB L2-cache running Linux 2.0.32 and using GNU gcc
+version 2.7.2.3 and libc-5.4.38. The integer index was split as suggested
+by Andrew D. Balsa <andrewbalsa@usa.net>, and reflects the realization that
+memory management is important in CPU design. The original tests have been
+left alone; however, the geometric mean of the tests NUMERIC SORT, FP
+EMULATION, IDEA, and HUFFMAN now constitutes the integer-arithmetic focused
+benchmark index, while the geometric mean of the tests STRING SORT,
+BITFIELD, and ASSIGNMENT makes up the new memory index. The floating point
+index has been left alone; it is still the geometric mean of FOURIER,
+NEURAL NET, and LU DECOMPOSITION.
+
+*** added the section on Linux, Uwe F. Mayer
+
+What follows is a list of the benchmarks and associated brief remarks that
+describe what the tests do: What they exercise; what a "good" result or a
+"bad" result means. Keep in mind that, in this expanding universe of faster
+processors, bigger caches, more elaborate memory architectures, "good" and
+"bad" are indeed relative terms. A good score on today's hot new processor
+will be a bad score on tomorrow's hot new processor.
+
+These remarks are based on empirical data and profiling that we have done to
+date. (NOTE: The profiling is limited to Intel and Motorola 68K on this
+release. As more data is gathered, we will be refining this section.
+3/14/95--RG)
+
+Benchmark Description
+
+Numeric sort Generic integer performance. Should
+ exercise non-sequential performance
+ of cache (or memory if cache is less
+ than 8K). Moves 32-bit longs at a
+ time, so 16-bit processors will be
+ at a disadvantage.
+
+
+
+String sort Tests memory-move performance.
+ Should exercise non-sequential
+ performance of cache, with added
+ burden that moves are byte-wide and
+ can occur on odd address boundaries.
+ May tax the performance of
+ cell-based processors that must
+ perform additional shift operations
+ to deal with bytes.
+
+
+
+Bitfield Exercises "bit twiddling"
+ performance. Travels through memory
+ in a somewhat sequential fashion;
+ different from sorts in that data is
+ merely altered in place. If
+ properly compiled, takes into
+ account 64-bit processors, which
+ should see a boost.
+
+
+
+Emulated F.P. Past experience has shown this test
+ to be a good measurement of overall
+ performance.
+
+
+
+Fourier Good measure of transcendental and
+ trigonometric performance of FPU.
+ Little array activity, so this test
+ should not be dependent on cache or
+ memory architecture.
+
+
+
+Assignment The test moves through large integer
+ arrays in both row-wise and
+ column-wise fashion. Cache/memory
+ with good sequential performance
+ should see a boost (memory is
+ altered in place -- no moving as in
+ a sort operation). Processing is
+ done in 32-bit chunks -- no
+ advantage given to 64-bit
+ processors.
+
+
+
+Huffman A combination of byte operations,
+ bit twiddling, and overall integer
+ manipulation. Should be a good
+ general measurement.
+
+
+
+IDEA Moves through data sequentially in
+ 16-bit chunks. Should provide a
+ good indication of raw speed.
+
+
+
+Neural Net Small-array floating-point test
+ heavily dependent on the exponential
+ function; less dependent on overall
+ FPU performance. Small arrays, so
+ cache/memory architecture should not
+ come into play.
+
+
+
+LU decomposition. A floating-point test that moves
+ through arrays in both row-wise and
+ column-wise fashion. Exercises only
+ fundamental math operations (+, -,
+ *, /).
+
+The Command File
+
+Purpose
+
+The BYTEmark program allows you to override many of its default parameters
+using a command file. The command file also lets you request statistical
+information, as well as specify an output file to hold the test results for
+later use.
+
+You identify the command file using a command-line argument. E.G.,
+
+C:NBENCH -cCOMFILE.DAT
+
+tells the benchmark program to read from COMFILE.DAT in the current
+directory.
+
+The content of the command file is simply a series of parameter names and
+values, each on a single line. The parameters control internal variables
+that are either global in nature (i.e., they affect all tests in the
+program) or are specific to a given benchmark test.
+
+The parameters are listed in a reference guide that follows, arranged in the
+following groups:
+
+Global Parameters
+
+Numeric Sort
+
+String Sort
+
+Bitfield
+
+Emulated floating-point
+
+Fourier coefficients
+
+Assignment algorithm
+
+IDEA encryption
+
+Huffman compression
+
+Neural net
+
+LU decomposition
+
+As mentioned above, those items listed under "Global Parameters" affect all
+tests; the rest deal with specific benchmarks. There is no required ordering
+to parameters as they appear in the command file. You can specify them in
+any sequence you wish.
+
+You should be judicious in your use of a command file. Some parameters will
+override the "dynamic workload" adjustment that each test performs. Doing
+this completely bypasses the benchmark code that is designed to produce an
+accurate reading from your system clock. Other parameters will alter default
+settings, yielding test results that cannot be compared with published
+benchmark results.
+
+A Sample Command File
+
+Suppose you built a command file that contained the following:
+
+ALLSTATS=T
+
+CUSTOMRUN=T
+
+OUTFILE=D:\DATA.DAT
+
+DONUMSORT=T
+
+DOLU=T
+
+Here's what this file tells the benchmark program:
+
+ALLSTATS=T means that you've requested a "dump" of all the statistics the
+test gathers. This includes not only the standard deviations of tests run,
+it also produces test-specific information such as the number of arrays
+built, the array size, etc.
+
+CUSTOMRUN=T tells the system that this is a custom run. Only tests
+explicitly specified will be executed.
+
+OUTFILE=D:\DATA.DAT will write the output of the benchmark to the file
+DATA.DAT on the root of the D: drive. (If DATA.DAT already exists, output
+will be appended to the file.)
+
+DONUMSORT=T tells the system to run the numeric sort benchmark. (This was
+necessary on account of the CUSTOMRUN=T line, above.)
+
+DOLU=T tells the system to run the LU decomposition benchmark.
+
+Command File Parameters Reference
+
+(NOTE: Altering some global parameters can invalidate results for comparison
+purposes. Those parameters are indicated in the following section by a bold
+asterisk (*). If you alter any parameters so indicated, you may NOT publish
+the resulting data as BYTEmark scores.)
+
+Global Parameters
+
+GLOBALMINTICKS=<n>
+
+This overrides the default global_min_ticks value (defined in NBENCH1.H).
+The global_min_ticks value is defined as the minimum number of clock ticks
+per iteration of a particular benchmark. For example, if global_min_ticks is
+set to 100 and the numeric sort benchmark is run, each iteration MUST take
+at least 100 ticks, or the system will expand the work-per-iteration.
+
+MINSECONDS=<n>
+
+Sets the minimum number of seconds any particular test will run. This has
+the effect of controlling the number of repetitions done. Default: 5.
+
+ALLSTATS=<T|F>
+
+Set this flag to T for a "dump" of all statistics. The information displayed
+varies from test to test. Default: F.
+
+OUTFILE=<path>
+
+Specifies that output should go to the specified output file. Any test
+results and statistical data displayed on-screen will also be written to the
+file. If the file does not exist, it will be created; otherwise, new output
+will be appended to an existing file. This allows you to "capture" several
+runs into a single file for later review.
+
+Note: the path should not appear in quotes. For example, something like the
+following would work: OUTFILE=C:\BENCH\DUMP.DAT
+
+CUSTOMRUN=<T|F>
+
+Set this flag to T for a custom run. A "custom run" means that the program
+will run only the benchmark tests that you explicitly specify. So, use this
+flag to run a subset of the tests. Default: F.
+
+Numeric Sort
+
+DONUMSORT=<T|F>
+
+Indicates whether to do the numeric sort. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case default is F.
+
+NUMNUMARRAYS=<n>
+
+Indicates the number of numeric arrays the system will build. Setting this
+value will override the program's "dynamic workload" adjustment for this
+test.*
+
+NUMARRAYSIZE=<n>
+
+Indicates the number of elements in each numeric array. Default is 8001
+entries. (NOTE: Altering this value will invalidate the test for comparison
+purposes. The performance of the numeric sort test is not related to the
+array size as a linear function; i.e., an array twice as big will not take
+twice as long. The relationship involves a logarithmic function.)*
+
+NUMMINSECONDS=<n>
+
+Overrides MINSECONDS for the numeric sort test.
+
+String Sort
+
+DOSTRINGSORT=<T|F>
+
+Indicates whether to do the string sort. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+STRARRAYSIZE=<n>
+
+Sets the size of the string array. Default is 8111. (NOTE: Altering this
+value will invalidate the test for comparison purposes. The performance of
+the string sort test is not related to the array size as a linear function;
+i.e., an array twice as big will not take twice as long. The relationship
+involves a logarithmic function.)*
+
+NUMSTRARRAYS=<n>
+
+Sets the number of string arrays that will be created to run the test.
+Setting this value will override the program's "dynamic workload" adjustment
+for this test.*
+
+STRMINSECONDS=<n>
+
+Overrides MINSECONDS for the string sort test.
+
+Bitfield
+
+DOBITFIELD=<T|F>
+
+Indicates whether to do the bitfield test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+NUMBITOPS=<n>
+
+Sets the number of bitfield operations that will be performed. Setting this
+value will override the program's "dynamic workload" adjustment for this
+test.*
+
+BITFIELDSIZE=<n>
+
+Sets the number of 32-bit elements in the bitfield arrays. The default value
+is dependent on the size of a long as defined by the current compiler. For a
+typical compiler that defines a long to be 32 bits, the default is 32768.
+(NOTE: Altering this parameter will invalidate test results for comparison
+purposes.)*
+
+BITMINSECONDS=<n>
+
+Overrides MINSECONDS for the bitfield test.
+
+Emulated floating-point
+
+DOEMF=<T|F>
+
+Indicates whether to do the emulated floating-point test. Default is T,
+unless this is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+EMFARRAYSIZE=<n>
+
+Sets the size (number of elements) of the emulated floating-point benchmark.
+Default is 3000. The test builds three arrays, each of equal size. This
+parameter sets the number of elements for EACH array. (NOTE: Altering this
+parameter will invalidate test results for comparison purposes.)*
+
+EMFLOOPS=<n>
+
+Sets the number of loops per iteration of the floating-point test. Setting
+this value will override the program's "dynamic workload" adjustment for
+this test.*
+
+EMFMINSECONDS=<n>
+
+Overrides MINSECONDS for the emulated floating-point test.
+
+Fourier coefficients
+
+DOFOUR=<T|F>
+
+Indicates whether to do the Fourier test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+FOURASIZE=<n>
+
+Sets the size of the array for the Fourier test. This sets the number of
+coefficients the test will derive. NOTE: Specifying this value will override
+the system's "dynamic workload" adjustment for this test, and may make the
+results invalid for comparison purposes.*
+
+FOURMINSECONDS=<n>
+
+Overrides MINSECONDS for the Fourier test.
+
+Assignment Algorithm
+
+DOASSIGN=<T|F>
+
+Indicates whether to do the assignment algorithm test. Default is T, unless
+this is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+ASSIGNARRAYS=<n>
+
+Indicates the number of arrays that will be built for the test. Specifying
+this value will override the system's "dynamic workload" adjustment for this
+test. (NOTE: The size of the arrays in the assignment algorithm is fixed at
+101 x 101. Altering the array size requires adjusting global constants and
+recompiling; to do so, however, would invalidate test results.)*
+
+ASSIGNMINSECONDS=<n>
+
+Overrides MINSECONDS for the assignment algorithm test.
+
+IDEA encryption
+
+DOIDEA=<T|F>
+
+Indicates whether to do the IDEA encryption test. Default is T, unless this
+is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+IDEAARRAYSIZE=<n>
+
+Sets the size of the plain-text character array that will be encrypted by the
+test. Default is 4000. The benchmark actually builds 3 arrays: 1st
+plain-text, encrypted version, and 2nd plain-text. The 2nd plain-text array is
+the destination for the decryption process [part of the test]. All arrays
+are set to the same size. (NOTE: Specifying this value will invalidate test
+results for comparison purposes.)*
+
+IDEALOOPS=<n>
+
+Indicates the number of loops in the IDEA test. Specifying this value will
+override the system's "dynamic workload" adjustment for this test.*
+
+IDEAMINSECONDS=<n>
+
+Overrides MINSECONDS for the IDEA test.
+
+Huffman compression
+
+DOHUFF=<T|F>
+
+Indicates whether to do the Huffman test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+HUFFARRAYSIZE=<n>
+
+Sets the size of the string buffer that will be compressed using the Huffman
+test. The default is 5000. (NOTE: Altering this value will invalidate test
+results for comparison purposes.)*
+
+HUFFLOOPS=<n>
+
+Sets the number of loops in the Huffman test. Specifying this value will
+override the system's "dynamic workload" adjustment for this test.*
+
+HUFFMINSECONDS=<n>
+
+Overrides MINSECONDS for the Huffman test.
+
+Neural net
+
+DONNET=<T|F>
+
+Indicates whether to do the Neural Net test. Default is T, unless this is a
+custom run (CUSTOMRUN=T), in which case the default is F.
+
+NNETLOOPS=<n>
+
+Sets the number of loops in the Neural Net test. NOTE: Altering this value
+overrides the benchmark's "dynamic workload" adjustment algorithm, and may
+invalidate the results for comparison purposes.*
+
+NNETMINSECONDS=<n>
+
+Overrides MINSECONDS for the Neural Net test.
+
+LU decomposition
+
+DOLU=<T|F>
+
+Indicates whether to do the LU decomposition test. Default is T, unless this
+is a custom run (CUSTOMRUN=T), in which case the default is F.
+
+LUNUMARRAYS=<n>
+
+Sets the number of arrays in each iteration of the LU decomposition test.
+Specifying this value will override the system's "dynamic workload"
+adjustment for this test.*
+
+LUMINSECONDS=<n>
+
+Overrides MINSECONDS for the LU decomposition test.
+
+Numeric Sort
+
+Description
+
+This benchmark is designed to explore how well the system sorts a numeric
+array. In this case, a numeric array is a one-dimensional collection of
+signed, 32-bit integers. The actual sorting is performed by a heapsort
+algorithm (see the text box following for a description of the heapsort
+algorithm).
+
+It's probably unnecessary to point out (but we'll do it anyway) that sorting
+is a fundamental operation in computer application software. You'll likely
+find sorting routines nestled deep inside a variety of applications;
+everything from database systems to operating-systems kernels.
+
+The numeric sort benchmark reports the number of arrays it was able to sort
+per second. The array size is set by a global constant (it can be overridden
+by the command file -- see below).
+
+Analysis
+
+Optimized 486 code: Profiling of the numeric sort benchmark using Watcom's
+profiler (Watcom C/C++ 10.0) indicates that the algorithm spends most of its
+time in the numsift() function (specifically, about 90% of the benchmark's
+time takes place in numsift()). Within numsift(), two if statements dominate
+time spent:
+
+if(array[k]<array[k+1L]) and if(array[i]<array[k])
+
+Both statements involve indexes into arrays, so it's likely the processor is
+spending a lot of time resolving the array references. (Though both
+statements involve "less-than" comparisons, we doubt that much time is
+consumed in performing the signed compare operation.) Though the first
+statement involves array elements that are adjacent to one another, the
+second does not. In fact, the second statement will probably involve
+elements that are far apart from one another during early passes through the
+sifting process. We expect that systems whose caching system pre-fetches
+contiguous elements (often in "burst" line fills) will not have any great
+advantage over systems without pre-fetch mechanisms.
+
+Similar results were found when we profiled the numeric sort algorithm under
+the Borland C/C++ compiler.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based; consequently, it does not allow for line-by-line analysis as does the
+Watcom compiler's profiler.
+
+However, the CodeWarrior profiler does give us enough information to note
+that NumSift() only accounts for about 28% of the time consumed by the
+benchmark. The outer routine, NumHeapSort() accounts for around 71% of the
+time taken. It will require additional analysis to determine why the two
+compilers -- Watcom and CodeWarrior -- divide the workload so differently. (It
+may have something to do with compiler architecture, or the act of profiling
+the code may produce results that are significantly different than how the
+program runs under normal conditions, though that would lead one to wonder
+what use profilers would be.)
+
+Porting Considerations
+
+The numeric sort routine should represent a trivial porting exercise. It is
+not an overly large benchmark in terms of source code. Additionally, the
+only external routines it calls on are for allocating and releasing memory,
+and managing the stopwatch.
+
+The numeric sort benchmark depends on the following global definitions (note
+that these may be overridden by the command file):
+
+NUMNUMARRAYS -- Sets the upper limit on the number of arrays that the
+benchmark will attempt to build. The numeric sort benchmark creates work for
+itself by requiring the system to sort more and more arrays...not bigger and
+bigger arrays. (The latter case would skew results, because the sorting time
+for heapsort is N log2 N - e.g., doubling the array size does not double the
+sort time.) This constant sets the upper limit to the number of arrays the
+system will build before it signals an error. The default value is 100, and
+may be changed if your system exceeds this limit.
+
+NUMARRAYSIZE - Determines the size of each array built. It has been set to
+8111L and should not be tampered with. The command file entry
+NUMARRAYSIZE=<n> can be used to change this value, but results produced by
+doing this will make your results incompatible with other runs of the
+benchmark (since results will be skewed -- see preceding paragraph).
+
+To test for a correct execution of the numeric sort benchmark, #define the
+DEBUG symbol. This will enable code that verifies that arrays are properly
+sorted. You should run the benchmark program using a command file that has
+only the numeric sort test enabled. If there is an error, the program will
+display "SORT ERROR" (If this happens, it's possible that tons of "SORT
+ERROR" messages will be emitted, so it's best not to redirect output to a
+file), otherwise it will print "Numeric sort: OK" (also quite a few times).
+
+References
+
+Gonnet, G.H. 1984, Handbook of Algorithms and Data Structures (Reading, MA:
+Addison-Wesley).
+
+Knuth, Donald E. 1968, Fundamental Algorithms, vol 1 of The Art of Computer
+Programming (Reading, MA: Addison-Wesley).
+
+Press, William H., Flannery, Brian P., Teukolsky, Saul A., and Vetterling,
+William T. 1989, Numerical Recipes in Pascal (Cambridge: Cambridge
+University Press).
+
+Heapsort
+
+The heapsort algorithm is well-covered in a number of the popular
+computer-science textbooks. In fact, it gets a pat on the back in Numerical
+Recipes (Press et al.), where the authors write:
+
+Heapsort is our favorite sorting routine. It can be recommended
+wholeheartedly for a variety of sorting applications. It is a true
+"in-place" sort, requiring no auxiliary storage.
+
+Heapsort works by building the array into a kind of a queue called a heap.
+You can imagine this heap as being a form of in-memory binary tree. The
+topmost (root) element of the tree is the element that -- were the array
+sorted -- would be the largest element in the array. Sorting takes place by
+first constructing the heap, then pulling the root off the tree, promoting
+the next largest element to the root, pulling it off, and so on. (The
+promotion process is known as "sifting up.")
+
+Heapsort executes in N log2 N time even in its worst case. Unlike some other
+sorting algorithms, it does not benefit from a partially sorted array
+(though Gonnet does refer to a variation of heapsort, called "smoothsort,"
+which does -- see references).
+
+String Sort
+
+Description
+
+This benchmark is designed to gauge how well the system moves bytes around.
+By that we mean, how well the system can copy a string of bytes from one
+location to another; source and destination being aligned to arbitrary
+addresses. (This is unlike the numeric sort array, which moves bytes
+longword-at-a-time.) The strings themselves are built so as to be of random
+length, ranging from no fewer than 4 bytes to no more than 80 bytes. The
+mixture of random lengths means that processors will be forced to deal with
+strings that begin and end on arbitrary address boundaries.
+
+The string sort benchmark uses the heapsort algorithm; this is the same
+algorithm as is used in the numeric sort benchmark (see the sidebar on the
+heapsort for a detailed description of the algorithm).
+
+Manipulation of the strings is actually handled by two arrays. One array
+holds the strings themselves; the other is a pointers array. Each member of
+the pointers array carries an offset that points into the string array, so
+that the ith pointer carries the offset to the ith string. This allows the
+benchmark to rapidly locate the position of the ith string. (The sorting
+algorithm requires exchanges of items that might be "distant" from one
+another in the array. It's critical that the routine be able to rapidly find
+a string based on its indexed position in the array.)
+
+The string sort benchmark reports the number of string arrays it was able to
+sort per second. The size of the array is set by a global constant.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): Profiling of the string sort
+benchmark indicates that it spends most of its time in the C library routine
+memmove(). Within that routine, most of the execution is consumed by a pair
+of instructions: rep movsw and rep movsd. These are repeated string move --
+word width and repeated string move -- doubleword width, respectively.
+
+This is precisely where we want to see the time spent. It's interesting to
+note that the memmove() of the particular compiler/profiler tested (Watcom
+C/C++ 10.0) was "smart" enough to do most of the moving on word or
+doubleword boundaries. The string sort benchmark specifically sets arbitrary
+boundaries, so we'd expect to see lots of byte-wide moves. The "smart"
+memmove() is able to move bytes only when it has to, and does the remainder
+of the work via words and doublewords (which can move more bits at a time).
+
+680x0 Code (Macintosh CodeWarrior): Because CodeWarrior's profiler is
+function based, it is impossible to get an idea of how much time the test
+spends in library routines such as memmove(). Fortunately, as an artifact of
+the early version of the benchmark, the string sort algorithm makes use of
+the MoveMemory() routine in the sysspec.c file (system specific routines).
+This call, on anything other than a 16-bit DOS system, calls memmove()
+directly. Hence, we can get a good approximation of how much time is spent
+moving bytes.
+
+The answer is that nearly 78% of the benchmark's time is consumed by
+MoveMemory(), the rest being taken up by the other routines (the
+str_is_less() routine, which performs string comparisons, takes about 7% of
+the time). As above, we can guess that most of the benchmark's time is
+dependent on the performance of the library's memmove() routine.
+
+Porting Considerations
+
+As with the numeric sort routine, the string sort benchmark should be simple
+to port. Simpler, in fact. The string sort benchmark routine is not
+dependent on any typedef that may change from machine to machine (unless a
+char type is not 8 bits).
+
+The string sort benchmark depends on the following global definitions:
+
+NUMSTRARRAYS - Sets the upper limit on the number of arrays that the
+benchmark will attempt to build. The string sort benchmark creates work for
+itself by requiring the system to sort more and more arrays, not bigger and
+bigger arrays. (See section on Numeric Sort for an explanation.) This
+constant sets the upper limit to the number of arrays the system will build
+before it signals an error. The default value is 100, and may be changed if
+your system exceeds this limit.
+
+STRARRAYSIZE - Sets the default size of the string arrays built. We say
+"arrays" because, as with the numeric sort benchmark, the system adds work
+not by expanding the size of the array, but by adding more arrays. This
+value is set to 8111, and should not be modified, since results would not be
+comparable with other runs of the same benchmark on other machines.
+
+To test for a correct execution of the string sort benchmark, #define
+the DEBUG symbol. This will enable code that verifies the arrays are
+properly sorted. Set up a command file that runs only the string sort,
+and execute the benchmark program. If the routine is operating
+properly, the benchmark will print "String sort: OK"; this message is
+printed quite often. Otherwise, the program will display "SORT ERROR"
+for each pair of strings it finds out of order (which can be really
+often).
+
+References
+
+See the references for the Numeric Sort benchmark.
+
+Bitfield Operations
+
+Description
+
+The purpose of this benchmark is to explore how efficiently the system
+executes operations that deal with "twiddling bits." The test is set up to
+simulate a "bit map"; a data structure used to keep track of storage usage.
+(Don't confuse this meaning of "bitmap" with its use in describing a
+graphics data structure.)
+
+Systems often use bit maps to keep an inventory of memory blocks or (more
+frequently) disk blocks. In the case of a bit map that manages disk usage,
+an operating system will set aside a buffer in memory so that each bit in
+that buffer corresponds to a block on the disk drive. A 0 bit means that the
+corresponding block is free; a 1 bit means the block is in use. Whenever a
+file requests a new block of disk storage, the operating system searches the
+bit map for the first 0 bit, sets the bit (to indicate that the block is now
+spoken for), and returns the number of the corresponding disk block to the
+requesting file.
+
+These types of operations are precisely what this test simulates. A block of
+memory is allocated for the bit map. Another block of memory is
+allocated, and set up to hold a series of "bit map commands". Each bitmap
+command tells the simulation to do 1 of 3 things:
+
+1) Clear a series of consecutive bits,
+
+2) Set a series of consecutive bits, or
+
+3) Complement (1->0 and 0->1) a series of consecutive bits.
+
+The bit map command block is loaded with a set of random bit map commands
+(each command covers a random number of bits), and the simulation routine steps
+sequentially through the command block, grabbing a command and executing it.
+
+The bitfield benchmark reports the number of bits it was able to operate on
+per second. The size of the bit map is constant; the bitfield operations
+array is adjusted based on the capabilities of the processor. (See the
+section describing the auto-adjust feature of the benchmarks.)
+
+Analysis
+
+Optimized 486 code: Using the Watcom C/C++ 10.0 profiler, the Bitfield
+benchmark appears to spend all of its time in two routines: ToggleBitRun()
+(74% of the time) and DoBitFieldIteration() (24% of the time). We say
+"appears" because this is misleading, as we will explain.
+
+First, it is important to recall that the test performs one of three
+operations for each run of bits (see above). The routine ToggleBitRun()
+handles two of those three operations: setting a run of bits and clearing a
+run of bits. An if() statement inside ToggleBitRun() decides which of the
+two operations is performed. (Speed freaks will quite rightly point out that
+this slows the entire algorithm. ToggleBitRun() is called by a switch()
+statement which has already decided whether bits should be set or cleared;
+it's a waste of time to have ToggleBitRun() have to make that decision yet
+again.)
+
+DoBitFieldIteration() is the "outer" routine that calls ToggleBitRun().
+DoBitFieldIteration() also calls FlipBitRun(). This latter routine is the
+one that performs the third bitfield operation: complementing a run of bits.
+FlipBitRun() gets no "air time" at all (while DoBitFieldIteration() gets
+24% of the time) simply because the compiler's optimizer recognizes that
+FlipBitRun() is only called by DoBitFieldIteration(), and is called only
+once. Consequently, the optimizer moves FlipBitRun() "inline", i.e., into
+DoBitFieldIteration(). This removes an unnecessary call/return cycle (and is
+probably part of the reason why the FlipBitRun() code gets 24% of the
+algorithm's time, instead of something closer to 30% of its time.)
+
+Within the routines, those lines of code that actually do the shifting, the
+and operations, and the or operations, consume time evenly. This should make
+for a good test of a processor's "bit twiddling" capabilities.
+
+680x0 Code (Macintosh CodeWarrior): The CodeWarrior profiler is function
+based. Consequently, it is impossible to produce a profile of machine
+instruction execution time. We can, however, get a good picture of how the
+algorithm divides its time among the various functions.
+
+Unlike the 486 compiler, the CodeWarrior compiler did not appear to collapse
+the FlipBitRun() routine into the outer DoBitFieldIteration() routine. (We
+don't know this for certain, of course. It's possible that the compiler
+would have done this had we not been profiling.)
+
+In any case, the time spent in the two "core" routines of the bitfield test
+is shown below:
+
+FlipBitRun() - 18031.2 microsecs (called 509 times)
+
+ToggleBitRun() - 50770.6 microsecs (called 1031 times)
+
+In terms of total time, FlipBitRun() takes about 35% of the time (it gets
+about 33% of the calls). Remember, ToggleBitRun() is a single routine that
+is called both to set and clear bits. Hence, ToggleBitRun() is called twice
+as often as FlipBitRun().
+
+We can conclude that time spent setting bits to 1, setting bits to 0, and
+changing the state of bits, is about equal; the load is balanced close to
+what we'd expect it to be, based on the structure of the algorithm.
+
+Porting Considerations
+
+The bitfield operations benchmark is dependent on the size of the long
+datatype. On most systems, this is 32 bits. However, on some of the newer
+RISC chips, a long can be 64 bits long. If your system does use 64-bit
+longs, you'll need to #define the symbol LONG64.
+
+If you are unsure of the size of a long in your system (some C compiler
+manuals make it difficult to discover), simply place an ALLSTATS=T line in
+the command file and run the benchmarks. This will cause the benchmark
+program to display (among other things) the size of the data types int,
+short, and long in bytes.
+
+BITFARRAYSIZE - Sets the number of longs in the bit map array. This number
+is fixed, and should not be altered. The bitfield test adjusts itself by
+adding more bitfield commands (see above), not by creating a larger bit map.
+
+Currently, there is no code added to test for correct execution. If you are
+concerned that your port was incorrect, you'll need to step through your
+favorite debugger and verify execution against the original source code.
+
+** I added a resetting of the random number generator, and a resetting
+** of the bitfield to each loop. Those operations are outside of the
+** timed loop, and should add to make the benchmark more consistent.
+** There also is now debugging information available. If you define
+** DEBUG then the program will write a file named "debugbit.dat",
+** which is the contents of the bitfield after the calibration loop of
+** 30 operations. You can compare this file with the file
+** "debugbit.good" that comes with the distribution.
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+None.
+
+Emulated Floating-point
+
+Description
+
+The emulated floating-point benchmark includes routines that are similar to
+those that would be executed whenever a system performs floating-point
+operations in the absence of a coprocessor. In general, this amounts to a
+mixture of integer instructions, including shift operations, integer
+addition and subtraction, and bit testing (among others).
+
+The benchmark itself is remarkably simple. The test builds three
+1-dimensional arrays and loads the first two up with random floating-point
+numbers. The arrays are then partitioned into 4 equal-sized groups, and the
+test proceeds by performing addition, subtraction, multiplication, and
+division -- one operation on each group. (For example, for the addition
+group, an element from the first array is added to the second array and the
+result is placed in the third array.)
+
+Of course, most of the work takes place inside the routines that perform the
+addition, subtraction, multiplication, and division. These routines operate
+on a special data type (referred to as an InternalFPF number) that -- though
+not strictly IEEE compliant -- carries all the necessary data fields to
+support an IEEE-compatible floating-point system. Specifically, an
+InternalFPF number is built up of the following fields:
+
+Type (indicates a NORMAL, SUBNORMAL, etc.)
+
+Mantissa sign
+
+Unbiased, signed 16-bit exponent
+
+4-word (16 bits each) mantissa.
+
+The emulated floating-point test reports its results in number of loops per
+second (where a "loop" is one pass through the arrays as described above).
+
+Finally, we are aware that this test could be on its way to becoming an
+anachronism. A growing number of systems are appearing that have
+coprocessors built into the main CPU. It's possible that floating-point
+emulation will one day be a thing of the past.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The algorithm's time is distributed
+across a number of routines. The distribution is:
+
+ShiftMantLeft1() - 60% of the time
+
+ShiftMantRight1() - 17% of the time
+
+DivideInternalFPF() - 14% of the time
+
+MultiplyInternalFPF() - 5% of the time.
+
+The first two routines are similar to one another; both shift bits about in
+a floating-point number's mantissa. It's reasonable that ShiftMantLeft1()
+should take a larger share of the system's time; it is called as part of the
+normalization process that concludes every emulated addition, subtraction,
+multiplication, and division.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is
+function-based; consequently, it isn't possible to get timing at the machine
+instruction level. However, the output to CodeWarrior's profiler has
+provided insight into the breakdown of time spent in various functions that
+forces us to rethink our 486 code analysis.
+
+Analyzing what goes on inside the emulated floating-point tests is a tough
+one to call because some of the routines that are part of the test are
+called by the function that builds the arrays. Consequently, a quick look at
+the profiler's output can be misleading; it's not obvious how much time a
+particular routine is spending in the test and how much time that same
+routine is spending setting up the test (an operation that does not get
+timed).
+
+Specifically, the routine that loads up the arrays with test data calls
+LongToInternalFPF() and DivideInternalFPF(). LongToInternalFPF() makes one
+call to normalize() if the number is not a true zero. In turn, normalize()
+makes an indeterminate number of calls to ShiftMantLeft1(), depending on the
+structure of the mantissa being normalized.
+
+What's worse, DivideInternalFPF() makes all sorts of calls to all kinds of
+important low-level routines such as Sub16Bits() and ShiftMantLeft1().
+Untangling the wiring of which routine is being called as part of the test,
+and which is being called as part of the setup could probably be done with
+the computer equivalent of detective work and spelunking, but in the
+interest of time we'll opt for approximation.
+
+Here's a breakdown of some of the important routines and their times:
+
+AddSubInternalFPF() - 1003.9 microsecs (called 9024 times)
+
+MultiplyInternalFPF() - 20143 microsecs (called 5610 times)
+
+DivideInternalFPF() - 18820.9 microsecs (called 3366 times).
+
+The 3366 calls to DivideInternalFPF() are timed calls, not setup calls --
+the profiler at least gives outputs of separate calls made to the same
+routine, so we can determine which call is being made by the benchmark, and
+which is being made by the setup routine. It turns out that the setup
+routine calls DivideInternalFPF() 30,000 times.
+
+Notice that though addition/subtraction are called most often,
+multiplication next, then finally division; the time spent in each is the
+reverse. Division takes the most time, then multiplication, finally
+addition/subtraction. (There's probably some universal truth lurking here
+somewhere, but we haven't found it yet.)
+
+Other routines, and their breakdown:
+
+Add16Bits() - 115.3 microsecs
+
+ShiftMantRight1() - 574.2 microsecs
+
+Sub16Bits() - 1762 microsecs
+
+StickyShiftRightMant() - 40.4 microsecs
+
+ShiftMantLeft1() - 17486.1 microsecs
+
+The times for the last three routines are suspect, since they are called by
+DivideInternalFPF(), and a large portion of their time could be part of the
+setup process. This is what leads us to question the results obtained in the
+486 analysis, since it, too, is unable to determine precisely who is calling
+whom.
+
+Porting Considerations
+
+Earlier versions of this benchmark were extremely sensitive to porting;
+particularly to the "endianism" of the target system. We have tried to
+eliminate many of these problems. The test is nonetheless more "sensitive"
+to porting than most others.
+
+Pay close attention to the following defines and typedefs. They can be found
+in the files EMFLOAT.H, NMGLOBAL.H, and NBENCH1.H:
+
+u8 - Stands for unsigned, 8-bit. Usually defined to be unsigned char.
+
+u16 - Stands for unsigned, 16-bit. Usually defined to be unsigned short.
+
+u32 - Stands for unsigned, 32-bit. Usually defined to be unsigned long.
+
+INTERNAL_FPF_PRECISION - Indicates the number of elements in the mantissa of
+an InternalFPF number. Should be set to 4.
+
+The exponent field of an InternalFPF number is of type short. It should be
+set to whatever minimal data type can hold a signed, 16-bit number.
+
+Other global definitions you will want to be aware of:
+
+CPUEMFLOATLOOPMAX - Sets the maximum number of loops the benchmark will
+attempt before flagging an error. Each execution of a loop in the emulated
+floating-point test is "non-destructive," since the test takes factors from
+two arrays, operates on the factors, and places the result in a third array.
+Consequently, the test makes more work for itself by increasing the number
+of times it passes through the arrays (# of loops). If the system exceeds
+the limit set by CPUEMFLOATLOOPMAX, it will signal an error.
+
+This value may be altered to suit your system; it will not affect the
+benchmark results (unless you reduce it so much the system can never
+generate enough loops to produce a good test run).
+
+EMFARRAYSIZE - Sets the size of the arrays to be used in the test. This
+value is the number of entries (InternalFPF numbers) per array. Currently,
+the number is fixed at 3000, and should not be altered.
+
+Currently, there is no means of testing correct execution of the benchmark
+other than via debugger. There are routines available to decode the internal
+floating point format and print out the numbers, but no formal correctness
+test has been constructed. (This should be available soon. -- 3/14/95 RG)
+
+** It now prints out the operations of 8 of the entries used in the
+** test. Assuming you leave EMFARRAYSIZE at 3000, your results should
+** look like the ones below. The number in front of the colon is the
+** index of the entry.
+**
+** 2: (-1.1160E 0) + (-4.5159E 0) = -5.6320E 0
+** 6: (-4.4507E -1) - (-8.2050E -1) = +3.7543E -1
+** 10: (+1.2465E 0) * (+7.4667E -1) = +9.3075E -1
+** 14: (-1.2781E 0) / (-1.7367E 0) = +7.3596E -1
+** 2986: (-7.0390E 0) * (-2.0752E 0) = +1.4607E 1
+** 2990: (+8.3753E -1) / (+2.3876E 1) = +3.5078E -2
+** 2994: (-1.1393E 0) + (-1.6080E 1) = -1.7219E 1
+** 2998: (+7.2450E 0) - (-8.2654E -1) = +8.0716E 0
+**
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Microprocessor Programming for Computer Hobbyists, Neill Graham, Tab Books,
+Blue Ridge Summit, PA, 1977.
+
+Apple Numerics Manual, Second edition, Apple Computer, Addison-Wesley
+Publishing Co., Reading, MA, 1988.
+
+Fourier Series
+
+Description
+
+This is a floating-point benchmark designed primarily to exercise the
+trigonometric and transcendental functions of the system. It calculates the
+first n Fourier coefficients of the function (x+1)^x on the interval [0,2]. In
+this case, the function (x+1)^x is being treated as a cyclic waveform with a
+period of 2.
+
+The Fourier coefficients, when applied as factors to a properly constructed
+series of sine and cosine functions, allow you to approximate the original
+waveform. (In fact, if you can calculate all the Fourier coefficients --
+there'll be an infinite number -- you can reconstruct the waveform exactly).
+You have to calculate the coefficients via integration, and the algorithm
+does this using a simple trapezoidal rule for its numeric integration
+function.
+
+The upshot of all this is that it provides an exercise for the
+floating-point routines that calculate sine, cosine, and raising a number to
+a power. There are also some floating-point multiplications, divisions,
+additions, and subtractions mixed in.
+
+The benchmark reports its results as the number of coefficients calculated
+per second.
+
+As an additional note, we should point out that the performance of this
+benchmark is heavily dependent on how well-built the compiler's math library
+is. We have seen at least two cases where recompilation with new (and
+improved!) math libraries has resulted in two-fold and five-fold
+performance improvements. (Apparently, when a compiler gets moved to a new
+platform, the trigonometric and transcendental functions in the math
+libraries are among the last routines to be "hand optimized" for the new
+platform.) About all we can say about this is that whenever you run this
+test, verify that you have the latest and greatest math libraries.
+
+Analysis
+
+Optimized 486 code: The benchmark partitions its time almost evenly among
+the modules pow387, exp386, and trig387; giving between 25% and 28% of its
+time to each. This is based on profiling with the Watcom compiler running
+under Windows NT. These modules hold the routines that handle raising a
+number to a power and performing trigonometric (sine and cosine)
+calculations. For example, within trig387, time was nearly equally divided
+between the routine that calculates sine and the routine that calculates
+cosine.
+
+The remaining time (between 17% and 18%) was spent in the balance of the
+test. We noticed that most of that time occurred in the routine
+thefunction(). This is at the heart of the numerical integration routine the
+benchmark uses.
+
+Consequently, this benchmark should be a good test of the exponential and
+trigonometric capabilities of a processor. (Note that we recognize that the
+performance also depends on how well the compiler's math library is built.)
+
+680x0 Code (Macintosh CodeWarrior): The CodeWarrior profiler is function
+based, therefore it is impossible to get performance results for individual
+machine instructions. The CodeWarrior compiler is also unable to tell us how
+much time is spent within a given library routine; we can't see how much
+time gets spent executing the sin(), cos(), or pow() functions (which,
+unfortunately, was the whole idea behind the benchmark).
+
+About all we can glean from the results is that thefunction() takes about
+74% of the time in the test (this is where the heavy math calculations take
+place) while trapezoidintegrate() accounts for about 26% of the time on its
+own.
+
+Porting Considerations
+
+Necessarily, this benchmark is at the mercy of the efficiency of the
+floating-point support provided by whatever compiler you are using. It is
+recommended that, if you are doing the port yourself, you contact the
+designers of the compiler, and discuss with them what optimization switches
+should be set to produce the fastest code. (This sounds simple; usually it's
+not. Some systems let you decide between speed and true IEEE compliance.)
+
+As far as global definitions go, this benchmark is happily free of them. All
+the math is done using double data types. We have noticed that, on some Unix
+systems, you must be careful to include the correct math libraries.
+Typically, you'll discover this at link time.
+
+To test for correct execution of the benchmark: It's unlikely you'll need to
+do this, since the algorithm is so cut-and-dried. Furthermore, there are no
+explicit provisions made to verify the correctness. You can, however, either
+dip into your favorite debugger, or alter the code to print out the contents
+of the abase (which holds the A[i] terms) and bbase (which holds the B[i]
+terms) arrays as they are being filled (see routine DoFPUTransIteration).
+** This is exactly what I have done, it now prints out A[i] and B[i] data.
+** Uwe F. Mayer <mayer@tux.edu>
+Run the benchmark with a command file set to execute only the Fourier test,
+and examine the contents of the arrays. The first 100 are listed below.
+
+A[i]=
+ 2.84 1.05 0.274 0.0824 0.0102 -0.024 -0.0426 -0.0536 -0.0605 -0.065
+-0.0679 -0.0698 -0.0709 -0.0715 -0.0717 -0.0715 -0.0711 -0.0704
+-0.0696 -0.0685 -0.0674 -0.0661 -0.0647 -0.0632 -0.0615 -0.0598 -0.058
+-0.0561 -0.0542 -0.0521 -0.0501 -0.0479 -0.0457 -0.0434 -0.0411
+-0.0387 -0.0363 -0.0338 -0.0313 -0.0288 -0.0262 -0.0236 -0.0209
+-0.0183 -0.0156 -0.0129 -0.0102 -0.00744 -0.0047 -0.00196 0.000794
+0.00355 0.0063 0.00905 0.0118 0.0145 0.0172 0.0199 0.0226 0.0253
+0.0279 0.0305 0.0331 0.0357 0.0382 0.0407 0.0431 0.0455 0.0479 0.0502
+0.0525 0.0547 0.0569 0.059 0.061 0.063 0.0649 0.0668 0.0686 0.0703
+0.072 0.0736 0.0751 0.0765 0.0779 0.0792 0.0804 0.0816 0.0826 0.0836
+0.0845 0.0853 0.0861 0.0867 0.0873 0.0877 0.0881 0.0884 0.0887 0.0888
+
+B[i]=
+(undefined) -1.88 -1.16 -0.806 -0.61 -0.487 -0.402 -0.34 -0.293 -0.255
+-0.224 -0.199 -0.177 -0.158 -0.141 -0.126 -0.113 -0.101 -0.0901
+-0.0802 -0.071 -0.0625 -0.0546 -0.0473 -0.0404 -0.034 -0.0279 -0.0222
+-0.0168 -0.0117 -0.00693 -0.00238 0.00193 0.00601 0.00988 0.0135 0.017
+0.0203 0.0234 0.0263 0.0291 0.0317 0.0341 0.0364 0.0385 0.0405 0.0424
+0.0441 0.0457 0.0471 0.0484 0.0496 0.0507 0.0516 0.0525 0.0532 0.0538
+0.0543 0.0546 0.0549 0.055 0.0551 0.055 0.0549 0.0546 0.0543 0.0538
+0.0533 0.0527 0.052 0.0512 0.0503 0.0493 0.0483 0.0472 0.046 0.0447
+0.0434 0.042 0.0405 0.039 0.0374 0.0358 0.0341 0.0323 0.0305 0.0287
+0.0268 0.0249 0.023 0.021 0.019 0.0169 0.0149 0.0128 0.0107 0.00857
+0.00644 0.0043 0.00215
+
+Note that there is no B[0] coefficient. If the above numbers are in the
+arrays shown, you can feel pretty confident that the benchmark is working
+properly.
+
+References
+
+Engineering and Scientific Computations in Pascal, Lawrence P. Huelsman,
+Harper & Row, New York, 1986.
+
+Assignment Algorithm
+
+Description
+
+This test is built on an algorithm with direct application to the business
+world. The assignment algorithm solves the following problem: Say you have X
+machines and Y jobs. Any of the machines can do any of the jobs; however, the
+machines are sufficiently different so that the cost of doing a particular
+job can vary depending on what machine does it. Furthermore, the jobs are
+sufficiently different that the cost varies depending on which job a given
+machine does. You therefore construct a matrix; machines are the rows, jobs
+are the columns, and the [i,j] element of the array is the cost of doing the
+jth job on the ith machine. How can you assign the jobs so that the cost of
+completing them all is minimal? (This also assumes that one machine does one
+job.)
+
+Did you get that?
+
+The assignment algorithm benchmark is largely a test of how well the
+processor handles problems built around array manipulation. It is not a
+floating-point test; the "cost matrix" built by the algorithm is simply a 2D
+array of long integers. This benchmark considers an iteration to be a run of
+the assignment algorithm on a 101 x 101 - element matrix. It reports its
+results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): There are numerous loops within the
+assignment algorithm. The development system we were using (Watcom C/C++
+10.0) appears to have a fine time unrolling many of them. Consequently, it
+is difficult to pin down the execution impact of single lines (as in, for
+example, the numeric sort benchmark).
+
+On the level of functions, the benchmark spends around 70% of its time in
+the routine first_assignments(). This is where a) lone zeros in rows and
+columns are found and selected, and b) a choice is made between duplicate
+zeros. Around 23% of the time is spent in the second_assignments() routine
+where (if first_assignments() fails) the matrix is partitioned into smaller
+submatrices.
+
+Overall, we did a tally of instruction mix execution. The approximate
+breakdowns are:
+
+move - 38%
+
+conditional jump - 12%
+
+unconditional jump - 11%
+
+comparison - 14%
+
+math/logical/shift - 24%
+
+Many of the move instructions that appeared to consume the greatest amount of
+time were referencing items on the local stack frame. This required an
+indirect reference through EBP, plus a constant offset to resolve the
+address.
+
+This should be a good exercise of a cache, since operations in the
+first_assignments() routine require both row-wise and column-wise movement
+through the array. Note that the routine could be made more "severe" by
+changing the assignedtableau[][] array to an array of unsigned char --
+forcing fetches on byte boundaries.
+
+680x0 Code (CodeWarrior): The CodeWarrior profiler is function-based.
+Consequently, it's not possible to determine what's going on at the machine
+instruction level. We can, however, get a good idea of how much time the
+algorithm spends in each routine. The important routines are broken down as
+follows:
+
+calc_minimum_costs() - approximately 0.3% of the time
+
+(250 microsecs)
+
+first_assignments() - approximately 79% of the time
+
+(96284.6 microsecs)
+
+second_assignments() - approximately 19% of the time
+
+(22758 microsecs)
+
+These times are approximate; some time is spent in the Assignment() routine
+itself.
+
+These figures are reasonably close to those of the 486, at least in terms of
+the mixture of time spent in a particular routine. Hence, this should still
+be a good test of system cache (as described in the preceding section),
+given the behavior of the first_assignments() routine.
+
+Porting Considerations
+
+The assignment algorithm test is purely an integer benchmark, and requires
+no special data types that might be affected by ports to different
+architectures. There are only two global constants that affect the
+algorithm:
+
+ASSIGNROWS and ASSIGNCOLS - These set the size of the assignment array. Both
+are defined to be 101 (so, the array that is benchmarked is a 101 x 101
+-element array of longs). These values should not be altered.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, set up a command file that executes only the assignment
+algorithm, and run the benchmark. (You may want to pipe the output through a
+paging filter, like the more program.) The act of defining DEBUG will enable
+a section of code that displays the assigned columns on a per-row basis. If
+the benchmark is working properly, the numbers to be displayed
+should be:
+
+R000: 056 R001: 066 R002: 052 R003: 065 R004: 043 R005: 023 R006: 016
+R007: 077 R008: 095 R009: 004 R010: 064 R011: 076 R012: 078 R013: 091
+R014: 013 R015: 029 R016: 044 R017: 014 R018: 041 R019: 042 R020: 020
+R021: 071 R022: 024 R023: 017 R024: 055 R025: 040 R026: 070 R027: 025
+R028: 031 R029: 019 R030: 073 R031: 002 R032: 047 R033: 009 R034: 035
+R035: 045 R036: 005 R037: 063 R038: 081 R039: 039 R040: 087 R041: 008
+R042: 053 R043: 093 R044: 049 R045: 092 R046: 061 R047: 046 R048: 026
+R049: 034 R050: 088 R051: 000 R052: 028 R053: 018 R054: 072 R055: 021
+R056: 037 R057: 082 R058: 006 R059: 058 R060: 096 R061: 068 R062: 069
+R063: 054 R064: 057 R065: 086 R066: 097 R067: 084 R068: 099 R069: 051
+R070: 098 R071: 003 R072: 074 R073: 062 R074: 080 R075: 033 R076: 011
+R077: 094 R078: 012 R079: 050 R080: 010 R081: 038 R082: 089 R083: 059
+R084: 022 R085: 079 R086: 015 R087: 007 R088: 075 R089: 083 R090: 060
+R091: 048 R092: 032 R093: 067 R094: 001 R095: 030 R096: 027 R097: 085
+R098: 090 R099: 036 R100: 100
+
+These are the column choices for each row made by the algorithm. If
+you see these numbers displayed, the algorithm is working correctly.
+
+*** The original debugging information was incorrect, as it not only
+*** displayed the chosen columns, but also displayed eliminated columns.
+*** Changed to show all 101 entries. Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Quantitative Decision Making for Business, Gordon, Pressman, and Cohn,
+Prentice-Hall, Englewood Cliffs, NJ, 1990.
+
+Quantitative Decision Making, Guiseppi A. Forgionne, Wadsworth Publishing
+Co., California, 1986.
+
+Huffman Compression
+
+Description
+
+This is a compression algorithm that -- while helpful for some time as a
+text compression technique -- has since fallen out of fashion on account of
+the superior performance by algorithms such as LZW compression. It is,
+however, still used in some graphics file formats in one form or another.
+
+The benchmark consists of three parts:
+
+Building a "Huffman Tree" (explained below),
+
+Compression, and
+
+Decompression.
+
+A "Huffman Tree" is a special data structure that guides the compression and
+decompression processes. If you were to diagram one, it would look like a
+large binary tree (i.e., two branches per each node). Describing its
+function in detail is beyond the scope of this paper (see the references for
+more information). We should, however, point out that the tree is built from
+the "bottom up"; and the procedure for constructing it requires that the
+algorithm scan the uncompressed buffer, building a frequency table for all
+the characters appearing in the buffer. (This version of the Huffman
+algorithm compresses byte-at-a-time, though there's no reason why the same
+principle could not be applied to tokens larger than one byte.)
+
+Once the tree is built, text compression is relatively straightforward. The
+algorithm fetches a character from the uncompressed buffer, navigates the
+tree based on the character's value, and produces a bit stream that is
+concatenated to the compressed buffer. Decompression is the reverse of that
+process. (We recognize that we are simplifying the algorithm. Again, we
+recommend you check the references.)
+
+The Huffman Compression benchmark considers an iteration to be the three
+operations described above, performed on an uncompressed text buffer of 5000
+bytes. It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The Huffman compression algorithm --
+tree building, compression, and decompression -- is written as a single,
+large routine: DoHuffIteration(). All the benchmark's time is spent within
+that routine.
+
+Components of DoHuffIteration() that consume the most time are those that
+perform the compression and decompression.
+
+The code for performing the compression spends most of its time (accounting
+for about 13%) constructing the bit string for a character that is being
+compressed. It does this by seeking up the tree from a leaf, emitting 1's
+and 0's in the process, until it reaches the root. The stream of 1's and 0's
+are loaded into a character array; the algorithm then walks "backward"
+through the array, setting (or clearing) bits in the compression buffer as
+it goes.
+
+Similarly, the decompression portion takes about 12% of the time as the
+algorithm pulls bits out of the compressed buffer -- using them to navigate
+the Huffman tree -- and reconstructs the original text.
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based. Consequently, it's impossible to get performance scores for
+individual machine instructions. Furthermore, as mentioned above, the
+Huffman compression algorithm is written as a monolithic routine. This makes
+the results from the CodeWarrior profiler all the more sparse.
+
+We can at least point out that the lowmost routines (GetCompBit() and
+SetCompBit()) that read and write individual bits, though called nearly 13
+million times each, account for only 0.7% and 0.3% of the total time,
+respectively.
+
+Porting Considerations
+
+The Huffman algorithm relies on no special data types. It should port
+readily. Global constants of interest include:
+
+EXCLUDED - This is a large, positive value. Currently it is set to 32000,
+and should be left alone. Basically, this is a token that the system uses to
+indicate an excluded character (one that does not appear in the plain-text).
+It is set to a ridiculously high value that will never appear in the
+pointers of the tree during normal construction.
+
+MAXHUFFLOOPS - This is another one of those "governor" constants. The
+Huffman benchmark creates more work for itself by doing multiple
+compression/decompression loops. This constant sets the maximum number of
+loops it will attempt per iteration before it gives up. Currently, it is set
+to 50000. Though it is unlikely you'll ever need to modify this value, you
+can increase it if your machine is too fast for the adjustment algorithm. Do
+not reduce the number.
+
+HUFFARRAYSIZE - This value sets the size of the plain-text array to be
+compressed. You can override this value with the command file to see how
+well your machine performs for larger or smaller arrays. The subsequent
+results, however, are invalid for comparison with other systems.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the Huffman compression
+algorithm, and run the benchmark. Defining DEBUG will enable a section of
+code that verifies the decompression as it takes place (i.e., the routine
+compares -- character at a time -- the uncompressed data with the original
+plain-text). If there's an error, the program will repeatedly display: "Error
+at textoffset xxx".
+
+** If everything is correct it will emit quite a few "Huffman: OK" messages.
+**
+** I added a resetting of the random number generator, outside of the
+** timed loop, and a resetting of the Huffman tree, inside of the
+** timed loop. That should help to make the benchmark more consistent.
+** The program did originally only reset half of the tree, which led
+** to runtime errors on some systems. The effect on the benchmark
+** should be negligible, and in fact comes out as being of the order
+** of less than 1% on my test system.
+** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Data Compression: Methods and Theory, James A. Storer, Computer Science
+Press, Rockville, MD, 1988.
+
+An Introduction to Text Processing, Peter D. Smith, MIT Press, Cambridge,
+MA, 1990.
+
+IDEA Encryption
+
+Description
+
+This is another benchmark based on a "higher-level" algorithm; "higher
+-level" in the sense that it is more complex than a sort or a search
+operation.
+
+Security -- and, therefore, cryptography -- are becoming increasingly
+important issues in the computer realm. It's likely that more and more
+machines will be running routines like the IDEA encryption algorithm. (IDEA
+is an acronym for the International Data Encryption Algorithm.)
+
+A good description of the algorithm (and, in fact, the reference we used to
+create the source code for the test) can be found in Bruce Schneier's
+exhaustive exploration of encryption, "Applied Cryptography" (see
+references). To quote Mr. Schneier: "In my opinion, it [IDEA] is the best
+and most secure block algorithm available to the public at this time."
+
+IDEA is a symmetrical, block cipher algorithm. Symmetrical means that the
+same routine used to encrypt the data also decrypts the data. A block cipher
+works on the plain-text (the message to be encrypted) in fixed, discrete
+chunks. In the case of IDEA, the algorithm encrypts and decrypts 64 bits at
+a time.
+
+As pointed out in Schneier's book, there are three operations that the IDEA
+uses to do its work:
+
+XOR (exclusive-or)
+
+Addition modulo 2^16 (ignoring overflow)
+
+Multiplication modulo 2^16+1 (ignoring overflow).
+
+IDEA requires a key of 128 bits. However, keys and blocks are further
+subdivided into 16-bit chunks, so that any given operation within the IDEA
+encryption is performed on 16-bit quantities. (This is one of the many
+advantages of the algorithm: it is efficient even on 16-bit processors.)
+
+The IDEA benchmark considers an "iteration" to be an encryption and
+decryption of a buffer of 4000 bytes. The test actually builds 3 buffers:
+The first to hold the original plain-text, the second to hold the encrypted
+text, and the third to hold the decrypted text (the contents of which should
+match that of the first buffer). It reports its results in iterations per
+second.
+
+Analysis
+
+Optimized 486 code: The algorithm actually spends most of its time (nearly
+75%) within the mul() routine, which performs the multiplication modulo
+2^16+1. This is a super-simple routine, consisting primarily of if
+statements, shifts, and additions.
+
+The remaining time (around 24%) is spent in the balance of the cipher_idea()
+routine. (Note that cipher_idea() calls the mul() routine frequently; so,
+the 24% is comprised of the other lines of cipher_idea()). cipher_idea() is
+littered with simple pointer-fetch-and-increment operations, some addition,
+and some exclusive-or operations.
+
+Note that IDEA's exercise of system capabilities probably doesn't extend
+beyond testing simple integer math operations. Since the buffer size is set
+to 4000 bytes, the test will run entirely in processor cache on most
+systems. Even the cache won't get a heavy "internal" workout, since the
+algorithm proceeds sequentially through each buffer from lower to higher
+addresses.
+
+680x0 code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based; consequently, it is impossible to determine execution profiles for
+individual machine instructions. We can, however, get an idea of how much
+time is spent in each routine.
+
+As with Huffman compression, the IDEA algorithm is written monolithically --
+a single, large routine does most of the work. However, a special
+multiplication routine, mul(), is frequently called within each
+encryption/decryption iteration (see above).
+
+In this instance, the results for the 68K system diverge widely from those
+of the 486 system. The CodeWarrior profiler shows the mul() routine as
+taking only 4% of the total time in the benchmark, even though it is called
+over 20 million times. The outer routine is called 600,000 times, and
+accounts for about 96% of the whole program's entire time.
+
+Porting Considerations
+
+Since IDEA does its work in 16-bit units, it is particularly important that
+u16 be defined to whatever datatype provides an unsigned 16-bit integer on
+the test platform. Usually, unsigned short works for this. (You can verify
+the size of a short by running the benchmarks with a command file that
+includes ALLSTATS=T as one of the commands. This will cause the benchmark
+program to display a message that tells the size of the int, short, and long
+data-types in bytes.)
+
+Also, the mul() routine in IDEA requires the u32 datatype to define an
+unsigned 32-bit integer. In most cases, unsigned long works.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the IDEA algorithm, and
+run the benchmark. Defining DEBUG will enable a section of code that
+compares the original plain-text with the output of the test. (Remember, the
+benchmark performs both encryption and decryption.) If the algorithm has
+failed, the output will not match the input, and you'll see "IDEA Error"
+messages all over your display.
+
+References
+
+Applied Cryptography: Protocols, Algorithms, and Source Code in C, Bruce
+Schneier, John Wiley & Sons, Inc., New York, 1994.
+
+Neural Net
+
+Description
+
+The Neural Net simulation benchmark is based on a simple back-propagation
+neural network presented by Maureen Caudill as part of a BYTE article that
+appeared in the October, 1991 issue (see "Expert Networks" in that issue).
+The network involved is a simple 3-layer (input neurodes, middle-layer
+neurodes, and output neurodes) network that accepts a number of 5 x 7 input
+patterns and produces a single 8-bit output pattern.
+
+The test involves sending the network an input pattern that is the 5 x 7
+"image" of a character (1's and 0's -- 1's representing lit pixels, 0's
+representing unlit pixels), and teaching it the 8-bit ASCII code for the
+character.
+
+A thorough description of how the back propagation algorithm works is beyond
+the scope of this paper. We recommend you search through the references
+given at the end of this paper, particularly Ms. Caudill's article, for
+detailed discussion. In brief, the benchmark is primarily an exercise in
+floating-point operations, with some frequent use of the exp() function. It
+also performs a great deal of array references, though the arrays in use are
+well under 300 elements each (and less than 100 in most cases).
+
+The Neural Net benchmark considers an iteration to be a single learning
+cycle. (A "learning cycle" is defined as the time it takes the network to be
+able to associate all input patterns to the correct output patterns within a
+specified tolerance.) It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code: The forward pass of the network (i.e., calculating
+outputs from inputs) utilizes a sigmoid function. This function has, at its
+heart, a call to the exp() library routine. A small but non-negligible
+amount of time is spent in that function (a little over 5% for the 486
+system we tested).
+
+The learning portion of the network benchmark depends on the derivative of
+the sigmoid function, which turns out to require only multiplications and
+subtractions. Consequently, each learning pass exercises only simple
+floating-point operations.
+
+If we divide the time spent in the test into two parts -- forward pass and
+backward pass (the latter being the learning pass) -- then the test appears
+to spend the greatest part of its time in the learning phase. In fact, most
+time is spent in the adjust_mid_wts() routine. This is the part of the
+routine that alters the weights on the middle layer neurodes. (It accounts
+for over 40% of the benchmark's time.)
+
+680x0 Code (Macintosh CodeWarrior): Though CodeWarrior's profiler is
+function based, the neural net benchmark is highly modular. We can therefore
+get a good breakdown of routine usage:
+
+worst_pass_error() - 304 microsecs (called 4680 times)
+
+adjust_mid_wts() - 83277 microsecs (called 46800 times)
+
+adjust_out_wts() - 17394 microsecs (called 46800 times)
+
+do_mid_error() - 11512 microsecs (called 46800 times)
+
+do_out_error() - 3002 microsecs (called 46800 times)
+
+do_mid_forward() - 49559 microsecs (called 46800 times)
+
+do_out_forward() - 20634 microsecs (called 46800 times)
+
+Again, most time was spent in adjust_mid_wts() (as on the 486), accounting
+for almost twice as much time as do_mid_forward().
+
+Porting Considerations
+
+The Neural Net benchmark is not dependent on any special data types. There
+are a number of global variables and arrays that should not be altered in
+any way. Most importantly, the #defines found in NBENCH1.H under the Neural
+Net section should not be changed. These control not only the number of
+neurodes in each layer; they also include constants that govern the learning
+processes.
+
+Other globals to be aware of:
+
+MAXNNETLOOPS - This constant simply sets the upper limit on the number of
+training loops the test will permit per iteration. The Neural Net benchmark
+adjusts its workload by re-teaching itself over and over (each time it
+begins a new training session, the network is "cleared" -- loaded with
+random values). It is unlikely you will ever need to modify this constant.
+
+inpath - This string pointer is set to the path from which the neural net's
+input data is read. It is currently hardwired to "NNET.DAT". You shouldn't
+have to change this name, unless your file system requires directory
+information as part of the path.
+
+Note that the Neural Net benchmark is the only test that requires an
+external data file. The contents of the file are listed in an attachment to
+this paper. You should use the attachment to reconstruct the file should it
+become lost or corrupted. Any changes to the file will invalidate the test
+results.
+
+To test for correct execution of the benchmark: #define the symbol DEBUG,
+recompile, build a command file that executes only the Neural Net test, and
+run the benchmark. Defining DEBUG will enable a section of code that
+displays how many passes through the learning process were required for the
+net to learn. It should learn in 780 passes.
+
+References
+
+"Expert Networks," Maureen Caudill, BYTE Magazine, October, 1991.
+
+Simulating Neural Networks, Norbert Hoffmann, Verlag Vieweg, Wiesbaden,
+1994.
+
+Signal and Image Processing with Neural Networks, Timothy Masters, John
+Wiley and Sons, New York, 1994.
+
+Introduction to Neural Networks, Jeannette Stanley, California Scientific
+Software, CA, 1989.
+
+LU Decomposition
+
+Description
+
+LU Decomposition is an algorithm that can be used as the heart of a program
+for solving linear equations. Suppose you have a matrix A. LU Decomposition
+determines the matrices L and U such that
+
+L . U = A
+
+where L is a lower triangular matrix and U is an upper triangular matrix. (A
+lower triangular matrix has nonzero elements only on the main diagonal and
+below. An upper triangular matrix has nonzero elements only on the main
+diagonal and above.)
+
+Without going into the mathematical details too deeply, having the L and U
+matrices makes the solution of linear equations (i.e., equations of the form
+A . x = b) quite easy. It turns out that you can also use LU decomposition
+to determine matrix inverses and determinants.
+
+The algorithm used in the benchmarks was derived from Numerical Recipes in
+Pascal (there is a C version of the book, which we did not have on hand), a
+book we heartily recommend to anyone serious about mathematical and
+scientific computing. The authors are approving of LU decomposition as a
+means of solving linear equations, pointing out that their version (which
+makes use of what we would have to call "Crout's method with partial
+implicit pivoting") is a factor of 3 better than one of their Gauss-Jordan
+routines, a factor of 1.5 better than another. They go on to demonstrate the
+use of LU decomposition for iterative improvement of linear equation
+solutions.
+
+The benchmark begins by creating a "solvable" linear system. This is easily
+done by loading up the column vector b with random integers, then
+initializing A with an identity matrix. The equations are then "scrambled"
+by either multiplying a row by a constant, or adding one row to another. The
+scrambled matrices are handed to the LU algorithm.
+
+The LU Decomposition benchmark considers a single iteration to be the
+solution of one set of equations (the size of A is fixed at 101 x 101
+elements). It reports its results in iterations per second.
+
+Analysis
+
+Optimized 486 code (Watcom C/C++ 10.0): The entire algorithm consists of two
+parts: the LU decomposition itself, and the back substitution algorithm that
+builds the solution vector. The majority of the algorithm's time takes place
+within the former; the algorithm that builds the L and U matrices (this
+takes place in routine ludcmp()).
+
+Within ludcmp(), there are two extremely tight for loops forming the heart
+of Crout's algorithm that consume the majority of the time. The loops are
+"tight" in that they each consist of only one line of code; in both cases,
+the line of code is a "multiply and accumulate" operation (actually, it's
+sort of a multiply and de-accumulate, since the result of the multiplication
+is subtracted, not added).
+
+In both cases, the items multiplied are elements from the A array; and one
+factor's row index is varying more rapidly, while another factor's column
+index is varying more rapidly.
+
+Note that this is a good overall test of floating-point operations within
+matrices. Most of the math is floating-point; primarily additions,
+subtractions, and multiplications (only a few divisions).
+
+680x0 Code (Macintosh CodeWarrior): CodeWarrior's profiler is function
+based. It is therefore impossible to determine execution profiles at the
+machine-code level. The profiler does, however, allow us to determine how
+much time the benchmark spends in each routine. This breakdown is as
+follows:
+
+lusolve() - 3.4 microsecs (about 0% of the time)
+
+lubksb() - 1198 microsecs (about 2% of the time)
+
+ludcmp() - 63171 microsec (about 91% of the time)
+
+The above percentages are for the whole program. Consequently, as a portion
+of actual benchmark time, the amount attributed to each will be slightly
+larger (though the proportions will remain the same).
+
+Since ludcmp() performs the actual LU decomposition, this is exactly where
+we'd want the benchmark to spend its time. The lubksb() routine calls
+ludcmp(), using the resulting matrix to "back-solve" the linear equation.
+
+Porting Considerations
+
+The LU Decomposition routine requires no special data types, and is immune
+to byte ordering. It does make use of a typedef (LUdblptr) that includes an
+embedded union; this allows the benchmark to "coerce" a pointer to double
+into a pointer to a 2D array of double. This arrangement has not caused
+problems with the compilers we have tested to date.
+
+Other constants and globals to be aware of:
+
+LUARRAYROWS and LUARRAYCOLS - These constants set the size of the
+coefficient matrix, A. They cannot be altered by command file. In fact, you
+shouldn't alter them at all, or your results will be invalid. Currently,
+they are both set to 101.
+
+MAXLUARRAYS - This is another "governor" constant. The algorithm performs
+dynamic workload adjustment by building more and more arrays to solve per
+timing round. This sets the maximum upper limit of arrays that it will
+build. Currently, it is set to 1000, which should be more than enough for
+the reasonable future (1000 arrays of 101 x 101 floating-point doubles would
+require somewhere around 80 megabytes of RAM -- and that's not counting the
+column vectors).
+
+To test for correct execution of the benchmark: Currently, there is no
+simple technique for doing this. You can, however, either use your favorite
+debugger (or embed a printf() statement) at the conclusion of the lubksb()
+routine. When this routine concludes, the array b will hold the solution
+vector. These items will be stored as floating-point doubles, and the first
+14 are (with rounding):
+
+46 20 23 22 85 86 97 95 8 89 75 67 6 86
+
+If you find these numbers as the first 14 in the array b[], then you're
+virtually guaranteed that the algorithm is working correctly.
+
+*** The above is not correct, as the initial matrix is not the identity,
+*** but a matrix with random nonzero entries on the diagonal (they have
+*** altered the algorithm since they wrote the documentation).
+*** I changed the output of the debugging routine, it now prints first
+*** what the array b should hold (as righthand side divided by diagonal
+*** entry), and then it prints what the array b does hold after the
+*** decomposition has been done to compute the solution of the system. If
+*** you get the same, then fine.
+*** And, by the way, my original right hand sides are
+*** 46 23 85 97 8 75 6 81 88 76 6 84 31 53 2 ...
+*** and the diagonal entries are
+*** 520 922 186 495 89 267 786 571 175 600 738 321 897 541 859 ...
+*** You notice that one has every other number of the original sequence.
+*** This is due to BYTE's change of the algorithm, as they now also use the
+*** random number generator to generate the diagonal elements.
+*** Here is the complete set of data:
+*** 46/520=0.09 23/922=0.02 85/186=0.46 97/495=0.20 8/89=0.09
+*** 75/267=0.28 6/786=0.01 81/571=0.14 88/175=0.50 76/600=0.13
+*** 6/738=0.01 84/321=0.26 31/897=0.03 53/541=0.10 2/859=0.00
+*** 86/92=0.93 51/121=0.42 29/248=0.12 51/789=0.06 84/6=14.00
+*** 21/180=0.12 33/48=0.69 2/899=0.00 12/820=0.01 69/372=0.19
+*** 59/809=0.07 74/18=4.11 40/788=0.05 39/56=0.70 86/91=0.95
+*** 33/878=0.04 82/165=0.50 42/561=0.07 8/274=0.03 84/694=0.12
+*** 32/352=0.09 25/969=0.03 59/816=0.07 33/112=0.29 5/125=0.04
+*** 89/740=0.12 7/223=0.03 54/994=0.05 33/80=0.41 55/676=0.08
+*** 6/524=0.01 36/544=0.07 21/160=0.13 58/596=0.10 15/717=0.02
+*** 84/311=0.27 98/530=0.18 46/713=0.06 41/233=0.18 73/640=0.11
+*** 40/343=0.12 72/586=0.12 100/965=0.10 59/764=0.08 37/866=0.04
+*** 27/682=0.04 3/652=0.00 41/352=0.12 87/786=0.11 45/79=0.57
+*** 83/761=0.11 41/817=0.05 46/209=0.22 78/930=0.08 85/210=0.40
+*** 80/756=0.11 18/931=0.02 30/669=0.04 47/127=0.37 85/891=0.10
+*** 66/364=0.18 83/955=0.09 58/637=0.09 58/778=0.07 82/288=0.28
+*** 42/540=0.08 76/290=0.26 59/36=1.64 29/463=0.06 63/476=0.13
+*** 6/340=0.02 73/341=0.21 59/737=0.08 81/492=0.16 98/443=0.22
+*** 58/32=1.81 53/562=0.09 54/263=0.21 46/367=0.13 58/390=0.15
+*** 96/845=0.11 30/746=0.04 2/687=0.00 28/849=0.03 84/180=0.47
+*** 85/382=0.22
+*** Uwe F. Mayer <mayer@tux.edu>
+
+References
+
+Numerical Recipes in Pascal: The Art of Scientific Computing, Press,
+Flannery, Teukolsky, Vetterling, Cambridge University Press, New York, 1989.
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/debugbit.good.gz b/benchmarks/nbench/nbench-byte-2.2.3/debugbit.good.gz
new file mode 100644
index 0000000..fdc893e
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/debugbit.good.gz
Binary files differ
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/emfloat.c b/benchmarks/nbench/nbench-byte-2.2.3/emfloat.c
new file mode 100644
index 0000000..5e73890
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/emfloat.c
@@ -0,0 +1,1343 @@
+/*
+** emfloat.c
+** Source for emulated floating-point routines.
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine.
+**
+** Created:
+** Last update: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+
+#include <stdio.h>
+#include <string.h>
+#include "nmglobal.h"
+#include "emfloat.h"
+
+/*
+** Floating-point emulator.
+** These routines are only "sort of" IEEE-compliant. All work is
+** done using an internal representation. Also, the routines do
+** not check for many of the exceptions that might occur.
+** Still, the external formats produced are IEEE-compatible,
+** with the restriction that they presume a little-endian machine
+** (though the endianness will not affect the performance).
+**
+** Some code here was based on work done by Steve Snelgrove of
+** Orem, UT. Other code comes from routines presented in
+** the long-ago book: "Microprocessor Programming for
+** Computer Hobbyists" by Neill Graham.
+*/
+
+/**************************
+** SetupCPUEmFloatArrays **
+***************************
+** Fill the operand arrays for the emulated floating-point test.
+** Each element of abase and bbase becomes the quotient of two
+** values obtained from randwc(), built with the integer-to-FPF
+** conversion and the FPF divide routine. abase[i] and bbase[i]
+** share one numerator but use separately drawn denominators.
+** NOTE: cbase is accepted but never touched here; the benchmark
+** itself overwrites it.
+*/
+void SetupCPUEmFloatArrays(InternalFPF *abase,
+                InternalFPF *bbase,
+                InternalFPF *cbase,
+                ulong arraysize)
+{
+InternalFPF numerator;          /* Shared by the a and b entries */
+InternalFPF denominator;        /* Redrawn for each quotient */
+ulong idx;
+/*
+** Re-seed the generator so that every run sees the same data.
+** (Seeding call inserted by Uwe F. Mayer.)
+*/
+extern int32 randnum(int32 lngval);
+randnum((int32)13);
+
+idx=0;
+while(idx<arraysize)
+{
+        Int32ToInternalFPF(randwc((int32)50000),&numerator);
+
+        /* The +1 presumably keeps the divisor away from zero */
+        Int32ToInternalFPF(randwc((int32)50000)+(int32)1,&denominator);
+        DivideInternalFPF(&numerator,&denominator,&abase[idx]);
+
+        Int32ToInternalFPF(randwc((int32)50000)+(int32)1,&denominator);
+        DivideInternalFPF(&numerator,&denominator,&bbase[idx]);
+
+        idx++;
+}
+return;
+}
+
+/***********************
+** DoEmFloatIteration **
+************************
+** Run the emulated floating-point benchmark and time it.
+** One call makes "loops" complete passes over the arrays. On each
+** pass, cbase[i] receives the result of combining abase[i] and
+** bbase[i] with the operation selected by jtable[i % 16].
+** Returns whatever StopStopwatch() reports for the timer started
+** at entry.
+*/
+ulong DoEmFloatIteration(InternalFPF *abase,
+                InternalFPF *bbase,
+                InternalFPF *cbase,
+                ulong arraysize, ulong loops)
+{
+ulong elapsed;          /* For the stopwatch */
+/* Operation selector: 0=add, 1=subtract, 2=multiply, 3=divide */
+static uchar jtable[16] = {0,0,0,0,1,1,1,1,2,2,2,2,2,3,3,3};
+ulong i;
+#ifdef DEBUG
+/*
+** Value "loops" holds inside the first pass. Kept as ulong so the
+** comparison below is exact for every loop count.
+*/
+ulong number_of_loops;
+#endif
+/*
+** Begin timing
+*/
+elapsed=StartStopwatch();
+#ifdef DEBUG
+number_of_loops=loops-1;        /* the index of the first loop we run */
+#endif
+
+/*
+** Each pass through the array performs operations in
+** the following ratios:
+**   4 adds, 4 subtracts, 5 multiplies, 3 divides
+** (adds and subtracts being nearly the same operation)
+*/
+while(loops--)
+{
+        for(i=0;i<arraysize;i++)
+        {
+                switch(jtable[i % 16])
+                {
+                case 0: /* Add */
+                        AddSubInternalFPF(0,abase+i,
+                                        bbase+i,
+                                        cbase+i);
+                        break;
+                case 1: /* Subtract */
+                        AddSubInternalFPF(1,abase+i,
+                                        bbase+i,
+                                        cbase+i);
+                        break;
+                case 2: /* Multiply */
+                        MultiplyInternalFPF(abase+i,
+                                        bbase+i,
+                                        cbase+i);
+                        break;
+                case 3: /* Divide */
+                        DivideInternalFPF(abase+i,
+                                        bbase+i,
+                                        cbase+i);
+                        break;
+                }
+        }
+#ifdef DEBUG
+        /*
+        ** On the first pass only, dump eight sample results. The
+        ** samples use indices 14 and arraysize-14, so skip the dump
+        ** when the arrays are too short to contain them.
+        */
+        if (number_of_loops==loops && arraysize>14)
+        {
+                ulong j[8];             /* we test 8 entries */
+                int k;
+                ulong idx;              /* element being printed */
+                char buffer[1024];
+
+                j[0]=(ulong)2;
+                j[1]=(ulong)6;
+                j[2]=(ulong)10;
+                j[3]=(ulong)14;
+                j[4]=(ulong)(arraysize-14);
+                j[5]=(ulong)(arraysize-10);
+                j[6]=(ulong)(arraysize-6);
+                j[7]=(ulong)(arraysize-2);
+                for(k=0;k<8;k++){
+                        idx=j[k];
+                        InternalFPFToString(buffer,abase+idx);
+                        /* idx is unsigned long, so print it with %lu */
+                        printf("%6lu: (%s) ",idx,buffer);
+                        switch(jtable[idx % 16])
+                        {
+                        case 0: strcpy(buffer,"+"); break;
+                        case 1: strcpy(buffer,"-"); break;
+                        case 2: strcpy(buffer,"*"); break;
+                        case 3: strcpy(buffer,"/"); break;
+                        }
+                        printf("%s ",buffer);
+                        InternalFPFToString(buffer,bbase+idx);
+                        printf("(%s) = ",buffer);
+                        InternalFPFToString(buffer,cbase+idx);
+                        printf("%s\n",buffer);
+                }
+        }
+#endif
+}
+return(StopStopwatch(elapsed));
+}
+
+/***********************
+** SetInternalFPFZero **
+************************
+** Turn *dest into a zero: type IFPF_IS_ZERO, exponent MIN_EXP,
+** every mantissa word cleared. The caller supplies the sign
+** of the zero.
+*/
+static void SetInternalFPFZero(InternalFPF *dest,
+                        uchar sign)
+{
+int word=INTERNAL_FPF_PRECISION;        /* Words left to clear */
+
+/* Wipe the mantissa, last word first */
+while(word-->0)
+        dest->mantissa[word]=0;
+
+dest->exp=MIN_EXP;
+dest->sign=sign;
+dest->type=IFPF_IS_ZERO;
+return;
+}
+
+/***************************
+** SetInternalFPFInfinity **
+****************************
+** Turn *dest into an infinity (the result used when an exponent
+** would exceed MAX_EXP). The type becomes IFPF_IS_INFINITY, the
+** mantissa is cleared and the exponent field is set to MIN_EXP.
+** The caller supplies the sign of the infinity.
+*/
+static void SetInternalFPFInfinity(InternalFPF *dest,
+                        uchar sign)
+{
+int word=INTERNAL_FPF_PRECISION;        /* Words left to clear */
+
+/* Wipe the mantissa, last word first */
+while(word-->0)
+        dest->mantissa[word]=0;
+
+dest->exp=MIN_EXP;
+dest->sign=sign;
+dest->type=IFPF_IS_INFINITY;
+return;
+}
+
+/**********************
+** SetInternalFPFNaN **
+***********************
+** Turn *dest into a NaN (not a number): type IFPF_IS_NAN, sign 1,
+** exponent MAX_EXP. Only bit 0x4000 of the first mantissa word is
+** set; per the original authors this mimics the mantissa bits an
+** 80x87 would produce.
+*/
+static void SetInternalFPFNaN(InternalFPF *dest)
+{
+int word;               /* Index */
+
+/* Clear every word after the first, then plant the NaN pattern */
+for(word=INTERNAL_FPF_PRECISION-1;word>=1;word--)
+        dest->mantissa[word]=0;
+dest->mantissa[0]=0x4000;
+
+dest->sign=1;
+dest->exp=MAX_EXP;
+dest->type=IFPF_IS_NAN;
+
+return;
+}
+
+/*******************
+** IsMantissaZero **
+********************
+** Test whether a mantissa (INTERNAL_FPF_PRECISION 16-bit words,
+** addressed by mant) is entirely zero.
+** Returns 1 when every word is zero and 0 as soon as a non-zero
+** word is found.
+*/
+static int IsMantissaZero(u16 *mant)
+{
+int word;               /* Index */
+
+for(word=0;word<INTERNAL_FPF_PRECISION;word++)
+        if(mant[word]!=0)
+                return(0);
+
+return(1);
+}
+
+/**************
+** Add16Bits **
+***************
+** Compute b + c + *carry. The low 16 bits of the sum are stored
+** in *a and the carry out of bit 15 (0 or 1) replaces *carry.
+*/
+static void Add16Bits(u16 *carry,
+                u16 *a,
+                u16 b,
+                u16 c)
+{
+/*
+** Sum in 32 bits so that the carry out of the low 16 bits
+** shows up as bit 16.
+*/
+u32 sum=(u32)b+(u32)c+(u32)*carry;
+
+*carry=(u16)((sum>>16) & 1);    /* New carry */
+*a=(u16)(sum & 0xFFFF);         /* Result is lo 16 bits */
+return;
+}
+
+/**************
+** Sub16Bits **
+***************
+** Counterpart of Add16Bits: compute b - c - *borrow. The low
+** 16 bits of the difference are stored in *a and the borrow
+** out (0 or 1) replaces *borrow.
+*/
+static void Sub16Bits(u16 *borrow,
+                u16 *a,
+                u16 b,
+                u16 c)
+{
+/*
+** Subtract in 32 bits; when the result goes negative the
+** unsigned wrap-around leaves bit 16 set, which is the borrow.
+*/
+u32 diff=(u32)b-(u32)c-(u32)*borrow;
+
+*borrow=(u16)((diff>>16) & 1);  /* New borrow */
+*a=(u16)(diff & 0xFFFF);        /* Result is lo 16 bits */
+return;
+}
+
+/*******************
+** ShiftMantLeft1 **
+********************
+** Shift a mantissa (INTERNAL_FPF_PRECISION 16-bit words, word 0
+** most significant) left by one bit. A non-zero *carry on entry
+** is shifted into the lowest bit; on return *carry is non-zero
+** exactly when a 1 was shifted out of the top bit.
+*/
+static void ShiftMantLeft1(u16 *carry,
+                        u16 *mantissa)
+{
+int word;               /* Index, runs from last word to first */
+u16 shifted_out;        /* Top bit of the word being shifted */
+u16 value;              /* Word being shifted */
+
+word=INTERNAL_FPF_PRECISION;
+while(word-->0)
+{
+        value=mantissa[word];
+        shifted_out=(u16)(value & 0x8000);      /* Bit leaving this word */
+        value=(u16)(value<<1);
+        if(*carry!=0)
+                value|=1;               /* Bit arriving from the word below */
+        *carry=shifted_out;
+        mantissa[word]=value;
+}
+return;
+}
+
+/********************
+** ShiftMantRight1 **
+*********************
+** Shift a mantissa (INTERNAL_FPF_PRECISION 16-bit words, word 0
+** most significant) right by one bit. A non-zero *carry on entry
+** is shifted into the top bit; on return *carry is non-zero
+** exactly when a 1 was shifted out of the lowest bit.
+*/
+static void ShiftMantRight1(u16 *carry,
+                        u16 *mantissa)
+{
+int word;               /* Index, runs from first word to last */
+u16 shifted_out;        /* Low bit of the word being shifted */
+u16 value;              /* Word being shifted */
+
+word=0;
+while(word<INTERNAL_FPF_PRECISION)
+{
+        value=mantissa[word];
+        shifted_out=(u16)(value & 1);   /* Bit leaving this word */
+        value=(u16)(value>>1);
+        if(*carry!=0)
+                value|=0x8000;          /* Bit arriving from the word above */
+        *carry=shifted_out;
+        mantissa[word]=value;
+        word++;
+}
+return;
+}
+
+
+/*****************************
+** StickyShiftRightMant     **
+******************************
+** Shift the mantissa of *ptr right by "amount" bits with a
+** "sticky" lowest bit: whenever a 1 falls off the bottom, the
+** lowest mantissa bit is forced to 1 so the lost bits are not
+** forgotten. Numbers of type IFPF_IS_ZERO are left untouched.
+** Only the mantissa changes; the exponent is the caller's job.
+*/
+static void StickyShiftRightMant(InternalFPF *ptr,
+                        int amount)
+{
+u16 *mant=ptr->mantissa;
+u16 lost;               /* Bit shifted out by the last step */
+int n;                  /* Index / remaining shift count */
+
+/* Don't bother shifting a zero */
+if(ptr->type==IFPF_IS_ZERO)
+        return;
+
+/*
+** A shift this large pushes every bit out of the mantissa, so
+** the result is simply "all zero plus the sticky bit".
+*/
+if(amount>=INTERNAL_FPF_PRECISION * 16)
+{
+        for(n=0;n<INTERNAL_FPF_PRECISION-1;n++)
+                mant[n]=0;
+        mant[INTERNAL_FPF_PRECISION-1]=1;
+        return;
+}
+
+/* Otherwise shift one bit at a time, folding lost bits back in */
+for(n=amount;n>0;n--)
+{
+        lost=0;
+        ShiftMantRight1(&lost,mant);
+        if(lost)
+                mant[INTERNAL_FPF_PRECISION-1] |= 1;
+}
+return;
+}
+
+
+/**************************************************
+** POST ARITHMETIC PROCESSING **
+** (NORMALIZE, ROUND, OVERFLOW, AND UNDERFLOW) **
+**************************************************/
+
+/**************
+** normalize **
+***************
+** Normalize an internal-representation number: shift the
+** mantissa left until its top bit is 1, decrementing the
+** exponent once per shifted bit.
+** NOTE: the loop only ends once the top bit is set, so the
+** mantissa must not be all zero on entry.
+*/
+static void normalize(InternalFPF *ptr)
+{
+u16 carry_in;
+
+while (!(ptr->mantissa[0] & 0x8000))
+{
+        carry_in = 0;           /* Always shift a 0 in at the bottom */
+        ShiftMantLeft1(&carry_in, ptr->mantissa);
+        ptr->exp -= 1;
+}
+return;
+}
+
+/****************
+** denormalize **
+*****************
+** Denormalize an internal-representation number. When the
+** exponent of *ptr is below minimum_exponent, the mantissa is
+** shifted right (sticky) and the exponent raised until it equals
+** minimum_exponent. If that would take INTERNAL_FPF_PRECISION*16
+** shifts or more, the number underflows to a zero of the same
+** sign. Numbers whose exponent is already high enough are left
+** alone. A zero mantissa on entry only produces a diagnostic on
+** stdout; processing then continues.
+*/
+static void denormalize(InternalFPF *ptr,
+                int minimum_exponent)
+{
+long shortfall;         /* How far exp is from minimum_exponent */
+
+if (IsMantissaZero(ptr->mantissa))
+        printf("Error: zero significand in denormalize\n");
+
+shortfall = ptr->exp-minimum_exponent;
+if (shortfall >= 0)
+        return;                 /* Exponent is large enough already */
+
+/*
+** The number is subnormal
+*/
+shortfall = -shortfall;
+if (shortfall >= (INTERNAL_FPF_PRECISION * 16))
+{
+        /* Underflow */
+        SetInternalFPFZero(ptr, ptr->sign);
+        return;
+}
+
+ptr->exp+=shortfall;
+StickyShiftRightMant(ptr, shortfall);
+return;
+}
+
+
+/*********************
+** RoundInternalFPF **
+**********************
+** Round an internal-representation number.
+** Only NORMAL and SUBNORMAL numbers are touched. The number is
+** first denormalized against MIN_EXP; unless that turned it
+** into a zero, the three lowest bits of mantissa word 3 are
+** cleared. This is the simplest kind of rounding, "chop":
+** extraneous rightmost bits are simply hacked off.
+*/
+void RoundInternalFPF(InternalFPF *ptr)
+{
+if (ptr->type != IFPF_IS_NORMAL &&
+        ptr->type != IFPF_IS_SUBNORMAL)
+        return;
+
+denormalize(ptr, MIN_EXP);
+if (ptr->type == IFPF_IS_ZERO)
+        return;                 /* Underflowed; nothing left to chop */
+
+/* clear the extraneous bits */
+ptr->mantissa[3] &= 0xfff8;
+
+/*
+** Two steps of the original code are disabled:
+**  - clearing mantissa words 4 and up;
+**  - an overflow check (exp > MAX_EXP giving infinity), which
+**    the original authors note can never fire because exp is
+**    a short.
+*/
+return;
+}
+
+/*******************************************************
+** ARITHMETIC OPERATIONS ON INTERNAL REPRESENTATION **
+*******************************************************/
+
+/***************
+** choose_nan **
+****************
+** Called by routines that are forced to perform math on
+** a pair of NaN's. This routine selects which NaN is
+** returned and copies it into *z.
+** The mantissas are compared word by word, most significant
+** first, and the operand with the larger mantissa wins (the
+** original authors describe this as emulating an 80387).
+** If the mantissas are equal, x is chosen when intel_flag is 0
+** and y otherwise.
+*/
+static void choose_nan(InternalFPF *x,
+                InternalFPF *y,
+                InternalFPF *z,
+                int intel_flag)
+{
+int i;
+InternalFPF *winner;    /* The operand to hand back */
+
+/* Tie-break choice, used when no mantissa word differs */
+winner = intel_flag ? y : x;
+
+for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+{
+        if (x->mantissa[i] > y->mantissa[i])
+        {
+                winner = x;
+                break;
+        }
+        if (x->mantissa[i] < y->mantissa[i])
+        {
+                winner = y;
+                break;
+        }
+}
+
+/*
+** memmove() takes the destination first. The result belongs in
+** z, so z is the destination and the chosen operand the source.
+*/
+memmove((void *)z,(void *)winner,sizeof(InternalFPF));
+return;
+}
+
+
+/**********************
+** AddSubInternalFPF **
+***********************
+** Adding or subtracting internal-representation numbers.
+** Computes x + y when operation is 0 and x - y when operation
+** is 1, stores the result in *z and rounds it with
+** RoundInternalFPF(). x and y themselves are not modified.
+**
+** Special operands (zero, infinity, NaN) are handled by copying
+** the operand that determines the result into z. Note that
+** memmove() takes the destination FIRST, so z is always the
+** first argument of those copies.
+*/
+static void AddSubInternalFPF(uchar operation,
+                InternalFPF *x,
+                InternalFPF *y,
+                InternalFPF *z)
+{
+int exponent_difference;
+u16 borrow;
+u16 carry;
+int i;
+InternalFPF locx,locy;  /* Needed since we alter them */
+
+/*
+** Following big switch statement handles the
+** various combinations of operand types.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case ZERO_ZERO:
+        /* Result is x, forced positive when the effective signs differ */
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        if (x->sign ^ y->sign ^ operation)
+        {
+                z->sign = 0; /* positive */
+        }
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+case INFINITY_ZERO:
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+        /* x alone determines the result */
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        break;
+
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        /* y is a NaN: hand it back unchanged */
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        break;
+
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+case ZERO_INFINITY:
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+        /* y determines the result; a subtraction flips its sign */
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        z->sign ^= operation;
+        break;
+
+case SUBNORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_SUBNORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Copy x and y to locals, since we may have
+        ** to alter them.
+        */
+        memmove((void *)&locx,(void *)x,sizeof(InternalFPF));
+        memmove((void *)&locy,(void *)y,sizeof(InternalFPF));
+
+        /* compute sum/difference */
+        exponent_difference = locx.exp-locy.exp;
+        if (exponent_difference == 0)
+        {
+                /*
+                ** locx.exp == locy.exp
+                ** so, no shifting required
+                */
+                if (locx.type == IFPF_IS_SUBNORMAL ||
+                  locy.type == IFPF_IS_SUBNORMAL)
+                        z->type = IFPF_IS_SUBNORMAL;
+                else
+                        z->type = IFPF_IS_NORMAL;
+
+                /*
+                ** Assume that locx.mantissa > locy.mantissa
+                */
+                z->sign = locx.sign;
+                z->exp= locx.exp;
+        }
+        else
+                if (exponent_difference > 0)
+                {
+                        /*
+                        ** locx.exp > locy.exp
+                        */
+                        StickyShiftRightMant(&locy,
+                                 exponent_difference);
+                        z->type = locx.type;
+                        z->sign = locx.sign;
+                        z->exp = locx.exp;
+                }
+                else    /* if (exponent_difference < 0) */
+                {
+                        /*
+                        ** locx.exp < locy.exp
+                        */
+                        StickyShiftRightMant(&locx,
+                                -exponent_difference);
+                        z->type = locy.type;
+                        z->sign = locy.sign ^ operation;
+                        z->exp = locy.exp;
+                }
+
+                if (locx.sign ^ locy.sign ^ operation)
+                {
+                        /*
+                        ** Signs are different, subtract mantissas
+                        */
+                        borrow = 0;
+                        for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                                Sub16Bits(&borrow,
+                                        &z->mantissa[i],
+                                        locx.mantissa[i],
+                                        locy.mantissa[i]);
+
+                        if (borrow)
+                        {
+                                /* The y->mantissa was larger than the
+                                ** x->mantissa leaving a negative
+                                ** result.  Change the result back to
+                                ** an unsigned number and flip the
+                                ** sign flag.
+                                */
+                                z->sign = locy.sign ^ operation;
+                                borrow = 0;
+                                for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                                {
+                                        Sub16Bits(&borrow,
+                                                &z->mantissa[i],
+                                                0,
+                                                z->mantissa[i]);
+                                }
+                        }
+                        else
+                        {
+                                /* The assumption made above
+                                ** (i.e. x->mantissa >= y->mantissa)
+                                ** was correct.  Therefore, do nothing.
+                                ** z->sign = x->sign;
+                                */
+                        }
+
+                        if (IsMantissaZero(z->mantissa))
+                        {
+                                z->type = IFPF_IS_ZERO;
+                                z->sign = 0; /* positive */
+                        }
+                        else
+                                if (locx.type == IFPF_IS_NORMAL ||
+                                         locy.type == IFPF_IS_NORMAL)
+                                {
+                                        normalize(z);
+                                }
+                }
+                else
+                {
+                        /* signs are the same, add mantissas */
+                        carry = 0;
+                        for (i=(INTERNAL_FPF_PRECISION-1); i>=0; i--)
+                        {
+                                Add16Bits(&carry,
+                                        &z->mantissa[i],
+                                        locx.mantissa[i],
+                                        locy.mantissa[i]);
+                        }
+
+                        if (carry)
+                        {
+                                /* Sum overflowed: renormalize by one bit */
+                                z->exp++;
+                                carry=0;
+                                ShiftMantRight1(&carry,z->mantissa);
+                                z->mantissa[0] |= 0x8000;
+                                z->type = IFPF_IS_NORMAL;
+                        }
+                        else
+                                if (z->mantissa[0] & 0x8000)
+                                        z->type = IFPF_IS_NORMAL;
+        }
+        break;
+
+case INFINITY_INFINITY:
+        SetInternalFPFNaN(z);
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 1);
+        break;
+}
+
+/*
+** All the math is done; time to round.
+*/
+RoundInternalFPF(z);
+return;
+}
+
+
+/************************
+** MultiplyInternalFPF **
+*************************
+** Two internal-representation numbers x and y are multiplied; the
+** result is stored in *z and rounded with RoundInternalFPF().
+** x and y themselves are not modified.
+**
+** Special operands (zero, infinity, NaN) are handled by copying
+** the operand that determines the result into z. Note that
+** memmove() takes the destination FIRST, so z is always the
+** first argument of those copies.
+*/
+static void MultiplyInternalFPF(InternalFPF *x,
+                        InternalFPF *y,
+                        InternalFPF *z)
+{
+int i;
+int j;
+u16 carry;
+u16 extra_bits[INTERNAL_FPF_PRECISION]; /* Low half of the product */
+InternalFPF locy;       /* Needed since this will be altered */
+/*
+** As in the preceding function, this large switch
+** statement selects among the many combinations
+** of operands.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+case INFINITY_INFINITY:
+case ZERO_ZERO:
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+        /* Result is x with the product's sign */
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        z->sign ^= y->sign;
+        break;
+
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+        /* Result is y with the product's sign */
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        z->sign ^= x->sign;
+        break;
+
+case ZERO_INFINITY:
+case INFINITY_ZERO:
+        SetInternalFPFNaN(z);
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        break;
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        break;
+
+
+case SUBNORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_SUBNORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Check for unnormal zero arguments. A zero mantissa makes
+        ** the product zero; it must be returned right away, because
+        ** the normalization loop below only ends once a 1 bit
+        ** reaches the top of the product.
+        */
+        if (IsMantissaZero(x->mantissa) || IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFZero(z, x->sign ^ y->sign);
+                break;
+        }
+
+        /*
+        ** Make a local copy of the y number, since we will be
+        ** altering it in the process of multiplying.
+        */
+        memmove((void *)&locy,(void *)y,sizeof(InternalFPF));
+
+        /*
+        ** Initialize the result
+        */
+        if (x->type == IFPF_IS_SUBNORMAL ||
+            y->type == IFPF_IS_SUBNORMAL)
+                z->type = IFPF_IS_SUBNORMAL;
+        else
+                z->type = IFPF_IS_NORMAL;
+
+        z->sign = x->sign ^ y->sign;
+        z->exp = x->exp + y->exp ;
+        for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+        {
+                z->mantissa[i] = 0;
+                extra_bits[i] = 0;
+        }
+
+        for (i=0; i<(INTERNAL_FPF_PRECISION*16); i++)
+        {
+                /*
+                ** Get rightmost bit of the multiplier
+                */
+                carry = 0;
+                ShiftMantRight1(&carry, locy.mantissa);
+                if (carry)
+                {
+                        /*
+                        ** Add the multiplicand to the product
+                        */
+                        carry = 0;
+                        for (j=(INTERNAL_FPF_PRECISION-1); j>=0; j--)
+                                Add16Bits(&carry,
+                                        &z->mantissa[j],
+                                        z->mantissa[j],
+                                        x->mantissa[j]);
+                }
+                else
+                {
+                        carry = 0;
+                }
+
+                /*
+                ** Shift the product right.  Overflow bits get
+                ** shifted into extra_bits.  We'll use it later
+                ** to help with the "sticky" bit.
+                */
+                ShiftMantRight1(&carry, z->mantissa);
+                ShiftMantRight1(&carry, extra_bits);
+        }
+
+        /*
+        ** Normalize
+        ** Note that we use a "special" normalization routine
+        ** because we need to use the extra bits. (These are
+        ** bits that may have been shifted off the bottom that
+        ** we want to reclaim...if we can.)
+        */
+        while ((z->mantissa[0] & 0x8000) == 0)
+        {
+                carry = 0;
+                ShiftMantLeft1(&carry, extra_bits);
+                ShiftMantLeft1(&carry, z->mantissa);
+                z->exp--;
+        }
+
+        /*
+        ** Set the sticky bit if any bits set in extra bits.
+        ** (IsMantissaZero() is non-zero when ALL bits are clear.)
+        */
+        if (!IsMantissaZero(extra_bits))
+        {
+                z->mantissa[INTERNAL_FPF_PRECISION-1] |= 1;
+        }
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 0);
+        break;
+}
+
+/*
+** All math done...do rounding.
+*/
+RoundInternalFPF(z);
+return;
+}
+
+
+/**********************
+** DivideInternalFPF **
+***********************
+** Divide internal FPF number x by y. The quotient is stored in
+** *z and rounded with RoundInternalFPF(). x and y themselves
+** are not modified.
+**
+** When an operand is a NaN it is copied into z. Note that
+** memmove() takes the destination FIRST, so z is always the
+** first argument of those copies.
+*/
+static void DivideInternalFPF(InternalFPF *x,
+                        InternalFPF *y,
+                        InternalFPF *z)
+{
+int i;
+int j;
+u16 carry;
+u16 extra_bits[INTERNAL_FPF_PRECISION]; /* Running remainder */
+InternalFPF locx;       /* Local for x number */
+
+/*
+** As with preceding function, the following switch
+** statement selects among the various possible
+** operands.
+*/
+switch ((x->type * IFPF_TYPE_COUNT) + y->type)
+{
+case ZERO_ZERO:
+case INFINITY_INFINITY:
+        SetInternalFPFNaN(z);
+        break;
+
+case ZERO_SUBNORMAL:
+case ZERO_NORMAL:
+        if (IsMantissaZero(y->mantissa))
+        {
+                /* Divisor is an unnormal zero: 0/0 */
+                SetInternalFPFNaN(z);
+                break;
+        }
+        /* fallthrough */
+
+case ZERO_INFINITY:
+case SUBNORMAL_INFINITY:
+case NORMAL_INFINITY:
+        SetInternalFPFZero(z, x->sign ^ y->sign);
+        break;
+
+case SUBNORMAL_ZERO:
+case NORMAL_ZERO:
+        if (IsMantissaZero(x->mantissa))
+        {
+                /* Dividend is an unnormal zero: 0/0 */
+                SetInternalFPFNaN(z);
+                break;
+        }
+        /* fallthrough */
+
+case INFINITY_ZERO:
+case INFINITY_SUBNORMAL:
+case INFINITY_NORMAL:
+        SetInternalFPFInfinity(z, 0);
+        z->sign = x->sign ^ y->sign;
+        break;
+
+case NAN_ZERO:
+case NAN_SUBNORMAL:
+case NAN_NORMAL:
+case NAN_INFINITY:
+        memmove((void *)z,(void *)x,sizeof(InternalFPF));
+        break;
+
+case ZERO_NAN:
+case SUBNORMAL_NAN:
+case NORMAL_NAN:
+case INFINITY_NAN:
+        memmove((void *)z,(void *)y,sizeof(InternalFPF));
+        break;
+
+case SUBNORMAL_SUBNORMAL:
+case NORMAL_SUBNORMAL:
+case SUBNORMAL_NORMAL:
+case NORMAL_NORMAL:
+        /*
+        ** Make local copy of x number, since we'll be
+        ** altering it in the process of dividing.
+        */
+        memmove((void *)&locx,(void *)x,sizeof(InternalFPF));
+
+        /*
+        ** Check for unnormal zero arguments
+        */
+        if (IsMantissaZero(locx.mantissa))
+        {
+                if (IsMantissaZero(y->mantissa))
+                        SetInternalFPFNaN(z);
+                else
+                        SetInternalFPFZero(z, 0);
+                break;
+        }
+        if (IsMantissaZero(y->mantissa))
+        {
+                SetInternalFPFInfinity(z, 0);
+                break;
+        }
+
+        /*
+        ** Initialize the result
+        */
+        z->type = x->type;
+        z->sign = x->sign ^ y->sign;
+        z->exp = x->exp - y->exp +
+                        ((INTERNAL_FPF_PRECISION * 16 * 2));
+        for (i=0; i<INTERNAL_FPF_PRECISION; i++)
+        {
+                z->mantissa[i] = 0;
+                extra_bits[i] = 0;
+        }
+
+        /*
+        ** Shift-and-subtract division: each pass moves one bit of
+        ** the dividend into the remainder (extra_bits) and produces
+        ** one quotient bit. Stop as soon as the quotient's top bit
+        ** is set, i.e. the quotient is normalized.
+        */
+        while ((z->mantissa[0] & 0x8000) == 0)
+        {
+                carry = 0;
+                ShiftMantLeft1(&carry, locx.mantissa);
+                ShiftMantLeft1(&carry, extra_bits);
+
+                /*
+                ** Time to subtract yet? A bit carried out of the
+                ** remainder means it certainly exceeds the divisor;
+                ** otherwise compare word by word, high word first.
+                */
+                if (carry == 0)
+                        for (j=0; j<INTERNAL_FPF_PRECISION; j++)
+                        {
+                                if (y->mantissa[j] > extra_bits[j])
+                                {
+                                        carry = 0;
+                                        goto no_subtract;
+                                }
+                                if (y->mantissa[j] < extra_bits[j])
+                                        break;
+                        }
+                /*
+                ** Divisor (y) <= dividend (x), subtract
+                */
+                carry = 0;
+                for (j=(INTERNAL_FPF_PRECISION-1); j>=0; j--)
+                        Sub16Bits(&carry,
+                                &extra_bits[j],
+                                extra_bits[j],
+                                y->mantissa[j]);
+                carry = 1;      /* 1 shifted into quotient */
+        no_subtract:
+                ShiftMantLeft1(&carry, z->mantissa);
+                z->exp--;
+        }
+        break;
+
+case NAN_NAN:
+        choose_nan(x, y, z, 0);
+        break;
+}
+
+/*
+** Math complete...do rounding
+*/
+RoundInternalFPF(z);
+}
+
+/**********************
+** LongToInternalFPF **
+** Int32ToInternalFPF **
+***********************
+** Convert a signed 32-bit integer into an internal FPF number.
+** (This routine replaces the earlier LongToInternalFPF, which
+** took a long.) Zero produces a number of type IFPF_IS_ZERO
+** with exponent 0; any other value produces a normalized
+** IFPF_IS_NORMAL number.
+*/
+static void Int32ToInternalFPF(int32 mylong,
+                        InternalFPF *dest)
+{
+int i;                  /* Index */
+u32 magnitude;          /* Absolute value of mylong */
+
+/*
+** Save the sign and get the absolute value. The negation is
+** done in unsigned arithmetic so that the most negative input
+** value, whose magnitude does not fit in a signed int32, is
+** handled without signed overflow.
+*/
+if(mylong<(int32)0)
+{       dest->sign=1;
+        magnitude=(u32)0-(u32)mylong;
+}
+else
+{       dest->sign=0;
+        magnitude=(u32)mylong;
+}
+/*
+** Prepare the destination floating point number
+*/
+dest->type=IFPF_IS_NORMAL;
+for(i=0;i<INTERNAL_FPF_PRECISION;i++)
+        dest->mantissa[i]=0;
+
+/*
+** See if we've got a zero.  If so, make the resultant FP
+** number a true zero and go home.
+*/
+if(magnitude==0)
+{       dest->type=IFPF_IS_ZERO;
+        dest->exp=0;
+        return;
+}
+
+/*
+** Not a true zero.  Set the exponent to 32 (internal FPFs have
+** no bias) and load the high and low words into the first two
+** mantissa words.  Then normalize.  The action of
+** normalizing slides the mantissa bits into place and sets
+** up the exponent properly.
+*/
+dest->exp=32;
+dest->mantissa[0]=(u16)((magnitude >> 16) & 0xFFFF);
+dest->mantissa[1]=(u16)(magnitude & 0xFFFF);
+normalize(dest);
+return;
+}
+
+#ifdef DEBUG
+/************************
+** InternalFPFToString **
+*************************
+** FOR DEBUG PURPOSES
+** This routine converts an internal floating point representation
+** number to a string.  Used in debugging the package.
+** Returns the length of the converted number, not counting the
+** terminating null.
+** NOTE: dest must point to a buffer big enough to hold the
+**  result.  The result is always null-terminated.
+** NOTE: This routine returns 5 significant digits.  That's
+**  about all I feel safe with, given the method of
+**  conversion.  It should be more than enough for programmers
+**  to determine whether the package is properly ported.
+** Output forms: "NaN", "+Inf"/"-Inf", "+0"/"-0", or a sign, one
+** digit, a decimal point, four digits, 'E' and the decimal
+** exponent printed with "%4d".
+*/
+static int InternalFPFToString(char *dest,
+                InternalFPF *src)
+{
+InternalFPF locFPFNum;          /* Local for src (will be altered) */
+InternalFPF IFPF10;             /* Floating-point 10 */
+InternalFPF IFPFComp;           /* For doing comparisons */
+int msign;                      /* Holding for mantissa sign */
+int expcount;                   /* Exponent counter */
+int ccount;                     /* Character counter */
+int i,j,k;                      /* Index */
+u16 carryaccum;                 /* Carry accumulator */
+u16 mycarry;                    /* Local for carry */
+
+/*
+** Check first for the simple things...Nan, Infinity, Zero.
+** If found, copy the proper string in and go home.
+** strcpy() is used so that the terminating null is stored too;
+** callers print the buffer with "%s".
+*/
+switch(src->type)
+{
+        case IFPF_IS_NAN:
+                strcpy(dest,"NaN");
+                return(3);
+
+        case IFPF_IS_INFINITY:
+                if(src->sign==0)
+                        strcpy(dest,"+Inf");
+                else
+                        strcpy(dest,"-Inf");
+                return(4);
+
+        case IFPF_IS_ZERO:
+                if(src->sign==0)
+                        strcpy(dest,"+0");
+                else
+                        strcpy(dest,"-0");
+                return(2);
+}
+
+/*
+** Move the internal number into our local holding area, since
+** we'll be altering it to print it out.
+*/
+memcpy((void *)&locFPFNum,(void *)src,sizeof(InternalFPF));
+
+/*
+** Set up a floating-point 10...which we'll use a lot in a minute.
+*/
+Int32ToInternalFPF((int32)10,&IFPF10);
+
+/*
+** Save the mantissa sign and make the local copy positive.
+** (The original cleared src->sign instead; fixed Nov. 13, 1997.)
+*/
+msign=src->sign;
+(&locFPFNum)->sign=0;
+
+expcount=0;             /* Init exponent counter */
+
+/*
+** See if the number is less than 10.  If so, multiply
+** the number repeatedly by 10 until it's not.   For each
+** multiplication, decrement a counter so we can keep track
+** of the exponent.
+*/
+
+while(1)
+{       AddSubInternalFPF(1,&locFPFNum,&IFPF10,&IFPFComp);
+        if(IFPFComp.sign==0) break;
+        MultiplyInternalFPF(&locFPFNum,&IFPF10,&IFPFComp);
+        expcount--;
+        memcpy((void *)&locFPFNum,(void *)&IFPFComp,sizeof(InternalFPF));
+}
+/*
+** Do the reverse of the above.  As long as the number is
+** greater than or equal to 10, divide it by 10.  Increment the
+** exponent counter for each division.
+*/
+
+while(1)
+{
+        AddSubInternalFPF(1,&locFPFNum,&IFPF10,&IFPFComp);
+        if(IFPFComp.sign!=0) break;
+        DivideInternalFPF(&locFPFNum,&IFPF10,&IFPFComp);
+        expcount++;
+        memcpy((void *)&locFPFNum,(void *)&IFPFComp,sizeof(InternalFPF));
+}
+
+/*
+** About time to start storing things.  First, store the
+** mantissa sign.
+*/
+ccount=1;               /* Init character counter */
+if(msign==0)
+        *dest++='+';
+else
+        *dest++='-';
+
+/*
+** At this point we know that the number is in the range
+** 10 > n >=1.  We need to "strip digits" out of the
+** mantissa.  We do this by treating the mantissa as
+** an integer and multiplying by 10. (Not a floating-point
+** 10, but an integer 10.  Since this is debug code and we
+** could care less about speed, we'll do it the stupid
+** way and simply add the number to itself 10 times.
+** Anything that makes it to the left of the implied binary point
+** gets stripped off and emitted.  We'll do this for
+** 5 significant digits (which should be enough to
+** verify things).
+*/
+/*
+** Re-position radix point
+*/
+carryaccum=0;
+while(locFPFNum.exp>0)
+{
+        mycarry=0;
+        ShiftMantLeft1(&mycarry,locFPFNum.mantissa);
+        carryaccum=(carryaccum<<1);
+        if(mycarry) carryaccum++;
+        locFPFNum.exp--;
+}
+
+while(locFPFNum.exp<0)
+{
+        mycarry=0;
+        ShiftMantRight1(&mycarry,locFPFNum.mantissa);
+        locFPFNum.exp++;
+}
+
+for(i=0;i<6;i++)
+        if(i==1)
+        {       /* Emit decimal point */
+                *dest++='.';
+                ccount++;
+        }
+        else
+        {       /* Emit a digit */
+                *dest++=('0'+carryaccum);
+                ccount++;
+
+                carryaccum=0;
+                memcpy((void *)&IFPF10,
+                        (void *)&locFPFNum,
+                        sizeof(InternalFPF));
+
+                /* Do multiply via repeated adds */
+                for(j=0;j<9;j++)
+                {
+                        mycarry=0;
+                        for(k=(INTERNAL_FPF_PRECISION-1);k>=0;k--)
+                                Add16Bits(&mycarry,&(IFPFComp.mantissa[k]),
+                                        locFPFNum.mantissa[k],
+                                        IFPF10.mantissa[k]);
+                        carryaccum+=mycarry ? 1 : 0;
+                        memcpy((void *)&locFPFNum,
+                                (void *)&IFPFComp,
+                                sizeof(InternalFPF));
+                }
+        }
+
+/*
+** Now move the 'E', the exponent sign, and the exponent
+** into the string.
+*/
+*dest++='E';
+ccount++;               /* The 'E' is part of the returned length */
+
+/* sprintf is supposed to return an integer, but it caused problems on SunOS
+ * with the native cc.  Hence we force it.
+ * Uwe F. Mayer
+ */
+ccount+=(int)sprintf(dest,"%4d",expcount);
+
+/*
+** All done, go home.
+*/
+return(ccount);
+
+}
+
+#endif
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/emfloat.h b/benchmarks/nbench/nbench-byte-2.2.3/emfloat.h
new file mode 100644
index 0000000..41cc6d9
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/emfloat.h
@@ -0,0 +1,154 @@
+
+/*
+** emfloat.h
+** Header for emfloat.c
+**
+** BYTEmark (tm)
+** BYTE Magazine's Native Mode benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Create:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+
+/* Is this a 64 bit architecture? If so, this will define LONG64 */
+/* Uwe F. Mayer 15 November 1997 */
+#include "pointer.h"
+
+/*
+** DEFINES
+*/
+#define u8 unsigned char
+#define u16 unsigned short
+#ifdef LONG64
+#define u32 unsigned int
+#else
+#define u32 unsigned long
+#endif
+#define uchar unsigned char
+#define ulong unsigned long
+
+#define MAX_EXP 32767L
+#define MIN_EXP (-32767L)
+
+/*
+** Classification codes for an emulated floating-point number.
+** Presumably these are the values stored in InternalFPF.type --
+** TODO confirm against emfloat.c.  IFPF_TYPE_COUNT is the number
+** of distinct classifications.
+*/
+#define IFPF_IS_ZERO 0
+#define IFPF_IS_SUBNORMAL 1
+#define IFPF_IS_NORMAL 2
+#define IFPF_IS_INFINITY 3
+#define IFPF_IS_NAN 4
+#define IFPF_TYPE_COUNT 5
+
+/*
+** Codes for an ordered pair of operand classifications.  Each
+** X_Y value equals (code of X) * IFPF_TYPE_COUNT + (code of Y),
+** e.g. SUBNORMAL_ZERO = 1*5 + 0 = 5 and NAN_NAN = 4*5 + 4 = 24.
+*/
+#define ZERO_ZERO 0
+#define ZERO_SUBNORMAL 1
+#define ZERO_NORMAL 2
+#define ZERO_INFINITY 3
+#define ZERO_NAN 4
+
+#define SUBNORMAL_ZERO 5
+#define SUBNORMAL_SUBNORMAL 6
+#define SUBNORMAL_NORMAL 7
+#define SUBNORMAL_INFINITY 8
+#define SUBNORMAL_NAN 9
+
+#define NORMAL_ZERO 10
+#define NORMAL_SUBNORMAL 11
+#define NORMAL_NORMAL 12
+#define NORMAL_INFINITY 13
+#define NORMAL_NAN 14
+
+#define INFINITY_ZERO 15
+#define INFINITY_SUBNORMAL 16
+#define INFINITY_NORMAL 17
+#define INFINITY_INFINITY 18
+#define INFINITY_NAN 19
+
+#define NAN_ZERO 20
+#define NAN_SUBNORMAL 21
+#define NAN_NORMAL 22
+#define NAN_INFINITY 23
+#define NAN_NAN 24
+/*
+** Single-operand codes; numerically identical to the IFPF_IS_*
+** values above.
+*/
+#define OPERAND_ZERO 0
+#define OPERAND_SUBNORMAL 1
+#define OPERAND_NORMAL 2
+#define OPERAND_INFINITY 3
+#define OPERAND_NAN 4
+
+/*
+** Following already defined in NMGLOBAL.H
+**
+#define INTERNAL_FPF_PRECISION 4
+*/
+
+/*
+** TYPEDEFS
+*/
+
+/*
+** Internal (unpacked) representation of an emulated floating-point
+** number: a classification, a sign, an unbiased exponent and a
+** mantissa held as INTERNAL_FPF_PRECISION 16-bit words.
+*/
+typedef struct
+{
+ u8 type; /* Classification: NORMAL, SUBNORMAL, etc. (presumably an IFPF_IS_* code) */
+ u8 sign; /* Mantissa sign */
+ short exp; /* Signed exponent...no bias */
+ /* Mantissa words.  NOTE(review): the debug print code propagates */
+ /* carries from the highest index down to 0, which suggests index 0 */
+ /* is the most significant word -- confirm. */
+ u16 mantissa[INTERNAL_FPF_PRECISION];
+} InternalFPF;
+
+/*
+** PROTOTYPES
+*/
+void SetupCPUEmFloatArrays(InternalFPF *abase,
+ InternalFPF *bbase, InternalFPF *cbase, ulong arraysize);
+ulong DoEmFloatIteration(InternalFPF *abase,
+ InternalFPF *bbase, InternalFPF *cbase,
+ ulong arraysize, ulong loops);
+static void SetInternalFPFZero(InternalFPF *dest,
+ uchar sign);
+static void SetInternalFPFInfinity(InternalFPF *dest,
+ uchar sign);
+static void SetInternalFPFNaN(InternalFPF *dest);
+static int IsMantissaZero(u16 *mant);
+static void Add16Bits(u16 *carry,u16 *a,u16 b,u16 c);
+static void Sub16Bits(u16 *borrow,u16 *a,u16 b,u16 c);
+static void ShiftMantLeft1(u16 *carry,u16 *mantissa);
+static void ShiftMantRight1(u16 *carry,u16 *mantissa);
+static void StickyShiftRightMant(InternalFPF *ptr,int amount);
+static void normalize(InternalFPF *ptr);
+static void denormalize(InternalFPF *ptr,int minimum_exponent);
+void RoundInternalFPF(InternalFPF *ptr);
+static void choose_nan(InternalFPF *x,InternalFPF *y,InternalFPF *z,
+ int intel_flag);
+static void AddSubInternalFPF(uchar operation,InternalFPF *x,
+ InternalFPF *y,InternalFPF *z);
+static void MultiplyInternalFPF(InternalFPF *x,InternalFPF *y,
+ InternalFPF *z);
+static void DivideInternalFPF(InternalFPF *x,InternalFPF *y,
+ InternalFPF *z);
+/* static void LongToInternalFPF(long mylong, */
+static void Int32ToInternalFPF(int32 mylong,
+ InternalFPF *dest);
+#ifdef DEBUG
+static int InternalFPFToString(char *dest,
+ InternalFPF *src);
+#endif
+
+/*
+** EXTERNALS
+*/
+extern ulong StartStopwatch();
+extern ulong StopStopwatch(ulong elapsed);
+/* extern long randwc(long num); */
+extern int32 randwc(int32 num);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/hardware.c b/benchmarks/nbench/nbench-byte-2.2.3/hardware.c
new file mode 100644
index 0000000..53bbec9
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/hardware.c
@@ -0,0 +1,215 @@
+#include <rtems.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define BUF_SIZ 1024
+
+/******************
+** output_string **
+*******************
+** Displays a string on the screen. Also, if the flag
+** write_to_file is set, outputs the string to the output file.
+** Note, this routine presumes that you've included a carriage
+** return at the end of the buffer.
+*/
+static void output_string(const char *buffer, const int write_to_file,
+                          FILE *global_ofile){
+  /* The text always goes to standard output. */
+  printf("%s",buffer);
+
+  /* It is mirrored to the output file only when the caller asks for it. */
+  if(write_to_file){
+    fprintf(global_ofile,"%s",buffer);
+  }
+}
+
+
+/******************
+** removeNewLine **
+*******************
+** Removes a trailing newline character if present
+*/
+static void removeNewLine(char * s) {
+  /* Measure once; an empty string has nothing to strip. */
+  size_t len = strlen(s);
+
+  if(len == 0)
+    return;
+
+  /* Drop exactly one trailing newline, if there is one. */
+  if(s[len-1] == '\n')
+    s[len-1] = '\0';
+}
+
+
+/***************
+** runCommand **
+****************
+** Run the system command through a pipe
+** The pointer result must point to a pre-allocated array of at least BUF_SIZ
+*/
+
+/*
+static void runCommand (const char *command, char *result) {
+ FILE * pipe;
+
+ pipe = popen(command, "r");
+ if(pipe == NULL) {
+ // command failed
+ result[0] = '\0';
+ } else {
+ if(NULL == fgets(result, BUF_SIZ, pipe)){
+ // command failed
+ result[0] = '\0';
+ }
+ pclose(pipe);
+ }
+ removeNewLine(result);
+}
+*/
+
+/********************
+** readProcCpuInfo **
+*********************
+** Reads and parses /proc/cpuinfo on a Linux system
+** The pointers must point to pre-allocated arrays of at least BUF_SIZ
+*/
+static void readProcCpuInfo (char *model, char *cache) {
+
+ /*
+ ** In this version the /proc/cpuinfo parser is disabled (see the
+ ** commented-out block below); both output strings are simply set
+ ** to the placeholder "NA\n".  NOTE(review): the file includes
+ ** <rtems.h>, so presumably the target has no /proc -- confirm.
+ */
+ sprintf(model, "NA\n");
+ sprintf(cache, "NA\n");
+
+ /* Original Linux implementation, kept for reference but not compiled: */
+ /*
+
+ FILE * info;
+ char * cp;
+ int cpus = 0;
+ char * buffer_end;
+ char buffer[BUF_SIZ];
+ char vendor_id[BUF_SIZ];
+ char model_name[BUF_SIZ];
+ char cpu_MHz[BUF_SIZ];
+ int i;
+ float f;
+
+ vendor_id[0] = model_name[0] = cpu_MHz[0] = model[0] = cache[0] = '\0';
+ info = fopen("/proc/cpuinfo", "r");
+ if(info != NULL) {
+ // command did not fail
+ while(NULL != fgets(buffer, BUF_SIZ, info)){
+ buffer_end = buffer + strlen(buffer);
+ cp = buffer;
+ if(! strncmp(buffer, "processor", 9)) {
+ cpus++;
+ } else if(! strncmp(buffer, "vendor_id", 9)) {
+ cp+=strlen("vendor_id");
+ while(cp < buffer_end && ( *cp == ' ' || *cp == ':'|| *cp == '\t'))
+ cp++;
+ if(cp<buffer_end) {
+ strcpy(vendor_id, cp);
+ }
+ removeNewLine(vendor_id);
+ } else if(! strncmp(buffer, "model name", 10)) {
+ cp+=strlen("model name");
+ while(cp < buffer_end && ( *cp == ' ' || *cp == ':'|| *cp == '\t'))
+ cp++;
+ if(cp<buffer_end) {
+ strcpy(model_name, cp);
+ }
+ removeNewLine(model_name);
+ } else if(! strncmp(buffer, "cpu MHz", 7)) {
+ cp+=strlen("cpu MHz");
+ while(cp < buffer_end && ( *cp == ' ' || *cp == ':'|| *cp == '\t'))
+ cp++;
+ if(cp<buffer_end) {
+ strcpy(cpu_MHz, cp);
+ }
+ removeNewLine(cpu_MHz);
+ } else if(! strncmp(buffer, "cache size", 10)) {
+ cp+=strlen("cache size");
+ while(cp < buffer_end && ( *cp == ' ' || *cp == ':'|| *cp == '\t'))
+ cp++;
+ if(cp<buffer_end) {
+ strcpy(cache, cp);
+ }
+ removeNewLine(cache);
+ }
+ }
+ if(cpus>1) {
+ if (cpus==2) {
+ strcpy(model, "Dual");
+ } else {
+ sprintf(model, "%d CPU", cpus);
+ }
+ }
+ cp = model + strlen(model);
+ if(vendor_id[0] != '\0'){
+ if(cp != model){
+ *cp++ = ' ';
+ }
+ strcpy(cp, vendor_id);
+ cp += strlen(vendor_id);
+ }
+ if(model_name[0] != '\0'){
+ if(cp != model){
+ *cp++ = ' ';
+ }
+ strcpy(cp, model_name);
+ cp += strlen(model_name);
+ }
+ if(cpu_MHz[0] != '\0'){
+ if(cp != model){
+ *cp++ = ' ';
+ }
+ f = atof(cpu_MHz);
+ i = (int)(f+0.5f);
+ sprintf(cpu_MHz, "%dMHz", i);
+ strcpy(cp, cpu_MHz);
+ cp += strlen(cpu_MHz);
+ }
+ fclose(info);
+ }*/
+}
+
+
+/*************
+** hardware **
+**************
+** Runs the system command "uname -s -r" unless NO_UNAME is defined
+** (it is defined just below, so the OS string is left empty)
+** Reads /proc/cpuinfo if on a linux system
+** Writes output
+*/
+
+#define NO_UNAME
+
+void hardware(const int write_to_file, FILE *global_ofile) {
+  /*
+  ** Prints three report lines (CPU, L2 cache, OS) through
+  ** output_string().
+  **   write_to_file - non-zero to also write each line to global_ofile
+  **   global_ofile  - output file, passed straight to output_string()
+  **
+  ** The CPU and cache strings are only filled in when the OS string
+  ** contains "Linux"; with NO_UNAME defined the OS string is empty,
+  ** so all three fields are printed empty.
+  */
+  char buffer[BUF_SIZ];   /* One formatted output line */
+  char os[BUF_SIZ];       /* OS name and release */
+  char model[BUF_SIZ];    /* CPU description */
+  char cache[BUF_SIZ];    /* Cache description */
+#ifdef NO_UNAME
+  os[0] = '\0';
+#else
+  /* Only needed when the uname command is actually run. */
+  char os_command[] = "uname -s -r";
+  runCommand(os_command, os);
+#endif
+  if(NULL != strstr(os, "Linux")) {
+    readProcCpuInfo (model, cache);
+  } else {
+    model[0] = '\0';
+    cache[0] = '\0';
+  }
+  /*
+  ** Each field may itself be up to BUF_SIZ-1 characters long, so the
+  ** label plus the field can exceed buffer; snprintf truncates
+  ** instead of writing past the end.
+  */
+  snprintf(buffer, sizeof buffer, "CPU : %s\n", model);
+  output_string(buffer, write_to_file, global_ofile);
+  snprintf(buffer, sizeof buffer, "L2 Cache : %s\n", cache);
+  output_string(buffer, write_to_file, global_ofile);
+  snprintf(buffer, sizeof buffer, "OS : %s\n", os);
+  output_string(buffer, write_to_file, global_ofile);
+}
+
+
+/************************
+** main for hardware.c **
+*************************
+** For testing of code only
+** Should be commented out
+*/
+/*
+int main(int argc, char * argv[]) {
+ hardware(0, NULL);
+ return 0;
+}
+*/
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/hardware.h b/benchmarks/nbench/nbench-byte-2.2.3/hardware.h
new file mode 100644
index 0000000..2a07934
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/hardware.h
@@ -0,0 +1,2 @@
+extern
+void hardware(const int write_to_file, FILE *global_ofile);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/hello.c b/benchmarks/nbench/nbench-byte-2.2.3/hello.c
new file mode 100644
index 0000000..c664483
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/hello.c
@@ -0,0 +1,2 @@
+#include <stdio.h>
+int main ()
+{
+	/* Minimal program: print a greeting and report success. */
+	puts("hello.");
+	return(0);
+}
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/misc.c b/benchmarks/nbench/nbench-byte-2.2.3/misc.c
new file mode 100644
index 0000000..a5144e4
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/misc.c
@@ -0,0 +1,120 @@
+
+/*
+** misc.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+#include "misc.h"
+
+/***********************************************************
+** MISCELLANEOUS BUT OTHERWISE NECESSARY ROUTINES **
+***********************************************************/
+
+/****************************
+** RANDOM NUMBER GENERATOR **
+*****************************
+** This is a second-order linear congruential random number
+** generator. Its advantage is (of course) that it can be
+** seeded and will thus produce repeatable sequences of
+** random numbers.
+*/
+
+/****************************
+* randwc() *
+*****************************
+** Returns signed long random modulo num.
+*/
+/*
+long randwc(long num)
+{
+ return(randnum(0L)%num);
+}
+*/
+/*
+** Returns signed 32-bit random modulo num.
+*/
+int32 randwc(int32 num)
+{
+ /*
+ ** Draw the next generator value (argument 0 means "do not
+ ** restart") and reduce it modulo num.
+ ** NOTE(review): num==0 divides by zero, so callers must pass a
+ ** non-zero value.  The result takes the sign of the generator
+ ** value and may therefore be negative; abs_randwc() exists for
+ ** callers that need a non-negative result.
+ */
+ return(randnum((int32)0)%num);
+}
+
+/***************************
+** abs_randwc() **
+****************************
+** Same as randwc(), only this routine returns only
+** positive numbers.
+*/
+/*
+unsigned long abs_randwc(unsigned long num)
+{
+long temp;
+
+temp=randwc(num);
+if(temp<0) temp=0L-temp;
+
+return((unsigned long)temp);
+}
+*/
+u32 abs_randwc(u32 num)
+{
+/* Signed draw from randwc(); may come back negative */
+int32 draw=randwc(num);
+
+/* Fold negative draws onto the positive side before converting */
+return((u32)((draw<0) ? ((int32)0-draw) : draw));
+}
+
+/****************************
+* randnum() *
+*****************************
+** Second order linear congruential generator.
+** Constants suggested by J. G. Skellam.
+** If val==0, returns next member of sequence.
+** val!=0, restart generator.
+*/
+/*
+long randnum(long lngval)
+{
+ register long interm;
+ static long randw[2] = { 13L , 117L };
+
+ if (lngval!=0L)
+ { randw[0]=13L; randw[1]=117L; }
+
+ interm=(randw[0]*254754L+randw[1]*529562L)%999563L;
+ randw[1]=randw[0];
+ randw[0]=interm;
+ return(interm);
+}
+*/
+int32 randnum(int32 lngval)
+{
+	/*
+	** Second-order linear congruential generator.
+	**   lngval == 0 : return the next member of the sequence
+	**   lngval != 0 : reseed to the initial state first, then
+	**                 return the first member
+	*/
+	static int32 randw[2] = { (int32)13 , (int32)117 };
+	u32 sum;        /* Wrapped sum of the two products */
+	int32 interm;   /* Next member of the sequence */
+
+	if (lngval!=(int32)0)
+	{	randw[0]=(int32)13; randw[1]=(int32)117; }
+
+	/*
+	** The products exceed the range of int32 and the generator
+	** relies on them wrapping.  Signed overflow is undefined
+	** behaviour, so do the multiply/add in the unsigned type of the
+	** same width (defined modulo-2^N wrap) and convert back; on
+	** two's-complement targets this yields the same sequence.
+	*/
+	sum=(u32)randw[0]*(u32)254754+(u32)randw[1]*(u32)529562;
+	interm=(int32)sum%(int32)999563;
+	randw[1]=randw[0];
+	randw[0]=interm;
+	return(interm);
+}
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/misc.h b/benchmarks/nbench/nbench-byte-2.2.3/misc.h
new file mode 100644
index 0000000..0f9bc13
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/misc.h
@@ -0,0 +1,41 @@
+/*
+** misc.h
+** Header for misc.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/************************
+** FUNCTION PROTOTYPES **
+************************/
+
+/*
+long randwc(long num);
+unsigned long abs_randwc(unsigned long num);
+long randnum(long lngval);
+*/
+
+#include "nmglobal.h"
+int32 randwc(int32 num);
+u32 abs_randwc(u32 num);
+int32 randnum(int32 lngval);
+
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/nbench0.c b/benchmarks/nbench/nbench-byte-2.2.3/nbench0.c
new file mode 100644
index 0000000..37496e1
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/nbench0.c
@@ -0,0 +1,1176 @@
+
+/*
+** nbench0.c
+*/
+
+/*******************************************
+** BYTEmark (tm) **
+** BYTE MAGAZINE'S NATIVE MODE BENCHMARKS **
+** FOR CPU/FPU **
+** ver 2.0 **
+** Rick Grehan, BYTE Magazine **
+********************************************
+** NOTE: These benchmarks do NOT check for the presence
+** of an FPU. You have to find that out manually.
+**
+** REVISION HISTORY FOR BENCHMARKS
+** 9/94 -- First beta. --RG
+** 12/94 -- Bug discovered in some of the integer routines
+** (IDEA, Huffman,...). Routines were not accurately counting
+** the number of loops. Fixed. --RG (Thanks to Steve A.)
+** 12/94 -- Added routines to calculate and display index
+** values. Indexes based on DELL XPS 90 (90 MHz Pentium).
+** 1/95 -- Added Mac time manager routines for more accurate
+** timing on Macintosh (said to be good to 20 usecs) -- RG
+** 1/95 -- Re-did all the #defines so they made more
+** sense. See NMGLOBAL.H -- RG
+** 3/95 -- Fixed memory leak in LU decomposition. Did not
+** invalidate previous results, just made it easier to run.--RG
+** 3/95 -- Added TOOLHELP.DLL timing routine to Windows timer. --RG
+** 10/95 -- Added memory array & alignment; moved memory
+** allocation out of LU Decomposition -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-Hill and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench0.h"
+#include "hardware.h"
+
+/*************
+**** main ****
+*************/
+/*
+** Benchmark driver.  Resets the global parameters to their
+** defaults, enables every test, applies any command-line arguments
+** through parse_arg(), prints the banner, runs each enabled test
+** through bench_with_confidence() and finally prints the combined
+** index figures.
+**   argc, argv - command-line arguments; argv[0] is only used as
+**                the program name in the help text
+** The non-MAC variant returns 0 once all output has been produced.
+*/
+#ifdef MAC
+void main(void)
+#else
+int nbench_main(int argc, char *argv[])
+#endif
+{
+int i; /* Index */
+time_t time_and_date; /* Self-explanatory */
+struct tm *loctime;
+double bmean; /* Benchmark mean */
+double bstdev; /* Benchmark stdev */
+double lx_memindex; /* Linux memory index (mainly integer operations)*/
+double lx_intindex; /* Linux integer index */
+double lx_fpindex; /* Linux floating-point index */
+double intindex; /* Integer index */
+double fpindex; /* Floating-point index */
+ulong bnumrun; /* # of runs */
+
+#ifdef MAC
+ MaxApplZone();
+#endif
+
+#ifdef MACTIMEMGR
+/* Set up high res timer */
+MacHSTdelay=600*1000*1000; /* Delay is 10 minutes */
+
+memset((char *)&myTMTask,0,sizeof(TMTask));
+
+/* Prime and remove the task, calculating overhead */
+PrimeTime((QElemPtr)&myTMTask,-MacHSTdelay);
+RmvTime((QElemPtr)&myTMTask);
+MacHSTohead=MacHSTdelay+myTMTask.tmCount;
+#endif
+
+#ifdef WIN31TIMER
+/* Set up the size of the timer info structure */
+win31tinfo.dwSize=(DWORD)sizeof(TIMERINFO);
+/* Load library */
+if((hThlp=LoadLibrary("TOOLHELP.DLL"))<32)
+{ printf("Error loading TOOLHELP\n");
+ exit(0);
+}
+if(!(lpfn=GetProcAddress(hThlp,"TimerCount")))
+{ printf("TOOLHELP error\n");
+ exit(0);
+}
+#endif
+
+/*
+** Set global parameters to default.
+*/
+global_min_ticks=MINIMUM_TICKS;
+global_min_seconds=MINIMUM_SECONDS;
+global_allstats=0;
+global_custrun=0;
+global_align=8;
+write_to_file=0;
+lx_memindex=(double)1.0; /* set for geometric mean computations */
+lx_intindex=(double)1.0;
+lx_fpindex=(double)1.0;
+intindex=(double)1.0;
+fpindex=(double)1.0;
+mem_array_ents=0; /* Nothing in mem array */
+
+/*
+** We presume all tests will be run unless told
+** otherwise
+*/
+for(i=0;i<NUMTESTS;i++)
+ tests_to_do[i]=1;
+
+/*
+** Initialize test data structures to default
+** values.
+*/
+set_request_secs(); /* Set all request_secs fields */
+global_numsortstruct.adjust=0;
+global_numsortstruct.arraysize=NUMARRAYSIZE;
+
+global_strsortstruct.adjust=0;
+global_strsortstruct.arraysize=STRINGARRAYSIZE;
+
+global_bitopstruct.adjust=0;
+global_bitopstruct.bitfieldarraysize=BITFARRAYSIZE;
+
+global_emfloatstruct.adjust=0;
+global_emfloatstruct.arraysize=EMFARRAYSIZE;
+
+global_fourierstruct.adjust=0;
+
+global_assignstruct.adjust=0;
+
+global_ideastruct.adjust=0;
+global_ideastruct.arraysize=IDEAARRAYSIZE;
+
+global_huffstruct.adjust=0;
+global_huffstruct.arraysize=HUFFARRAYSIZE;
+
+global_nnetstruct.adjust=0;
+
+global_lustruct.adjust=0;
+
+/*
+** For Macintosh -- read the command line.
+*/
+#ifdef MAC
+UCommandLine();
+#endif
+
+/*
+** Handle any command-line arguments.  A rejected argument shows
+** the help text and ends the program.
+*/
+if(argc>1)
+ for(i=1;i<argc;i++)
+ if(parse_arg(argv[i])==-1)
+ { display_help(argv[0]);
+ exit(0);
+ }
+/*
+** Output header
+*/
+#ifdef LINUX
+output_string("\nBYTEmark* Native Mode Benchmark ver. 2 (10/95)\n");
+output_string("Index-split by Andrew D. Balsa (11/97)\n");
+output_string("Linux/Unix* port by Uwe F. Mayer (12/96,11/97)\n");
+#else
+output_string("BBBBBB YYY Y TTTTTTT EEEEEEE\n");
+output_string("BBB B YYY Y TTT EEE\n");
+output_string("BBB B YYY Y TTT EEE\n");
+output_string("BBBBBB YYY Y TTT EEEEEEE\n");
+output_string("BBB B YYY TTT EEE\n");
+output_string("BBB B YYY TTT EEE\n");
+output_string("BBBBBB YYY TTT EEEEEEE\n\n");
+output_string("\nBYTEmark (tm) Native Mode Benchmark ver. 2 (10/95)\n");
+#endif
+/*
+** See if the user wants all stats. Output heading info
+** if so.
+*/
+if(global_allstats)
+{
+ output_string("\n");
+ output_string("============================== ALL STATISTICS ===============================\n");
+ time(&time_and_date);
+ loctime=localtime(&time_and_date);
+ sprintf(buffer,"**Date and time of benchmark run: %s",asctime(loctime));
+ output_string(buffer);
+ sprintf(buffer,"**Sizeof: char:%u short:%u int:%u long:%u u8:%u u16:%u u32:%u int32:%u\n",
+ (unsigned int)sizeof(char),
+ (unsigned int)sizeof(short),
+ (unsigned int)sizeof(int),
+ (unsigned int)sizeof(long),
+ (unsigned int)sizeof(u8),
+ (unsigned int)sizeof(u16),
+ (unsigned int)sizeof(u32),
+ (unsigned int)sizeof(int32));
+ output_string(buffer);
+#ifdef LINUX
+#include "sysinfo.c"
+#else
+ sprintf(buffer,"**%s\n",sysname);
+ output_string(buffer);
+ sprintf(buffer,"**%s\n",compilername);
+ output_string(buffer);
+ sprintf(buffer,"**%s\n",compilerversion);
+ output_string(buffer);
+#endif
+ output_string("=============================================================================\n");
+}
+
+/*
+** Execute the tests.
+*/
+#ifdef LINUX
+output_string("\nTEST : Iterations/sec. : Old Index : New Index\n");
+output_string(" : : Pentium 90* : AMD K6/233*\n");
+output_string("--------------------:------------------:-------------:------------\n");
+#endif
+
+for(i=0;i<NUMTESTS;i++)
+{
+ if(tests_to_do[i])
+ { sprintf(buffer,"%s :",ftestnames[i]);
+ output_string(buffer);
+ if (0!=bench_with_confidence(i,
+ &bmean,
+ &bstdev,
+ &bnumrun)){
+ output_string("\n** WARNING: The current test result is NOT 95 % statistically certain.\n");
+ output_string("** WARNING: The variation among the individual results is too large.\n");
+ output_string(" :");
+ }
+#ifdef LINUX
+ sprintf(buffer," %15.5g : %9.2f : %9.2f\n",
+ bmean,bmean/bindex[i],bmean/lx_bindex[i]);
+#else
+ sprintf(buffer," Iterations/sec.: %13.2f Index: %6.2f\n",
+ bmean,bmean/bindex[i]);
+#endif
+ output_string(buffer);
+ /*
+ ** Gather integer or FP indexes.  Tests 4, 8 and 9 feed the
+ ** floating-point products; every other test feeds the integer
+ ** product.  For the Linux split, tests 0, 3, 6 and 7 count as
+ ** integer and the remaining non-FP tests as memory.
+ */
+ if((i==4)||(i==8)||(i==9)){
+ /* FP index */
+ fpindex=fpindex*(bmean/bindex[i]);
+ /* Linux FP index */
+ lx_fpindex=lx_fpindex*(bmean/lx_bindex[i]);
+ }
+ else{
+ /* Integer index */
+ intindex=intindex*(bmean/bindex[i]);
+ if((i==0)||(i==3)||(i==6)||(i==7))
+ /* Linux integer index */
+ lx_intindex=lx_intindex*(bmean/lx_bindex[i]);
+ else
+ /* Linux memory index */
+ lx_memindex=lx_memindex*(bmean/lx_bindex[i]);
+ }
+
+ if(global_allstats)
+ {
+ sprintf(buffer," Absolute standard deviation: %g\n",bstdev);
+ output_string(buffer);
+ if (bmean>(double)1e-100){
+ /* avoid division by zero */
+ sprintf(buffer," Relative standard deviation: %g %%\n",
+ (double)100*bstdev/bmean);
+ output_string(buffer);
+ }
+ sprintf(buffer," Number of runs: %lu\n",bnumrun);
+ output_string(buffer);
+ show_stats(i);
+ sprintf(buffer,"Done with %s\n\n",ftestnames[i]);
+ output_string(buffer);
+ }
+ }
+}
+/* printf("...done...\n"); */
+
+/*
+** Output the total indexes.  The exponents (.142857 ~ 1/7,
+** .33333 ~ 1/3, .25 = 1/4) turn each product into a geometric
+** mean over 7, 3 and 4 results respectively, so the figures are
+** only true means when every test was run.
+** NOTE(review): the condition is hard-wired to 1, so the indexes
+** are printed even for a custom (partial) run.
+*/
+if(1)//global_custrun==1)
+{
+ output_string("==========================ORIGINAL BYTEMARK RESULTS==========================\n");
+ sprintf(buffer,"INTEGER INDEX : %.3f\n",
+ pow(intindex,(double).142857));
+ output_string(buffer);
+ sprintf(buffer,"FLOATING-POINT INDEX: %.3f\n",
+ pow(fpindex,(double).33333));
+ output_string(buffer);
+ output_string("Baseline (MSDOS*) : Pentium* 90, 256 KB L2-cache, Watcom* compiler 10.0\n");
+#ifdef LINUX
+ output_string("==============================LINUX DATA BELOW===============================\n");
+ hardware(write_to_file, global_ofile);
+#include "sysinfoc.c"
+ sprintf(buffer,"MEMORY INDEX : %.3f\n",
+ pow(lx_memindex,(double).3333333333));
+ output_string(buffer);
+ sprintf(buffer,"INTEGER INDEX : %.3f\n",
+ pow(lx_intindex,(double).25));
+ output_string(buffer);
+ sprintf(buffer,"FLOATING-POINT INDEX: %.3f\n",
+ pow(lx_fpindex,(double).3333333333));
+ output_string(buffer);
+ output_string("Baseline (LINUX) : AMD K6/233*, 512 KB L2-cache, gcc 2.7.2.3, libc-5.4.38\n");
+#endif
+output_string("* Trademarks are property of their respective holder.\n");
+}
+
+/*
+** The function is entered through a caller rather than being the
+** process entry point, so it returns instead of calling exit().
+** The non-MAC variant is declared int and must return a value.
+*/
+#ifndef MAC
+return(0);
+#endif
+}
+
+/**************
+** parse_arg **
+***************
+** Given a pointer to a string, we assume that's an argument.
+** Parse that argument and act accordingly.
+** Return 0 if ok, else return -1.
+*/
+static int parse_arg(char *argptr)
+{
+/*
+** Parses one command-line argument in place.
+**   argptr - argument text; everything after the leading '-' is
+**            converted to upper case (this includes the file name
+**            that follows -C)
+** Returns 0 if the argument was accepted, -1 if it is malformed,
+** unknown, asks for help, or names a command file that cannot be
+** opened.
+*/
+char *cp; /* Walks the argument while upper-casing it */
+FILE *cfile; /* Command file identifier */
+
+/*
+** First character has got to be a hyphen.
+*/
+if(*argptr++!='-') return(-1);
+
+/*
+** Convert the rest of the argument to upper case
+** so there's little chance of confusion.  toupper() is only
+** defined for values representable as unsigned char (or EOF),
+** hence the cast.
+*/
+for(cp=argptr;*cp!='\0';cp++)
+ *cp=(char)toupper((unsigned char)*cp);
+
+/*
+** Next character picks the action.
+*/
+switch(*argptr++)
+{
+ case '?': return(-1); /* Will display help */
+
+ case 'V': global_allstats=1; return(0); /* verbose mode */
+
+ case 'C': /* Command file name */
+ /*
+ ** First try to open the file for reading.
+ */
+ cfile=fopen(argptr,"r");
+ if(cfile==(FILE *)NULL)
+ { printf("**Error opening file: %s\n",argptr);
+ return(-1);
+ }
+ read_comfile(cfile); /* Read commands */
+ fclose(cfile);
+ break;
+ default:
+ return(-1);
+}
+return(0);
+}
+
+/*******************
+** display_help() **
+********************
+** Display a help message showing argument requirements and such.
+** Exit when you're done...I mean, REALLY exit.
+*/
+void display_help(char *progname)
+{
+ /* Usage line carries the program name; the option lines are fixed text. */
+ printf("Usage: %s [-v] [-c<FILE>]\n",progname);
+ fputs(" -v = verbose\n",stdout);
+ fputs(" -c = input parameters thru command file <FILE>\n",stdout);
+
+ /* Showing help always ends the program. */
+ exit(0);
+}
+
+
+/*****************
+** read_comfile **
+******************
+** Read the command file. Set global parameters as
+** specified. This routine assumes that the command file
+** is already open.
+*/
+static void read_comfile(FILE *cfile)
+{
+/*
+** Reads "NAME=value" lines from an already-open command file and
+** stores each value in the matching global parameter.
+**   cfile - stream to read; it is not closed here
+** A line without '=' or with an unknown name is reported on
+** standard output and skipped.  Lines longer than the local buffer
+** are read in pieces by fgets().
+*/
+char inbuf[40];
+char *eptr; /* Offset to "=" sign */
+int i; /* Index */
+size_t len; /* Length of the current line */
+
+/*
+** Sit in a big loop, reading a line from the file at each
+** pass. Terminate on EOF.
+*/
+while(fgets(inbuf,(int)sizeof(inbuf),cfile)!=(char *)NULL)
+{
+ /*
+ ** Strip the line terminator only if one is present.  The last
+ ** line of a file may have none, and removing its final
+ ** character unconditionally would corrupt the value.
+ */
+ len=strlen(inbuf);
+ while(len>0 && (inbuf[len-1]=='\n' || inbuf[len-1]=='\r'))
+ inbuf[--len]='\0';
+
+ /*
+ ** Parse up to the "=" sign. If we don't find an
+ ** "=", then flag an error.
+ */
+ if((eptr=strchr(inbuf,(int)'='))==(char *)NULL)
+ { printf("**COMMAND FILE ERROR at LINE:\n %s\n",
+ inbuf);
+ goto skipswitch; /* A GOTO!!!! */
+ }
+
+ /*
+ ** Insert a null where the "=" was, then convert
+ ** the substring to uppercase. That will enable
+ ** us to perform the match.
+ */
+ *eptr++='\0';
+ strtoupper((char *)&inbuf[0]);
+ /* Search the name table from its last entry down to entry 0 */
+ i=MAXPARAM;
+ do {
+ if(strcmp(inbuf,paramnames[i])==0)
+ break;
+ } while(--i>=0);
+
+ if(i<0)
+ { printf("**COMMAND FILE ERROR -- UNKNOWN PARAM: %s\n",
+ inbuf);
+ goto skipswitch;
+ }
+
+ /*
+ ** eptr now points at the next field...which should be
+ ** the value assigned to the parameter.
+ */
+ switch(i)
+ {
+ case PF_GMTICKS: /* GLOBALMINTICKS */
+ global_min_ticks=(ulong)atol(eptr);
+ break;
+
+ case PF_MINSECONDS: /* MINSECONDS */
+ global_min_seconds=(ulong)atol(eptr);
+ set_request_secs();
+ break;
+
+ case PF_ALLSTATS: /* ALLSTATS */
+ global_allstats=getflag(eptr);
+ break;
+
+ case PF_OUTFILE: /* OUTFILE */
+ strcpy(global_ofile_name,eptr);
+ global_ofile=fopen(global_ofile_name,"a");
+ /*
+ ** Open the output file.
+ */
+ if(global_ofile==(FILE *)NULL)
+ { printf("**Error opening output file: %s\n",
+ global_ofile_name);
+ ErrorExit();
+ }
+ write_to_file=-1;
+ break;
+
+ case PF_CUSTOMRUN: /* CUSTOMRUN */
+ global_custrun=getflag(eptr);
+ /* A custom run starts with every test switched off */
+ for(i=0;i<NUMTESTS;i++)
+ tests_to_do[i]=1-global_custrun;
+ break;
+
+ case PF_DONUM: /* DONUMSORT */
+ tests_to_do[TF_NUMSORT]=getflag(eptr);
+ break;
+
+ case PF_NUMNUMA: /* NUMNUMARRAYS */
+ global_numsortstruct.numarrays=
+ (ushort)atoi(eptr);
+ global_numsortstruct.adjust=1;
+ break;
+
+ case PF_NUMASIZE: /* NUMARRAYSIZE */
+ global_numsortstruct.arraysize=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_NUMMINS: /* NUMMINSECONDS */
+ global_numsortstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOSTR: /* DOSTRINGSORT */
+ tests_to_do[TF_SSORT]=getflag(eptr);
+ break;
+
+ case PF_STRASIZE: /* STRARRAYSIZE */
+ global_strsortstruct.arraysize=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_NUMSTRA: /* NUMSTRARRAYS */
+ global_strsortstruct.numarrays=
+ (ushort)atoi(eptr);
+ global_strsortstruct.adjust=1;
+ break;
+
+ case PF_STRMINS: /* STRMINSECONDS */
+ global_strsortstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOBITF: /* DOBITFIELD */
+ tests_to_do[TF_BITOP]=getflag(eptr);
+ break;
+
+ case PF_NUMBITOPS: /* NUMBITOPS */
+ global_bitopstruct.bitoparraysize=
+ (ulong)atol(eptr);
+ global_bitopstruct.adjust=1;
+ break;
+
+ case PF_BITFSIZE: /* BITFIELDSIZE */
+ global_bitopstruct.bitfieldarraysize=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_BITMINS: /* BITMINSECONDS */
+ global_bitopstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOEMF: /* DOEMF */
+ tests_to_do[TF_FPEMU]=getflag(eptr);
+ break;
+
+ case PF_EMFASIZE: /* EMFARRAYSIZE */
+ global_emfloatstruct.arraysize=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_EMFLOOPS: /* EMFLOOPS */
+ global_emfloatstruct.loops=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_EMFMINS: /* EMFMINSECOND */
+ global_emfloatstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOFOUR: /* DOFOUR */
+ tests_to_do[TF_FFPU]=getflag(eptr);
+ break;
+
+ case PF_FOURASIZE: /* FOURASIZE */
+ global_fourierstruct.arraysize=
+ (ulong)atol(eptr);
+ global_fourierstruct.adjust=1;
+ break;
+
+ case PF_FOURMINS: /* FOURMINSECONDS */
+ global_fourierstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOASSIGN: /* DOASSIGN */
+ tests_to_do[TF_ASSIGN]=getflag(eptr);
+ break;
+
+ case PF_AARRAYS: /* ASSIGNARRAYS */
+ global_assignstruct.numarrays=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_ASSIGNMINS: /* ASSIGNMINSECONDS */
+ global_assignstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOIDEA: /* DOIDEA */
+ tests_to_do[TF_IDEA]=getflag(eptr);
+ break;
+
+ case PF_IDEAASIZE: /* IDEAARRAYSIZE */
+ global_ideastruct.arraysize=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_IDEALOOPS: /* IDEALOOPS */
+ global_ideastruct.loops=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_IDEAMINS: /* IDEAMINSECONDS */
+ global_ideastruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOHUFF: /* DOHUFF */
+ tests_to_do[TF_HUFF]=getflag(eptr);
+ break;
+
+ case PF_HUFFASIZE: /* HUFFARRAYSIZE */
+ global_huffstruct.arraysize=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_HUFFLOOPS: /* HUFFLOOPS */
+ global_huffstruct.loops=
+ (ulong)atol(eptr);
+ global_huffstruct.adjust=1;
+ break;
+
+ case PF_HUFFMINS: /* HUFFMINSECONDS */
+ global_huffstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DONNET: /* DONNET */
+ tests_to_do[TF_NNET]=getflag(eptr);
+ break;
+
+ case PF_NNETLOOPS: /* NNETLOOPS */
+ global_nnetstruct.loops=
+ (ulong)atol(eptr);
+ global_nnetstruct.adjust=1;
+ break;
+
+ case PF_NNETMINS: /* NNETMINSECONDS */
+ global_nnetstruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_DOLU: /* DOLU */
+ tests_to_do[TF_LU]=getflag(eptr);
+ break;
+
+ case PF_LUNARRAYS: /* LUNUMARRAYS */
+ global_lustruct.numarrays=
+ (ulong)atol(eptr);
+ global_lustruct.adjust=1;
+ break;
+
+ case PF_LUMINS: /* LUMINSECONDS */
+ global_lustruct.request_secs=
+ (ulong)atol(eptr);
+ break;
+
+ case PF_ALIGN: /* ALIGN */
+ global_align=atoi(eptr);
+ break;
+ }
+skipswitch:
+ continue;
+} /* End while */
+
+return;
+}
+
+/************
+** getflag **
+*************
+** Return 1 if the first character at cptr is 'T' or 't'; 0 otherwise.
+*/
+static int getflag(char *cptr)
+{
+/*
+** Only the first character is examined, case-insensitively.
+** toupper() is only defined for values representable as unsigned
+** char (or EOF), so the plain char is converted first.
+*/
+ if(toupper((unsigned char)*cptr)=='T') return(1);
+return(0);
+}
+
+/***************
+** strtoupper **
+****************
+** Converts a null-terminated string to upper case, in place.
+** Characters that have no upper-case form are left unchanged,
+** and an empty string is left untouched.
+*/
+static void strtoupper(char *s)
+{
+    /*
+    ** Test for the terminator BEFORE converting. The earlier do/while
+    ** form converted and stepped past s[0] unconditionally, so an empty
+    ** string made it run beyond the terminating null.
+    ** The conversion and the increment stay in separate expressions:
+    ** "*s++=(char)toupper((int)*s)" modifies and reads s without a
+    ** sequence point in between.
+    */
+    for(; *s!='\0'; s++)
+        *s=(char)toupper((unsigned char)*s);
+    return;
+}
+
+/*********************
+** set_request_secs **
+**********************
+** Copy the value of global_min_seconds into the "request_secs"
+** entry of every benchmark's global structure. This is done
+** at the beginning, and possibly later if the user redefines
+** the minimum seconds in the command file.
+*/
+static void set_request_secs(void)
+{
+    const ulong secs=global_min_seconds;    /* Value handed to every test */
+
+    global_lustruct.request_secs=secs;
+    global_nnetstruct.request_secs=secs;
+    global_huffstruct.request_secs=secs;
+    global_ideastruct.request_secs=secs;
+    global_assignstruct.request_secs=secs;
+    global_fourierstruct.request_secs=secs;
+    global_emfloatstruct.request_secs=secs;
+    global_bitopstruct.request_secs=secs;
+    global_strsortstruct.request_secs=secs;
+    global_numsortstruct.request_secs=secs;
+}
+
+
+/**************************
+** bench_with_confidence **
+***************************
+** Given a benchmark id that indicates a function, this routine
+** repeatedly calls that benchmark until the 95% confidence
+** half-interval of the collected scores is 5% or less of their mean.
+**
+** Five runs are always made first. After that one run is added at a
+** time; scores are never deleted or replaced, because exchanging data
+** points depending on what we already have would violate the
+** independence of the observations (Uwe F. Mayer). At most 30
+** observations are made.
+**
+** fid      - index into funcpointer[], also passed to getscore()
+** mean     - receives the mean of the scores
+** stdev    - receives the sample standard deviation of the scores
+** numtries - receives the number of runs performed
+**
+** Return 0 if ok. Return -1 if calc_confidence() reports an error or
+** if 30 runs were not enough; in the latter case *mean and *stdev
+** hold the statistics of those 30 scores.
+*/
+static int bench_with_confidence(int fid,       /* Function id */
+        double *mean,                   /* Mean of scores */
+        double *stdev,                  /* Standard deviation */
+        ulong *numtries)                /* # of attempts */
+{
+enum { INITIAL_RUNS=5, MAX_RUNS=30 };   /* Need at least 5 scores, use at most 30 */
+double myscores[MAX_RUNS];      /* Every score collected so far */
+double c_half_interval;         /* Confidence half interval */
+int i;                          /* Index */
+
+/*
+** Get the first scores. Then begin confidence testing.
+*/
+for(i=0;i<INITIAL_RUNS;i++)
+{
+    (*funcpointer[fid])();
+    myscores[i]=getscore(fid);
+#ifdef DEBUG
+    printf("score # %d = %g\n", i, myscores[i]);
+#endif
+}
+*numtries=INITIAL_RUNS;
+
+/*
+** Keep OLDCODE undefined from here on. The obsolete seek_confidence()
+** routine further down this file is guarded by the same macro.
+*/
+#ifdef OLDCODE
+#undef OLDCODE
+#endif
+
+/*
+** Enter loop to test for confidence criteria.
+*/
+while(1)
+{
+    /*
+    ** Calculate confidence. Should always return 0.
+    */
+    if(0!=calc_confidence(myscores,
+            (int)*numtries,
+            &c_half_interval,
+            mean,
+            stdev)) return(-1);
+
+    /*
+    ** Is the length of the half interval 5% or less of mean?
+    ** If so, we can go home. Otherwise, we have to continue.
+    */
+    if(c_half_interval/(*mean) <= (double)0.05)
+        break;
+
+    /*
+    ** We now simply add a new test run and hope that the runs
+    ** finally stabilize (Uwe F. Mayer). Give up once the score
+    ** array is full.
+    */
+    if(*numtries==MAX_RUNS) return(-1);
+    (*funcpointer[fid])();
+    myscores[*numtries]=getscore(fid);
+#ifdef DEBUG
+    /* *numtries is unsigned, so print it with %lu rather than %ld */
+    printf("score # %lu = %g\n", (unsigned long)*numtries, myscores[*numtries]);
+#endif
+    *numtries+=1;
+}
+
+return(0);
+}
+
+#ifdef OLDCODE
+/* this procedure is no longer needed, Uwe F. Mayer */
+ /********************
+ ** seek_confidence **
+ *********************
+ ** Pass this routine an array of 5 scores PLUS a new score.
+ ** This routine tries the new score in place of each of
+ ** the other five scores to determine if the new score,
+ ** when replacing one of the others, improves the confidence
+ ** half-interval.
+ ** Return 0 if failure. Original 5 scores unchanged.
+ ** Return -1 if success. Also returns new half-interval,
+ ** mean, and standard deviation of the sample.
+ ** NOTE: the calc_confidence() calls below pass four arguments,
+ ** while calc_confidence() in this file takes five (scores,
+ ** num_scores, c_half_interval, smean, sdev). This routine would
+ ** have to be updated before OLDCODE could be enabled again.
+ */
+ static int seek_confidence( double scores[5],
+ double *newscore,
+ double *c_half_interval,
+ double *smean,
+ double *sdev)
+ {
+ double sdev_to_beat; /* Original sdev to be beaten */
+ double temp; /* For doing a swap */
+ int is_beaten; /* Index of the replaced score, -1 if none */
+ int i; /* Index */
+
+ /*
+ ** First calculate original standard deviation
+ */
+ calc_confidence(scores,c_half_interval,smean,sdev);
+ sdev_to_beat=*sdev;
+ is_beaten=-1;
+
+ /*
+ ** Try to beat original score. We'll come out of this
+ ** loop with a flag.
+ */
+ for(i=0;i<5;i++)
+ {
+ temp=scores[i];
+ scores[i]=*newscore;
+ calc_confidence(scores,c_half_interval,smean,sdev);
+ scores[i]=temp;
+ if(sdev_to_beat>*sdev)
+ { is_beaten=i;
+ sdev_to_beat=*sdev;
+ }
+ }
+
+ if(is_beaten!=-1)
+ { scores[is_beaten]=*newscore;
+ return(-1);
+ }
+ return(0);
+ }
+#endif
+
+/********************
+** calc_confidence **
+*********************
+** Given a set of num_scores scores, calculate the confidence
+** half-interval. We'll also return the sample mean and sample
+** standard deviation.
+** NOTE: This routine presumes a confidence level of 95%.
+**
+** Returns 0 on success. Returns -1, after printing a message, if
+** num_scores is outside the supported range 2..30; the output
+** arguments are not written in that case.
+*/
+static int calc_confidence(double scores[], /* Array of scores */
+        int num_scores,                 /* Number of scores in array */
+        double *c_half_interval,        /* Confidence half-interval */
+        double *smean,                  /* Sample mean */
+        double *sdev)                   /* Sample standard deviation */
+{
+/*
+** Upper .025 quantile of the Student t distribution for 1 to 29
+** degrees of freedom, indexed by degrees of freedom. The value at 0
+** is bogus, as there is no value for zero degrees of freedom.
+** The table never changes, so it is static const instead of being
+** rebuilt on the stack at every call.
+*/
+static const double student_t[30]={0.0 , 12.706 , 4.303 , 3.182 , 2.776 , 2.571 ,
+                             2.447 , 2.365 , 2.306 , 2.262 , 2.228 ,
+                             2.201 , 2.179 , 2.160 , 2.145 , 2.131 ,
+                             2.120 , 2.110 , 2.101 , 2.093 , 2.086 ,
+                             2.080 , 2.074 , 2.069 , 2.064 , 2.060 ,
+                             2.056 , 2.052 , 2.048 , 2.045 };
+double sum;     /* Running sum of scores */
+double sumsq;   /* Running sum of squared deviations */
+double diff;    /* Deviation of one score from the mean */
+int i;          /* Index */
+
+/* Fewer than 2 scores gives no variance; more than 30 exceeds the table */
+if((num_scores<2) || (num_scores>30)) {
+    output_string("Internal error: calc_confidence called with an illegal number of scores\n");
+    return(-1);
+}
+
+/*
+** First calculate mean.
+*/
+sum=(double)0.0;
+for(i=0;i<num_scores;i++)
+    sum+=scores[i];
+*smean=sum/(double)num_scores;
+
+/*
+** Get the sample standard deviation (divisor is num_scores-1).
+*/
+sumsq=(double)0.0;
+for(i=0;i<num_scores;i++) {
+    diff=scores[i]-(*smean);
+    sumsq+=diff*diff;
+}
+*sdev=sqrt(sumsq/(double)(num_scores-1));
+
+/*
+** Now calculate the length of the confidence half-interval. For a
+** confidence level of 95% the multiplying factor is the upper .025
+** quantile of a t distribution with num_scores-1 degrees of freedom,
+** and we divide by sqrt(number of observations). See any
+** introduction to statistics.
+*/
+*c_half_interval=student_t[num_scores-1] * (*sdev) / sqrt((double)num_scores);
+return(0);
+}
+
+/*************
+** getscore **
+**************
+** Return the score most recently stored in the global structure
+** of the benchmark identified by fid (a TF_xxx constant).
+** An unrecognized fid yields 0.0.
+*/
+static double getscore(int fid)
+{
+double score=(double)0.0;   /* Result for an unknown id */
+
+/*
+** Each benchmark keeps its score in a differently named field,
+** so pick the right one for this id.
+*/
+switch(fid)
+{
+    case TF_LU:
+        score=global_lustruct.iterspersec;
+        break;
+    case TF_NNET:
+        score=global_nnetstruct.iterspersec;
+        break;
+    case TF_HUFF:
+        score=global_huffstruct.iterspersec;
+        break;
+    case TF_IDEA:
+        score=global_ideastruct.iterspersec;
+        break;
+    case TF_ASSIGN:
+        score=global_assignstruct.iterspersec;
+        break;
+    case TF_FFPU:
+        score=global_fourierstruct.fflops;
+        break;
+    case TF_FPEMU:
+        score=global_emfloatstruct.emflops;
+        break;
+    case TF_BITOP:
+        score=global_bitopstruct.bitopspersec;
+        break;
+    case TF_SSORT:
+        score=global_strsortstruct.sortspersec;
+        break;
+    case TF_NUMSORT:
+        score=global_numsortstruct.sortspersec;
+        break;
+}
+return(score);
+}
+
+/******************
+** output_string **
+*******************
+** Writes a string to standard output exactly as given; no line
+** ending is appended, so the caller supplies it in the buffer.
+** The copy to the output file (global_ofile, controlled by
+** write_to_file) is disabled in this version.
+*/
+static void output_string(char *buffer)
+{
+    fprintf(stdout,"%s",buffer);
+}
+
+/***************
+** show_stats **
+****************
+** Prints the run parameters of one benchmark, read from that
+** benchmark's global structure. The benchmark is identified by
+** its TF_xxx id; an unknown id prints nothing.
+*/
+static void show_stats (int bid)
+{
+char line[80];  /* One formatted line of output */
+
+if(bid==TF_NUMSORT)
+{   /* Numeric sort */
+    sprintf(line," Number of arrays: %d\n",
+        global_numsortstruct.numarrays);
+    output_string(line);
+    sprintf(line," Array size: %ld\n",
+        global_numsortstruct.arraysize);
+    output_string(line);
+}
+else if(bid==TF_SSORT)
+{   /* String sort */
+    sprintf(line," Number of arrays: %d\n",
+        global_strsortstruct.numarrays);
+    output_string(line);
+    sprintf(line," Array size: %ld\n",
+        global_strsortstruct.arraysize);
+    output_string(line);
+}
+else if(bid==TF_BITOP)
+{   /* Bitmap operation */
+    sprintf(line," Operations array size: %ld\n",
+        global_bitopstruct.bitoparraysize);
+    output_string(line);
+    sprintf(line," Bitfield array size: %ld\n",
+        global_bitopstruct.bitfieldarraysize);
+    output_string(line);
+}
+else if(bid==TF_FPEMU)
+{   /* Floating-point emulation */
+    sprintf(line," Number of loops: %lu\n",
+        global_emfloatstruct.loops);
+    output_string(line);
+    sprintf(line," Array size: %lu\n",
+        global_emfloatstruct.arraysize);
+    output_string(line);
+}
+else if(bid==TF_FFPU)
+{   /* Fourier test */
+    sprintf(line," Number of coefficients: %lu\n",
+        global_fourierstruct.arraysize);
+    output_string(line);
+}
+else if(bid==TF_ASSIGN)
+{   /* Assignment algorithm */
+    sprintf(line," Number of arrays: %lu\n",
+        global_assignstruct.numarrays);
+    output_string(line);
+}
+else if(bid==TF_IDEA)
+{   /* IDEA encryption */
+    sprintf(line," Array size: %lu\n",
+        global_ideastruct.arraysize);
+    output_string(line);
+    sprintf(line," Number of loops: %lu\n",
+        global_ideastruct.loops);
+    output_string(line);
+}
+else if(bid==TF_HUFF)
+{   /* Huffman compression */
+    sprintf(line," Array size: %lu\n",
+        global_huffstruct.arraysize);
+    output_string(line);
+    sprintf(line," Number of loops: %lu\n",
+        global_huffstruct.loops);
+    output_string(line);
+}
+else if(bid==TF_NNET)
+{   /* Neural net */
+    sprintf(line," Number of loops: %lu\n",
+        global_nnetstruct.loops);
+    output_string(line);
+}
+else if(bid==TF_LU)
+{   /* LU decomposition */
+    sprintf(line," Number of arrays: %lu\n",
+        global_lustruct.numarrays);
+    output_string(line);
+}
+return;
+}
+
+/*
+** Following code added for Mac stuff, so that we can emulate command
+** lines.
+*/
+
+#ifdef MAC
+
+/*****************
+** UCommandLine **
+******************
+** Prompts for and reads in a command line, then calls UParse() to
+** set up argc and argv appropriately.
+** The line is read with fgets() limited to the size of Uargbuff, so
+** a line that is too long is cut off instead of overflowing the
+** buffer (the unread remainder stays in the input stream). The
+** newline that fgets() keeps is whitespace, which UParse() skips.
+** If nothing can be read, the command line is treated as empty.
+*/
+void UCommandLine(void)
+{
+printf("Enter command line\n:");
+if(fgets((char *)Uargbuff,(int)sizeof(Uargbuff),stdin)==NULL)
+    Uargbuff[0]='\0';   /* EOF or read error: no arguments */
+UParse();
+return;
+}
+
+/***********
+** UParse **
+************
+** Split the pseudo command-line held in Uargbuff into fields.
+** This code appeared as part of the Small-C library in Dr. Dobb's
+** ToolBook of C. It works on the following globals:
+** argc = arg count
+** argv = Array of char pointers, one per argument
+** Uargbuff = Character array that holds the arguments; it is
+** modified in place, each field being terminated by UField().
+** Udummy = 2-byte buffer that is set to "*" and stored as the first
+** argument. This maintains compatibility with other C's, though
+** it does not provide access to the executable filename.
+** argc never exceeds 20, counting the dummy first argument; fields
+** beyond that are skipped without being stored.
+** Also note that this routine does NOT allow for redirection.
+*/
+void UParse(void)
+{
+unsigned char *cursor=Uargbuff;     /* Current position in the buffer */
+
+/* The first argument is always the dummy "*" */
+Udummy[0]='*';
+Udummy[1]='\0';
+argv[0]=(char *)Udummy;
+argc=1;
+
+while(*cursor)
+{
+    if(!isspace(*cursor))
+    {   /* Start of a field: record it if there is room, then skip it */
+        if(argc<20)
+        {   argv[argc]=(char *)cursor;
+            argc++;
+        }
+        cursor=UField(cursor);
+    }
+    else
+        ++cursor;   /* Separator between fields */
+}
+return;
+}
+/***********
+** UField **
+************
+** Isolate the next command-line field.
+** Scans forward from ptr to the first whitespace character, replaces
+** it with a null terminator and returns a pointer to the character
+** after it. If the end of the string is reached first, the string is
+** left unchanged and a pointer to its terminator is returned.
+*/
+unsigned char *UField(unsigned char *ptr)
+{
+    for(; *ptr!='\0'; ++ptr)
+    {
+        if(isspace(*ptr))
+        {
+            /* Terminate with a character constant; NULL is a pointer constant */
+            *ptr='\0';
+            return(ptr+1);
+        }
+    }
+    return(ptr);
+}
+#endif
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/nbench0.h b/benchmarks/nbench/nbench-byte-2.2.3/nbench0.h
new file mode 100644
index 0000000..cef0928
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/nbench0.h
@@ -0,0 +1,356 @@
+/*
+** nbench0.h
+** Header for nbench0.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+** 10/95 - Added memory array & alignment -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Following should be modified accordingly per each
+** compilation.
+** NOTE: these are definitions with initializers, not extern
+** declarations, even though they live in a header.
+*/
+char *sysname="You can enter your system description in nbench0.h";
+char *compilername="It then will be printed here after you recompile";
+char *compilerversion="Have a nice day";
+
+/* Parameter flags. Must coincide with parameter names array
+** which appears below.
+** The comment after each flag gives the string stored at that index
+** of paramnames[]. Note the spellings FOURSIZE, IDEARRAYSIZE and
+** HUFARRAYSIZE, which are the strings the array actually holds. */
+#define PF_GMTICKS 0 /* GLOBALMINTICKS */
+#define PF_MINSECONDS 1 /* MINSECONDS */
+#define PF_ALLSTATS 2 /* ALLSTATS */
+#define PF_OUTFILE 3 /* OUTFILE */
+#define PF_CUSTOMRUN 4 /* CUSTOMRUN */
+#define PF_DONUM 5 /* DONUMSORT */
+#define PF_NUMNUMA 6 /* NUMNUMARRAYS */
+#define PF_NUMASIZE 7 /* NUMARRAYSIZE */
+#define PF_NUMMINS 8 /* NUMMINSECONDS */
+#define PF_DOSTR 9 /* DOSTRINGSORT */
+#define PF_STRASIZE 10 /* STRARRAYSIZE */
+#define PF_NUMSTRA 11 /* NUMSTRARRAYS */
+#define PF_STRMINS 12 /* STRMINSECONDS */
+#define PF_DOBITF 13 /* DOBITFIELD */
+#define PF_NUMBITOPS 14 /* NUMBITOPS */
+#define PF_BITFSIZE 15 /* BITFIELDSIZE */
+#define PF_BITMINS 16 /* BITMINSECONDS */
+#define PF_DOEMF 17 /* DOEMF */
+#define PF_EMFASIZE 18 /* EMFARRAYSIZE */
+#define PF_EMFLOOPS 19 /* EMFLOOPS */
+#define PF_EMFMINS 20 /* EMFMINSECONDS */
+#define PF_DOFOUR 21 /* DOFOUR */
+#define PF_FOURASIZE 22 /* FOURSIZE */
+#define PF_FOURMINS 23 /* FOURMINSECONDS */
+#define PF_DOASSIGN 24 /* DOASSIGN */
+#define PF_AARRAYS 25 /* ASSIGNARRAYS */
+#define PF_ASSIGNMINS 26 /* ASSIGNMINSECONDS */
+#define PF_DOIDEA 27 /* DOIDEA */
+#define PF_IDEAASIZE 28 /* IDEARRAYSIZE */
+#define PF_IDEALOOPS 29 /* IDEALOOPS */
+#define PF_IDEAMINS 30 /* IDEAMINSECONDS */
+#define PF_DOHUFF 31 /* DOHUFF */
+#define PF_HUFFASIZE 32 /* HUFARRAYSIZE */
+#define PF_HUFFLOOPS 33 /* HUFFLOOPS */
+#define PF_HUFFMINS 34 /* HUFFMINSECONDS */
+#define PF_DONNET 35 /* DONNET */
+#define PF_NNETLOOPS 36 /* NNETLOOPS */
+#define PF_NNETMINS 37 /* NNETMINSECONDS */
+#define PF_DOLU 38 /* DOLU */
+#define PF_LUNARRAYS 39 /* LUNUMARRAYS */
+#define PF_LUMINS 40 /* LUMINSECONDS */
+#define PF_ALIGN 41 /* ALIGN */
+
+#define MAXPARAM 41 /* Highest PF_xxx value (PF_ALIGN); there are 42 parameters */
+
+/* Tests-to-do flags...must coincide with test.
+** These values index tests_to_do[] and funcpointer[], and the
+** entries of global_fstruct[] must be kept in the same order. */
+#define TF_NUMSORT 0
+#define TF_SSORT 1
+#define TF_BITOP 2
+#define TF_FPEMU 3
+#define TF_FFPU 4
+#define TF_ASSIGN 5
+#define TF_IDEA 6
+#define TF_HUFF 7
+#define TF_NNET 8
+#define TF_LU 9
+
+#define NUMTESTS 10 /* Number of TF_xxx values; size of tests_to_do[] */
+
+/*
+** GLOBALS
+*/
+
+#define BUF_SIZ 1024
+
+/*
+** Test names
+** Listed in the same order as the TF_xxx constants
+** (presumably indexed by them -- confirm against the users in nbench0.c).
+*/
+char *ftestnames[] = {
+ "NUMERIC SORT ",
+ "STRING SORT ",
+ "BITFIELD ",
+ "FP EMULATION ",
+ "FOURIER ",
+ "ASSIGNMENT ",
+ "IDEA ",
+ "HUFFMAN ",
+ "NEURAL NET ",
+ "LU DECOMPOSITION" };
+
+/*
+** Indexes -- Baseline is DELL Pentium XP90
+** 11/28/94
+** One baseline score per test, listed in TF_xxx order.
+*/
+double bindex[] = {
+ 38.993, /* Numeric sort */
+ 2.238, /* String sort */
+ 5829704, /* Bitfield */
+ 2.084, /* FP Emulation */
+ 879.278, /* Fourier */
+ .2628, /* Assignment */
+ 65.382, /* IDEA */
+ 36.062, /* Huffman */
+ .6225, /* Neural Net */
+ 19.3031 }; /* LU Decomposition */
+
+/*
+** Indices -- Baseline is an AMD K6-233, 32MB RAM (60ns SDRAM), 512k L2 cache,
+** Linux kernel 2.0.32, libc-5.4.38, gcc-2.7.2.3
+** Nov/30/97
+** One baseline score per test, listed in TF_xxx order.
+*/
+double lx_bindex[] = {
+ 118.73, /* Numeric sort */
+ 14.459, /* String sort */
+ 27910000, /* Bitfield */
+ 9.0314, /* FP Emulation */
+ 1565.5, /* Fourier */
+ 1.0132, /* Assignment */
+ 220.21, /* IDEA */
+ 112.93, /* Huffman */
+ 1.4799, /* Neural Net */
+ 26.732}; /* LU Decomposition */
+
+/* Parameter names
+** Entry i is the name belonging to the PF_xxx flag whose value is i,
+** so the order here must follow the PF_xxx definitions above.
+** NOTE(review): "FOURSIZE", "IDEARRAYSIZE" and "HUFARRAYSIZE" are
+** spelled differently from their neighbours ("EMFARRAYSIZE",
+** "IDEALOOPS", "HUFFLOOPS"). They are runtime strings, presumably
+** matched against command-file keywords, so confirm which spelling
+** existing command files use before changing them. */
+char *paramnames[]= {
+ "GLOBALMINTICKS",
+ "MINSECONDS",
+ "ALLSTATS",
+ "OUTFILE",
+ "CUSTOMRUN",
+ "DONUMSORT",
+ "NUMNUMARRAYS",
+ "NUMARRAYSIZE",
+ "NUMMINSECONDS",
+ "DOSTRINGSORT",
+ "STRARRAYSIZE",
+ "NUMSTRARRAYS",
+ "STRMINSECONDS",
+ "DOBITFIELD",
+ "NUMBITOPS",
+ "BITFIELDSIZE",
+ "BITMINSECONDS",
+ "DOEMF",
+ "EMFARRAYSIZE",
+ "EMFLOOPS",
+ "EMFMINSECONDS",
+ "DOFOUR",
+ "FOURSIZE",
+ "FOURMINSECONDS",
+ "DOASSIGN",
+ "ASSIGNARRAYS",
+ "ASSIGNMINSECONDS",
+ "DOIDEA",
+ "IDEARRAYSIZE",
+ "IDEALOOPS",
+ "IDEAMINSECONDS",
+ "DOHUFF",
+ "HUFARRAYSIZE",
+ "HUFFLOOPS",
+ "HUFFMINSECONDS",
+ "DONNET",
+ "NNETLOOPS",
+ "NNETMINSECONDS",
+ "DOLU",
+ "LUNUMARRAYS",
+ "LUMINSECONDS",
+ "ALIGN" };
+
+/*
+** Following array is a collection of flags indicating which
+** tests to perform.
+*/
+int tests_to_do[NUMTESTS];
+
+/*
+** Buffer for holding output text.
+*/
+char buffer[BUF_SIZ];
+
+/*
+** Global parameters.
+*/
+ulong global_min_ticks; /* Minimum ticks */
+ulong global_min_seconds; /* Minimum seconds tests run */
+int global_allstats; /* Statistics dump flag */
+char global_ofile_name[BUF_SIZ];/* Output file name */
+FILE *global_ofile; /* Output file */
+int global_custrun; /* Custom run flag */
+int write_to_file; /* Write output to file */
+int global_align; /* Memory alignment */
+
+/*
+** Following global is the memory array. This is used to store
+** original and aligned (modified) memory addresses.
+*/
+ulong mem_array[2][MEM_ARRAY_SIZE];
+int mem_array_ents; /* # of active entries */
+
+/*
+** Following are global structures, one built for
+** each of the tests.
+*/
+SortStruct global_numsortstruct; /* For numeric sort */
+SortStruct global_strsortstruct; /* For string sort */
+BitOpStruct global_bitopstruct; /* For bitfield operations */
+EmFloatStruct global_emfloatstruct; /* For emul. float. point */
+FourierStruct global_fourierstruct; /* For fourier test */
+AssignStruct global_assignstruct; /* For assignment algorithm */
+IDEAStruct global_ideastruct; /* For IDEA encryption */
+HuffStruct global_huffstruct; /* For Huffman compression */
+NNetStruct global_nnetstruct; /* For Neural Net */
+LUStruct global_lustruct; /* For LU decomposition */
+
+/*
+** The following array of function struct pointers lets
+** us very rapidly map a function to its controlling
+** data structure. NOTE: These must match the "TF_xxx"
+** constants above.
+** Every entry is stored as a void pointer, so a user of this
+** array has to cast it back to the structure type of that test.
+*/
+void *global_fstruct[] =
+{ (void *)&global_numsortstruct,
+ (void *)&global_strsortstruct,
+ (void *)&global_bitopstruct,
+ (void *)&global_emfloatstruct,
+ (void *)&global_fourierstruct,
+ (void *)&global_assignstruct,
+ (void *)&global_ideastruct,
+ (void *)&global_huffstruct,
+ (void *)&global_nnetstruct,
+ (void *)&global_lustruct };
+
+/*
+** Following globals added to support command line emulation on
+** the Macintosh....which doesn't have command lines.
+*/
+#ifdef MAC
+int argc; /* Argument count */
+char *argv[20]; /* Argument vectors */
+
+unsigned char Uargbuff[129]; /* Buffer holding arguments string */
+unsigned char Udummy[2]; /* Dummy buffer for first arg */
+
+#endif
+
+#ifdef MACTIMEMGR
+#include <Types.h>
+#include <Timer.h>
+/*
+** Timer globals for Mac
+*/
+struct TMTask myTMTask;
+long MacHSTdelay,MacHSTohead;
+
+#endif
+
+/*
+** Following globals used by Win 31 timing routines.
+** NOTE: This requires the includes of the w31timer.asm
+** file in your project!!
+*/
+#ifdef WIN31TIMER
+#include <windows.h>
+#include <toolhelp.h>
+extern TIMERINFO win31tinfo;
+extern HANDLE hThlp;
+extern FARPROC lpfn;
+#endif
+
+/*
+** PROTOTYPES
+*/
+static int parse_arg(char *argptr);
+static void display_help(char *progname);
+static void read_comfile(FILE *cfile);
+static int getflag(char *cptr);
+static void strtoupper(char *s);
+static void set_request_secs(void);
+static int bench_with_confidence(int fid,
+ double *mean, double *stdev, ulong *numtries);
+/*
+static int seek_confidence(double scores[5],
+ double *newscore, double *c_half_interval,
+ double *smean,double *sdev);
+*/
+static int calc_confidence(double scores[],
+ int num_scores,
+ double *c_half_interval,double *smean,
+ double *sdev);
+static double getscore(int fid);
+static void output_string(char *buffer);
+static void show_stats(int bid);
+
+#ifdef MAC
+void UCommandLine(void);
+void UParse(void);
+unsigned char *UField(unsigned char *ptr);
+#endif
+
+/*
+** EXTERNAL PROTOTYPES
+*/
+extern void DoNumSort(void); /* From NBENCH1 */
+extern void DoStringSort(void);
+extern void DoBitops(void);
+extern void DoEmFloat(void);
+extern void DoFourier(void);
+extern void DoAssign(void);
+extern void DoIDEA(void);
+extern void DoHuffman(void);
+extern void DoNNET(void);
+extern void DoLU(void);
+
+extern void ErrorExit(void); /* From SYSSPEC */
+
+/*
+** Array of pointers to the benchmark functions.
+** Indexed by the TF_xxx constants: bench_with_confidence() runs a
+** test by calling funcpointer[fid], so the order of the entries
+** must follow the TF_xxx values.
+*/
+void (*funcpointer[])(void) =
+{ DoNumSort,
+ DoStringSort,
+ DoBitops,
+ DoEmFloat,
+ DoFourier,
+ DoAssign,
+ DoIDEA,
+ DoHuffman,
+ DoNNET,
+ DoLU };
+
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/nbench1.c b/benchmarks/nbench/nbench-byte-2.2.3/nbench1.c
new file mode 100644
index 0000000..68e302a
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/nbench1.c
@@ -0,0 +1,4449 @@
+
+/*
+** nbench1.c
+*/
+
+/********************************
+** BYTEmark (tm) **
+** BYTE NATIVE MODE BENCHMARKS **
+** VERSION 2 **
+** **
+** Included in this source **
+** file: **
+** Numeric Heapsort **
+** String Heapsort **
+** Bitfield test **
+** Floating point emulation **
+** Fourier coefficients **
+** Assignment algorithm **
+** IDEA Encryption **
+** Huffman compression **
+** Back prop. neural net **
+** LU Decomposition **
+** (linear equations) **
+** ---------- **
+** Rick Grehan, BYTE Magazine **
+*********************************
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+** 10/95 - Removed allocation that was taking place inside
+** the LU Decomposition benchmark. Though it didn't seem to
+** make a difference on systems we ran it on, it nonetheless
+** removes an operating system dependency that probably should
+** not have been there.
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** INCLUDES
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <math.h>
+#include "nmglobal.h"
+#include "nbench1.h"
+#include "wordcat.h"
+
+#ifdef DEBUG
+static int numsort_status=0;
+static int stringsort_status=0;
+#endif
+
+/*********************
+** NUMERIC HEAPSORT **
+**********************
+** This test implements a heapsort algorithm, performed on an
+** array of longs.
+*/
+
+/**************
+** DoNumSort **
+***************
+** This routine performs the CPU numeric sort test.
+** NOTE: Last version incorrectly stated that the routine
+** returned result in # of longword sorted per second.
+** Not so; the routine returns # of iterations per sec.
+*/
+
+void DoNumSort(void)
+{
+SortStruct *numsortstruct; /* Local pointer to global struct */
+farlong *arraybase; /* Base pointers of array */
+long accumtime; /* Accumulated time, in stopwatch ticks */
+double iterations; /* Iteration counter */
+char *errorcontext; /* Error context string pointer */
+int systemerror; /* For holding error codes */
+
+/*
+** Link to global structure
+*/
+numsortstruct=&global_numsortstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Numeric Sort";
+
+/*
+** See if we need to do self adjustment code.
+** adjust==0 means the number of arrays has not been chosen yet.
+*/
+if(numsortstruct->adjust==0)
+{
+ /*
+ ** Self-adjustment code. The system begins by sorting 1
+ ** array. If it does that in no time, then two arrays
+ ** are built and sorted. This process continues until
+ ** enough arrays are built to handle the tolerance.
+ ** The loop is left through the break below with arraybase
+ ** still allocated; that buffer is reused by the timed loop.
+ */
+ numsortstruct->numarrays=1;
+ while(1)
+ {
+ /*
+ ** Allocate space for arrays
+ */
+ arraybase=(farlong *)AllocateMemory(sizeof(long) *
+ numsortstruct->numarrays * numsortstruct->arraysize,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+
+ /*
+ ** Do an iteration of the numeric sort. If the
+ ** elapsed time is less than or equal to the permitted
+ ** minimum, then allocate for more arrays and
+ ** try again.
+ */
+ if(DoNumSortIteration(arraybase,
+ numsortstruct->arraysize,
+ numsortstruct->numarrays)>global_min_ticks)
+ break; /* We're ok...exit */
+
+ FreeMemory((farvoid *)arraybase,&systemerror); /* Too fast: release and retry with one more array */
+ if(numsortstruct->numarrays++>NUMNUMARRAYS)
+ { printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
+ ErrorExit();
+ }
+ }
+}
+else
+{ /*
+ ** Allocate space for arrays
+ ** (numarrays was fixed by an earlier run or by the caller).
+ */
+ arraybase=(farlong *)AllocateMemory(sizeof(long) *
+ numsortstruct->numarrays * numsortstruct->arraysize,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+
+}
+/*
+** All's well if we get here. Repeatedly perform sorts until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+ accumtime+=DoNumSortIteration(arraybase,
+ numsortstruct->arraysize,
+ numsortstruct->numarrays);
+ iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<numsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+** The score is iterations * numarrays divided by the elapsed
+** time converted with TicksToFracSecs().
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+numsortstruct->sortspersec=iterations *
+ (double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(numsortstruct->adjust==0)
+ numsortstruct->adjust=1;
+
+#ifdef DEBUG
+if (numsort_status==0) printf("Numeric sort: OK\n");
+numsort_status=0;
+#endif
+return;
+}
+
+/***********************
+** DoNumSortIteration **
+************************
+** Runs one iteration of the numeric sort benchmark: the arrays
+** are refilled with random numbers, then every array is heap
+** sorted while the stopwatch runs. Only the sorting is timed.
+** Returns the number of ticks elapsed for the iteration.
+*/
+static ulong DoNumSortIteration(farlong *arraybase,
+        ulong arraysize,
+        uint numarrays)
+{
+ulong ticks;    /* Stopwatch value, then elapsed ticks */
+ulong idx;      /* Array number, then element index */
+
+/* Fresh random data for every array (not part of the timing) */
+LoadNumArrayWithRand(arraybase,arraysize,numarrays);
+
+ticks=StartStopwatch();
+
+/* Sort each array in turn */
+idx=0;
+while(idx<numarrays)
+{
+    NumHeapSort(&arraybase[idx*arraysize],0L,arraysize-1L);
+    idx++;
+}
+
+ticks=StopStopwatch(ticks);
+
+#ifdef DEBUG
+/* Check that the first array came out in ascending order */
+for(idx=0;idx<arraysize-1;idx++)
+{
+    if(arraybase[idx]>arraybase[idx+1])
+    {   printf("Sort Error\n");
+        numsort_status=1;
+        break;
+    }
+}
+#endif
+
+return(ticks);
+}
+
+/*************************
+** LoadNumArrayWithRand **
+**************************
+** Load up the arrays with random longs.
+** The random number generator is re-initialized with 13, the first
+** array is filled from it, and that array is then copied into each
+** of the remaining numarrays-1 arrays, which follow it contiguously.
+** With numarrays equal to 0 or 1 only the first array is written.
+*/
+static void LoadNumArrayWithRand(farlong *array,    /* Pointer to arrays */
+        ulong arraysize,                /* # of elements in each array */
+        uint numarrays)                 /* # of arrays */
+{
+ulong i;            /* Element index; unsigned like arraysize */
+uint n;             /* Counts the arrays filled so far */
+farlong *darray;    /* Destination array pointer */
+
+/*
+** Initialize the random number generator
+*/
+randnum((int32)13);
+
+/*
+** Load up first array with randoms
+*/
+for(i=0;i<arraysize;i++)
+    array[i]=randnum((int32)0);
+
+/*
+** Now, if there's more than one array to load, copy the
+** first into each of the others. Counting up from 1 means a
+** count of 0 copies nothing; the former "while(--numarrays)"
+** wrapped around to a huge count in that case.
+*/
+darray=array;
+for(n=1;n<numarrays;n++)
+{   darray+=arraysize;
+    for(i=0;i<arraysize;i++)
+        darray[i]=array[i];
+}
+
+return;
+}
+
+/****************
+** NumHeapSort **
+*****************
+** Pass this routine a pointer to an array of long
+** integers. Also pass in minimum and maximum offsets.
+** This routine performs a heap sort on that array.
+** NOTE: the exchange step always swaps with the first element of
+** array, so bottom is expected to be 0 (as DoNumSortIteration
+** passes it).
+*/
+static void NumHeapSort(farlong *array,
+        ulong bottom,           /* Lower bound */
+        ulong top)              /* Upper bound */
+{
+long temp;      /* Used to exchange elements; signed, as in NumSift */
+ulong i;        /* Loop index */
+
+/*
+** First, build a heap in the array
+*/
+for(i=(top/2L); i>0; --i)
+    NumSift(array,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array. When we get done, we'll have a sorted
+** array.
+*/
+for(i=top; i>0; --i)
+{   NumSift(array,bottom,i);
+    temp=*array;                /* Perform exchange */
+    *array=*(array+i);
+    *(array+i)=temp;
+}
+return;
+}
+
+/************
+** NumSift **
+*************
+** Performs the sift operation on a numeric array: the element at
+** offset i is moved down, exchanging it with the larger of the
+** elements at offsets 2i and 2i+1, until neither is larger or
+** offset j would be passed.
+*/
+static void NumSift(farlong *array,     /* Array of numbers */
+        ulong i,                /* Minimum of array */
+        ulong j)                /* Maximum of array */
+{
+unsigned long child;    /* Offset of the larger child */
+long held;              /* Used for exchange */
+
+for(;;)
+{
+    child=i+i;
+    if(child>j)
+        break;          /* No children within bounds */
+
+    /* Prefer the second child when it exists and is larger */
+    if(child<j && array[child]<array[child+1L])
+        ++child;
+
+    if(!(array[i]<array[child]))
+        break;          /* Already in heap order */
+
+    held=array[child];
+    array[child]=array[i];
+    array[i]=held;
+    i=child;
+}
+return;
+}
+
+/********************
+** STRING HEAPSORT **
+********************/
+
+/*****************
+** DoStringSort **
+******************
+** This routine performs the CPU string sort test.
+** It takes no arguments; parameters and results live in
+** global_strsortstruct:
+**  arraysize    = size of one string array (read)
+**  request_secs = run time to accumulate before stopping (read)
+**  numarrays    = # of arrays sorted per iteration (tuned here
+**                 when adjust==0, otherwise read)
+**  sortspersec  = # of arrays sorted per second (written)
+**  adjust       = set to 1 once self-adjustment has been done
+*/
+void DoStringSort(void)
+{
+SortStruct *strsortstruct=&global_strsortstruct; /* Link to global structure */
+char *errorcontext="CPU:String Sort";   /* Error context string */
+faruchar *arraybase;            /* Base pointer of char array */
+long accumtime=0L;              /* Accumulated time */
+double iterations=(double)0.0;  /* # of iterations */
+int systemerror;                /* For holding error code */
+int tuning;                     /* Nonzero while self-adjusting */
+
+printf("\n\nnbench1.c: DoStringSort...\n");
+
+/*
+** Self-adjustment starts from a single array and adds one array
+** per attempt; otherwise the stored array count is used as is.
+*/
+tuning=(strsortstruct->adjust==0);
+if(tuning)
+    strsortstruct->numarrays=1;
+
+for(;;)
+{
+    /*
+    ** Allocate space for the array(s). Each array gets an extra
+    ** 100 bytes to protect memory as strings move around
+    ** (this can happen during string adjustment).
+    */
+    arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
+            (long)strsortstruct->numarrays,&systemerror);
+    if(systemerror)
+    {   ReportError(errorcontext,systemerror);
+        ErrorExit();
+    }
+
+    /* No adjustment needed: one allocation is all we want */
+    if(!tuning)
+        break;
+
+    /*
+    ** Do a trial iteration. If it took longer than the permitted
+    ** minimum we keep this allocation; otherwise release it, add
+    ** another array, and try again.
+    */
+    if(DoStringSortIteration(arraybase,
+            strsortstruct->numarrays,
+            strsortstruct->arraysize)>global_min_ticks)
+        break;
+
+    FreeMemory((farvoid *)arraybase,&systemerror);
+    strsortstruct->numarrays+=1;
+}
+
+printf("\n\nnbench1.c: DoStringSort AllocateMemory done \n");
+
+/*
+** Repeatedly perform sorts until the accumulated elapsed time
+** reaches the # of seconds requested. Every pass sorts numarrays
+** arrays, so that is what gets added to the iteration count.
+*/
+do {
+    accumtime+=DoStringSortIteration(arraybase,
+            strsortstruct->numarrays,
+            strsortstruct->arraysize);
+    iterations+=(double)strsortstruct->numarrays;
+} while(TicksToSecs(accumtime)<strsortstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Set flag to show we don't need to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
+if(strsortstruct->adjust==0)
+    strsortstruct->adjust=1;
+#ifdef DEBUG
+if (stringsort_status==0) printf("String sort: OK\n");
+stringsort_status=0;
+#endif
+return;
+}
+
+/**************************
+** DoStringSortIteration **
+***************************
+** Executes one iteration of the string sort benchmark and
+** returns the number of ticks the sorting took.
+** The string arrays are (re)loaded and the offset pointer array
+** is built by LoadStringArray() before the stopwatch starts, and
+** the offset pointer array is released after it stops, so only
+** the heap sorts themselves are timed.
+*/
+static ulong DoStringSortIteration(faruchar *arraybase,
+        uint numarrays,ulong arraysize)
+{
+farulong *optrarray;            /* Offset pointer array */
+farulong *offsetcursor;         /* Offsets of the array being sorted */
+faruchar *stringcursor;         /* Strings of the array being sorted */
+unsigned long nstrings;         /* # of strings in array */
+unsigned long ticks;            /* Stopwatch value / elapsed ticks */
+unsigned int remaining;         /* Arrays still to sort */
+int syserror;                   /* System error code */
+
+/*
+** Load up the array(s) with random strings
+*/
+optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
+
+offsetcursor=optrarray;
+stringcursor=arraybase;
+
+/*
+** Timed section: sort each array in turn. Consecutive arrays are
+** nstrings offsets and arraysize+100 bytes apart.
+*/
+ticks=StartStopwatch();
+for(remaining=numarrays;remaining>0;remaining--)
+{   StrHeapSort(offsetcursor,stringcursor,nstrings,0L,nstrings-1);
+    offsetcursor+=nstrings;
+    stringcursor+=arraysize+100;
+}
+ticks=StopStopwatch(ticks);
+
+#ifdef DEBUG
+{
+    unsigned long idx;
+    for(idx=0;idx<nstrings-1;idx++)
+    {   /*
+        ** Each string must not compare less than its
+        ** predecessor in the first array.
+        */
+        if(str_is_less(optrarray,arraybase,nstrings,idx+1,idx))
+        {   printf("Sort Error\n");
+            stringsort_status=1;
+            break;
+        }
+    }
+}
+#endif
+
+/*
+** Release the offset pointer array built by
+** LoadStringArray()
+*/
+FreeMemory((farvoid *)optrarray,&syserror);
+
+return(ticks);
+}
+
+/********************
+** LoadStringArray **
+*********************
+** Initialize the string array with random strings of
+** varying sizes. Each string is stored as one length byte
+** followed by that many data bytes.
+** Stores the number of strings built in *nstrings and returns
+** the pointer to the offset pointer array, which is allocated
+** here and must be released by the caller.
+** Note that since we're creating a number of arrays, this
+** routine builds one array, then copies it into the others.
+** Consecutive string arrays are arraysize+100 bytes apart.
+*/
+static farulong *LoadStringArray(faruchar *strarray, /* String array */
+    uint numarrays,                 /* # of arrays */
+    ulong *nstrings,                /* # of strings */
+    ulong arraysize)                /* Size of array */
+{
+faruchar *tempsbase;            /* Temporary string base pointer */
+farulong *optrarray;            /* Local for pointer */
+farulong *tempobase;            /* Temporary offset pointer base pointer */
+unsigned long curroffset;       /* Current offset */
+int fullflag;                   /* Indicates full array */
+unsigned char stringlength;     /* Length of string */
+unsigned char i;                /* Index */
+unsigned long j;                /* Another index */
+unsigned long k;                /* Array counter */
+unsigned long l;                /* Copy index; as wide as the bounds it is
+                                ** compared against (arraysize, *nstrings) */
+int systemerror;                /* For holding error code */
+
+/*
+** Initialize random number generator so every call builds the
+** same strings.
+*/
+randnum((int32)13);
+
+/*
+** Start with no strings. Initialize our current offset pointer
+** to 0.
+*/
+*nstrings=0L;
+curroffset=0L;
+fullflag=0;
+
+do
+{
+    /*
+    ** Pick a random string length: 1 plus abs_randwc(76), kept
+    ** to a single byte. If that string (with its length byte)
+    ** would reach the end of the array, shorten it to whatever
+    ** room is left and make it the last string.
+    */
+    stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
+    if((unsigned long)stringlength+curroffset+1L>=arraysize)
+    {   stringlength=(unsigned char)((arraysize-curroffset-1L) &
+                0xFF);
+        fullflag=1;             /* Indicates a full array */
+    }
+
+    /*
+    ** Store length at curroffset and advance current offset.
+    */
+    *(strarray+curroffset)=stringlength;
+    curroffset++;
+
+    /*
+    ** Fill up the rest of the string with random bytes.
+    */
+    for(i=0;i<stringlength;i++)
+    {   *(strarray+curroffset)=
+            (unsigned char)(abs_randwc((int32)0xFE));
+        curroffset++;
+    }
+
+    /*
+    ** Increment the # of strings counter.
+    */
+    *nstrings+=1L;
+
+} while(fullflag==0);
+
+/*
+** We now have initialized a single full array. If there
+** is more than one array, copy the original into the
+** others.
+*/
+tempsbase=strarray;
+for(k=1;k<numarrays;k++)
+{   tempsbase+=arraysize+100;   /* Set base */
+    for(l=0;l<arraysize;l++)
+        tempsbase[l]=strarray[l];
+}
+
+/*
+** Now the array is full, allocate enough space for an
+** offset pointer array (one offset per string per array).
+*/
+optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
+        numarrays,
+        &systemerror);
+if(systemerror)
+{   ReportError("CPU:Stringsort",systemerror);
+    FreeMemory((void *)strarray,&systemerror);
+    ErrorExit();
+}
+
+/*
+** Go through the newly-built string array, building
+** offsets and putting them into the offset pointer
+** array. Each string occupies its length plus one byte.
+*/
+curroffset=0;
+for(j=0;j<*nstrings;j++)
+{   *(optrarray+j)=curroffset;
+    curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
+}
+
+/*
+** As above, we've made one copy of the offset pointers,
+** so duplicate this array in the remaining ones.
+*/
+tempobase=optrarray;
+for(k=1;k<numarrays;k++)
+{   tempobase+=*nstrings;
+    for(l=0;l<*nstrings;l++)
+        tempobase[l]=optrarray[l];
+}
+
+/*
+** All done...go home. Pass local pointer back.
+*/
+return(optrarray);
+}
+
+/**************
+** stradjust **
+***************
+** Used by the string heap sort. Call this routine to resize the
+** slot of string number i (an index into optrarray, not a byte
+** offset) to length l. The strings stored after it in the string
+** array are moved accordingly, their entries in optrarray are
+** corrected, and the length byte of string i is set to l.
+** Only the length byte of string i is written here; the data
+** bytes of string i are left for the caller to fill in.
+*/
+static void stradjust(farulong *optrarray, /* Offset pointer array */
+ faruchar *strarray, /* String array */
+ ulong nstrings, /* # of strings */
+ ulong i, /* Index of the string to adjust */
+ uchar l) /* New length */
+{
+unsigned long nbytes; /* # of bytes to move */
+unsigned long j; /* Index */
+int direction; /* New length minus old length (signed) */
+unsigned char adjamount; /* Magnitude of the length change */
+
+/*
+** If new length is less than old length, the direction is
+** down (negative). If new length is greater than old length, the
+** direction is up (positive). The old length is the byte stored
+** at the string's offset in the string array.
+*/
+direction=(int)l - (int)*(strarray+*(optrarray+i));
+adjamount=(unsigned char)abs(direction);
+
+/*
+** See if the adjustment is being made to the last
+** string in the string array. If so, we don't have to
+** do anything more than adjust the length field: nothing
+** is stored after it, so nothing has to move.
+*/
+if(i==(nstrings-1L))
+{ *(strarray+*(optrarray+i))=l;
+ return;
+}
+
+/*
+** Calculate the total # of bytes in string array from
+** location i+1 to end of array. Whether we're moving "up" or
+** down, this is how many bytes we'll have to move.
+** That is: (offset of last string + its length + 1 for its
+** length byte) minus the offset of string i+1.
+*/
+nbytes=*(optrarray+nstrings-1L) +
+ (unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
+ *(optrarray+i+1L);
+
+/*
+** Calculate the source and the destination. Source is
+** the start of string i+1. Destination is the start of string i
+** plus the new length l plus 1 for the length byte
+** (l is "ell"...don't confuse 1 and l).
+** Source and destination can overlap.
+** NOTE(review): MoveMemory is presumably a memmove-style wrapper
+** that copes with the overlap -- confirm in its definition.
+*/
+MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
+ (farvoid *)(strarray+*(optrarray+i+1)),
+ (unsigned long)nbytes);
+
+/*
+** We have to adjust the offset pointer array.
+** This covers string i+1 to nstrings-1; each offset moves by
+** the size of the length change, in the direction of the change.
+*/
+for(j=i+1;j<nstrings;j++)
+ if(direction<0)
+ *(optrarray+j)=*(optrarray+j)-adjamount;
+ else
+ *(optrarray+j)=*(optrarray+j)+adjamount;
+
+/*
+** Store the new length and go home.
+*/
+*(strarray+*(optrarray+i))=l;
+return;
+}
+
+/****************
+** StrHeapSort **
+*****************
+** Pass this routine a pointer to an array of unsigned char
+** holding length-prefixed strings (one length byte followed by
+** that many data bytes).
+** Each string, including its length byte, must fit in the
+** 80-byte exchange buffer used below.
+** This routine also needs a pointer to an array of offsets
+** which represent string locations in the array, and
+** an unsigned long indicating the number of strings
+** in the array.
+** The strings themselves are moved within the string array;
+** the offsets are kept up to date through stradjust().
+*/
+static void StrHeapSort(farulong *optrarray, /* Offset pointers */
+ faruchar *strarray, /* Strings array */
+ ulong numstrings, /* # of strings in array */
+ ulong bottom, /* Region to sort...bottom (not referenced below) */
+ ulong top) /* Region to sort...top (index of last string) */
+{
+unsigned char temp[80]; /* Used to exchange elements */
+unsigned char tlen; /* Temp to hold length */
+unsigned long i; /* Loop index */
+
+
+/*
+** Build a heap in the array. Strings top/2 down to 1 are sifted
+** here; string 0 is first sifted by the extraction loop below.
+*/
+for(i=(top/2L); i>0; --i)
+ strsift(optrarray,strarray,numstrings,i,top);
+
+/*
+** Repeatedly extract maximum from heap and place it at the
+** end of the array. When we get done, we'll have a sorted
+** array.
+** The exchange of string 0 and string i takes three steps
+** because the strings may differ in length: each slot is resized
+** with stradjust() before the other string is copied into it.
+*/
+for(i=top; i>0; --i)
+{
+ strsift(optrarray,strarray,numstrings,0,i);
+
+ /* temp = string[0]; string 0 is read from the very start of
+ ** strarray, i.e. from offset 0 */
+ tlen=*strarray;
+ MoveMemory((farvoid *)&temp[0], /* Perform exchange */
+ (farvoid *)strarray,
+ (unsigned long)(tlen+1));
+
+
+ /* string[0]=string[i]; the slot is resized first, and the
+ ** source address is computed after that, so it uses the
+ ** offset of string i as updated by stradjust */
+ tlen=*(strarray+*(optrarray+i));
+ stradjust(optrarray,strarray,numstrings,0,tlen);
+ MoveMemory((farvoid *)strarray,
+ (farvoid *)(strarray+*(optrarray+i)),
+ (unsigned long)(tlen+1));
+
+ /* string[i]=temp; the copy includes the saved length byte */
+ tlen=temp[0];
+ stradjust(optrarray,strarray,numstrings,i,tlen);
+ MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+ (farvoid *)&temp[0],
+ (unsigned long)(tlen+1));
+
+}
+return;
+}
+
+/****************
+** str_is_less **
+*****************
+** Ordering predicate used by the string heap sort.
+** Pass this function:
+**  1) A pointer to an array of offset pointers
+**  2) A pointer to a string array
+**  3) The number of elements in the string array (not used here)
+**  4) Indices of two strings (a & b) in the offset array
+** Returns TRUE if string a orders before string b, else FALSE.
+** The strncmp() comparison starts at the byte found at each
+** string's offset -- the same byte that is read as its length --
+** and covers as many bytes as the smaller of the two lengths.
+** If that comparison is a tie, a counts as "less" when its
+** length byte is the greater one.
+*/
+static int str_is_less(farulong *optrarray,     /* Offset pointers */
+    faruchar *strarray,                         /* String array */
+    ulong numstrings,                           /* # of strings */
+    ulong a, ulong b)                           /* Indices */
+{
+faruchar *stra=strarray+optrarray[a];   /* Start of string a */
+faruchar *strb=strarray+optrarray[b];   /* Start of string b */
+int minlen;                             /* Smaller of the two lengths */
+int order;                              /* Result of strncmp() */
+
+minlen=(int)*stra;
+if(minlen > (int)*strb)
+    minlen=(int)*strb;
+
+order=strncmp((char *)stra,(char *)strb,minlen);
+
+if(order<0) return(TRUE);       /* a is strictly less than b */
+if(order>0) return(FALSE);      /* a is strictly greater than b */
+
+/*
+** They match over the compared bytes. Return true if the
+** length of a is greater than the length of b.
+*/
+return((*stra > *strb) ? TRUE : FALSE);
+}
+
+/************
+** strsift **
+*************
+** Pass this function:
+** 1) A pointer to an array of offset pointers
+** 2) A pointer to a string array
+** 3) The number of elements in the string array
+** 4) Indices i and j: the string to sift down from, and the
+** last string taking part.
+** Sift the array within the bounds of those indices (thus
+** building a heap). String i is exchanged with the greater of
+** its children (strings 2*i and 2*i+1, limited to j) for as long
+** as str_is_less() reports it as less than that child.
+*/
+static void strsift(farulong *optrarray, /* Offset pointers */
+ faruchar *strarray, /* String array */
+ ulong numstrings, /* # of strings */
+ ulong i, ulong j) /* Indices: start, last */
+{
+unsigned long k; /* Index of the child being considered */
+unsigned char temp[80]; /* Exchange buffer: length byte + data */
+unsigned char tlen; /* For string lengths */
+
+
+while((i+i)<=j)
+{
+ k=i+i;
+ if(k<j)
+ if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
+ ++k;
+ if(str_is_less(optrarray,strarray,numstrings,i,k))
+ {
+ /* temp=string[k], copied together with its length byte */
+ tlen=*(strarray+*(optrarray+k));
+ MoveMemory((farvoid *)&temp[0],
+ (farvoid *)(strarray+*(optrarray+k)),
+ (unsigned long)(tlen+1));
+
+ /* string[k]=string[i]; slot k is resized to the length of
+ ** string i before the copy */
+ tlen=*(strarray+*(optrarray+i));
+ stradjust(optrarray,strarray,numstrings,k,tlen);
+ MoveMemory((farvoid *)(strarray+*(optrarray+k)),
+ (farvoid *)(strarray+*(optrarray+i)),
+ (unsigned long)(tlen+1));
+
+ /* string[i]=temp; slot i is resized to the saved length
+ ** before the copy */
+ tlen=temp[0];
+ stradjust(optrarray,strarray,numstrings,i,tlen);
+ MoveMemory((farvoid *)(strarray+*(optrarray+i)),
+ (farvoid *)&temp[0],
+ (unsigned long)(tlen+1));
+ i=k;
+ }
+ else
+ i=j+1; /* Heap property holds here: force the loop to end */
+}
+return;
+}
+
+/************************
+** BITFIELD OPERATIONS **
+*************************/
+
+/*************
+** DoBitops **
+**************
+** Perform the bit operations test portion of the CPU
+** benchmark. Takes no arguments and returns nothing; parameters
+** and results live in global_bitopstruct:
+**  bitfieldarraysize = # of words in the bitmap (read)
+**  bitoparraysize    = # of offset/run-length pairs per iteration
+**                      (tuned here when adjust==0, otherwise read)
+**  request_secs      = run time to accumulate before stopping (read)
+**  bitopspersec      = bit operations per second (written)
+**  adjust            = set to 1 once self-adjustment has been done
+*/
+void DoBitops(void)
+{
+BitOpStruct *locbitopstruct;    /* Local bitop structure */
+farulong *bitarraybase;         /* Base of bitmap array */
+farulong *bitoparraybase;       /* Base of bitmap operations array */
+ulong nbitops;                  /* # of bitfield operations */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+char *errorcontext;             /* Error context string */
+int systemerror;                /* For holding error codes */
+ulong ticks;                    /* Ticks of one trial iteration; same type
+                                ** as DoBitfieldIteration() returns */
+
+/*
+** Link to global structure.
+*/
+locbitopstruct=&global_bitopstruct;
+
+/*
+** Set the error context.
+*/
+errorcontext="CPU:Bitfields";
+
+/*
+** The bitmap itself is needed whether or not we self-adjust.
+*/
+bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
+        sizeof(ulong),&systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    ErrorExit();
+}
+
+/*
+** See if we need to run adjustment code.
+*/
+if(locbitopstruct->adjust==0)
+{
+    /*
+    ** Start with 30 offset/run-length pairs
+    */
+    locbitopstruct->bitoparraysize=30L;
+
+    while(1)
+    {
+        /*
+        ** Allocate space for operations array (two words
+        ** per operation: offset and run length)
+        */
+        bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+                sizeof(ulong),
+                &systemerror);
+        if(systemerror)
+        {   ReportError(errorcontext,systemerror);
+            FreeMemory((farvoid *)bitarraybase,&systemerror);
+            ErrorExit();
+        }
+        /*
+        ** Do an iteration of the bitmap test. If the
+        ** elapsed time is less than or equal to the permitted
+        ** minimum, then de-allocate the array, reallocate a
+        ** larger version, and try again.
+        */
+        ticks=DoBitfieldIteration(bitarraybase,
+                bitoparraybase,
+                locbitopstruct->bitoparraysize,
+                &nbitops);
+#ifdef DEBUG
+#ifdef LINUX
+        if (locbitopstruct->bitoparraysize==30L){
+            /* this is the first loop, write a debug file */
+            FILE *file;
+            unsigned long *running_base; /* same as farulong */
+            long counter;
+            file=fopen("debugbit.dat","w");
+            if (file==NULL){
+                /* Do not hand a NULL stream to fprintf/fclose */
+                printf("\nCould not open debugbit.dat for writing, debug dump skipped\n");
+            }
+            else{
+                running_base=bitarraybase;
+                for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
+#ifdef LONG64
+                    fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
+                    fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
+                    if ((counter+1)%4==0) fprintf(file,"\n");
+#else
+                    fprintf(file,"%08lX",*running_base);
+                    if ((counter+1)%8==0) fprintf(file,"\n");
+#endif
+                    running_base=running_base+1;
+                }
+                fclose(file);
+                printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
+            }
+        }
+#endif
+#endif
+
+        if (ticks>global_min_ticks) break;      /* We're ok...exit */
+
+        FreeMemory((farvoid *)bitoparraybase,&systemerror);
+        locbitopstruct->bitoparraysize+=100L;
+    }
+}
+else
+{
+    /*
+    ** Don't need to do self adjustment, just allocate
+    ** the operations array at its stored size.
+    */
+    bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
+            sizeof(ulong),
+            &systemerror);
+    if(systemerror)
+    {   ReportError(errorcontext,systemerror);
+        FreeMemory((farvoid *)bitarraybase,&systemerror);
+        ErrorExit();
+    }
+}
+
+/*
+** All's well if we get here. Repeatedly perform bitops until the
+** accumulated elapsed time is greater than # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+    accumtime+=DoBitfieldIteration(bitarraybase,
+            bitoparraybase,
+            locbitopstruct->bitoparraysize,&nbitops);
+    iterations+=(double)nbitops;
+} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Also, set adjustment flag to show that we don't have
+** to do self adjusting in the future.
+*/
+FreeMemory((farvoid *)bitarraybase,&systemerror);
+FreeMemory((farvoid *)bitoparraybase,&systemerror);
+locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
+if(locbitopstruct->adjust==0)
+    locbitopstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoBitfieldIteration **
+*************************
+** Perform a single iteration of the bitfield benchmark.
+** Resets the bitmap, builds bitoparraysize offset/run-length
+** pairs in bitoparraybase, then times the execution of those
+** operations on the bitmap.
+** Stores the sum of all run lengths in *nbitops and returns
+** the # of ticks accumulated by the timed part.
+*/
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+        farulong *bitoparraybase,
+        long bitoparraysize,
+        ulong *nbitops)
+{
+long i;                 /* Index */
+ulong bitoffset;        /* Offset into bitmap */
+ulong runlength;        /* Length of a run of bits */
+ulong elapsed;          /* Time to execute */
+
+/*
+** Clear # bitops counter
+*/
+*nbitops=0L;
+
+/*
+** Reset random number generator so things repeat, and reset
+** every word of the bitmap to an alternating bit pattern.
+** The bitmap size comes from global_bitopstruct.
+*/
+randnum((int32)13);
+for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
+{
+#ifdef LONG64
+    bitarraybase[i]=(ulong)0x5555555555555555;
+#else
+    bitarraybase[i]=(ulong)0x55555555;
+#endif
+}
+randnum((int32)13);
+
+/*
+** Construct a set of bitmap offsets and run lengths, stored as
+** consecutive pairs. The offset is abs_randwc(262140); the run
+** length is abs_randwc() of what is left between that offset
+** and 262140.
+*/
+for (i=0;i<bitoparraysize;i++)
+{
+    /* First item is offset */
+    bitoffset=abs_randwc((int32)262140);
+    bitoparraybase[i+i]=bitoffset;
+
+    /* Next item is run length */
+    runlength=abs_randwc((int32)262140-bitoffset);
+    bitoparraybase[i+i+1L]=runlength;
+    *nbitops+=runlength;
+}
+
+/*
+** Array of offset and lengths built...do an iteration of
+** the test.
+** Start the stopwatch.
+*/
+elapsed=StartStopwatch();
+
+/*
+** Loop through array of offset/run length pairs.
+** The operation depends on the index modulo 3:
+** 0 sets the run, 1 clears it, 2 complements it.
+*/
+for(i=0;i<bitoparraysize;i++)
+{
+    farulong *pair=bitoparraybase+i+i;  /* pair[0]=offset, pair[1]=length */
+    long op=i % 3;
+
+    if(op==0)
+        ToggleBitRun(bitarraybase,pair[0],pair[1],1);
+    else if(op==1)
+        ToggleBitRun(bitarraybase,pair[0],pair[1],0);
+    else if(op==2)
+        FlipBitRun(bitarraybase,pair[0],pair[1]);
+}
+
+/*
+** Return elapsed time
+*/
+return(StopStopwatch(elapsed));
+}
+
+
+/*****************************
+** ToggleBitRun              *
+******************************
+** Set or clear a run of nbits starting at
+** bit_addr in bitmap.
+** val nonzero sets the bits, val zero clears them.
+** Bits are numbered from the least significant bit of word 0;
+** a word holds 64 bits when LONG64 is defined, else 32.
+*/
+static void ToggleBitRun(farulong *bitmap,      /* Bitmap */
+    ulong bit_addr,                             /* Address of bits to set */
+    ulong nbits,                                /* # of bits to set/clr */
+    uint val)                                   /* 1 or 0 */
+{
+unsigned long bindex;   /* Index into array */
+unsigned long mask;     /* Single-bit mask within the word */
+
+while(nbits--)
+{
+    /*
+    ** The mask is built from an unsigned 1 so that shifting it
+    ** into the top bit of the word is well defined.
+    */
+#ifdef LONG64
+    bindex=bit_addr>>6;                 /* Index is number /64 */
+    mask=1UL<<(bit_addr % 64);          /* Bit number in word */
+#else
+    bindex=bit_addr>>5;                 /* Index is number /32 */
+    mask=1UL<<(bit_addr % 32);          /* Bit number in word */
+#endif
+    if(val)
+        bitmap[bindex]|=mask;
+    else
+        bitmap[bindex]&=~mask;
+    bit_addr++;
+}
+return;
+}
+
+/***************
+** FlipBitRun **
+****************
+** Complements a run of nbits bits starting at bit_addr in bitmap.
+** Bits are numbered from the least significant bit of word 0;
+** a word holds 64 bits when LONG64 is defined, else 32.
+*/
+static void FlipBitRun(farulong *bitmap,        /* Bit map */
+    ulong bit_addr,                             /* Bit address */
+    ulong nbits)                                /* # of bits to flip */
+{
+unsigned long bindex;   /* Index into array */
+unsigned long mask;     /* Single-bit mask within the word */
+
+while(nbits--)
+{
+    /*
+    ** The mask is built from an unsigned 1 so that shifting it
+    ** into the top bit of the word is well defined.
+    */
+#ifdef LONG64
+    bindex=bit_addr>>6;                 /* Index is number /64 */
+    mask=1UL<<(bit_addr % 64);          /* Bit number in longword */
+#else
+    bindex=bit_addr>>5;                 /* Index is number /32 */
+    mask=1UL<<(bit_addr % 32);          /* Bit number in longword */
+#endif
+    bitmap[bindex]^=mask;
+    bit_addr++;
+}
+
+return;
+}
+
+/*****************************
+** FLOATING-POINT EMULATION **
+*****************************/
+
+/**************
+** DoEmFloat **
+***************
+** Perform the floating-point emulation routines portion of the
+** CPU benchmark. Takes no arguments and returns nothing;
+** parameters and results live in global_emfloatstruct:
+**  arraysize    = # of elements in each of the three arrays (read)
+**  loops        = loop count handed to DoEmFloatIteration()
+**                 (tuned here when adjust==0, otherwise read)
+**  request_secs = run time to accumulate before stopping (read)
+**  emflops      = iterations * loops per second (written)
+**  adjust       = set to 1 once self-adjustment has been done
+*/
+void DoEmFloat(void)
+{
+EmFloatStruct *locemfloatstruct;        /* Local structure */
+InternalFPF *abase;             /* Base of A array */
+InternalFPF *bbase;             /* Base of B array */
+InternalFPF *cbase;             /* Base of C array */
+ulong accumtime;                /* Accumulated time in ticks */
+double iterations;              /* # of iterations */
+ulong tickcount;                /* # of ticks */
+char *errorcontext;             /* Error context string pointer */
+int systemerror;                /* For holding error code */
+ulong loops;                    /* # of loops */
+
+/*
+** Link to global structure
+*/
+locemfloatstruct=&global_emfloatstruct;
+
+/*
+** Set the error context
+*/
+errorcontext="CPU:Floating Emulation";
+
+/*
+** Allocate the three arrays. On failure, release whatever was
+** already allocated before bailing out.
+*/
+abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+        &systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    ErrorExit();
+}
+
+bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+        &systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    FreeMemory((farvoid *)abase,&systemerror);
+    ErrorExit();
+}
+
+cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
+        &systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    FreeMemory((farvoid *)abase,&systemerror);
+    FreeMemory((farvoid *)bbase,&systemerror);
+    ErrorExit();
+}
+
+/*
+** Set up the arrays
+*/
+SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
+
+/*
+** See if we need to do self-adjusting code.
+*/
+if(locemfloatstruct->adjust==0)
+{
+    locemfloatstruct->loops=0;
+
+    /*
+    ** Do an iteration of the tests. If the elapsed time is
+    ** not above the minimum, double the loop count and try
+    ** again, giving up once CPUEMFLOATLOOPMAX is reached.
+    */
+    for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
+    {   tickcount=DoEmFloatIteration(abase,bbase,cbase,
+                locemfloatstruct->arraysize,
+                loops);
+        if(tickcount>global_min_ticks)
+        {   locemfloatstruct->loops=loops;
+            break;
+        }
+    }
+}
+
+/*
+** Verify that self adjustment code worked: loops is still 0
+** if no loop count below the limit took long enough.
+*/
+if(locemfloatstruct->loops==0)
+{   printf("CPU:EMFPU -- CPUEMFLOATLOOPMAX limit hit\n");
+    FreeMemory((farvoid *)abase,&systemerror);
+    FreeMemory((farvoid *)bbase,&systemerror);
+    FreeMemory((farvoid *)cbase,&systemerror);
+    ErrorExit();
+}
+
+/*
+** All's well if we get here. Repeatedly perform floating
+** tests until the accumulated time is greater than the
+** # of seconds requested.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+do {
+    accumtime+=DoEmFloatIteration(abase,bbase,cbase,
+            locemfloatstruct->arraysize,
+            locemfloatstruct->loops);
+    iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
+
+
+/*
+** Clean up, calculate results, and go home.
+** Also, indicate that adjustment is done.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+FreeMemory((farvoid *)cbase,&systemerror);
+
+locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
+        (double)TicksToFracSecs(accumtime);
+if(locemfloatstruct->adjust==0)
+    locemfloatstruct->adjust=1;
+
+#ifdef DEBUG
+printf("----------------------------------------------------------------------------\n");
+#endif
+return;
+}
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/**************
+** DoFourier **
+***************
+** Perform the transcendental/trigonometric portion of the
+** benchmark. This benchmark calculates the first n
+** fourier coefficients of the function (x+1)^x defined
+** on the interval 0,2.
+** Takes no arguments and returns nothing; parameters and
+** results live in global_fourierstruct:
+**  arraysize    = # of coefficients per iteration (tuned here
+**                 when adjust==0, otherwise read)
+**  request_secs = run time to accumulate before stopping (read)
+**  fflops       = coefficients calculated per second (written)
+**  adjust       = set to 1 once self-adjustment has been done
+*/
+void DoFourier(void)
+{
+FourierStruct *locfourierstruct=&global_fourierstruct; /* Link to global structure */
+char *errorcontext="FPU:Transcendental";        /* Error context string */
+fardouble *abase;               /* Base of A[] coefficients array */
+fardouble *bbase;               /* Base of B[] coefficients array */
+unsigned long accumtime=0L;     /* Accumulated time in ticks */
+double iterations=(double)0.0;  /* # of iterations */
+int systemerror;                /* For error code */
+int tuning;                     /* Nonzero while self-adjusting */
+
+/*
+** Self-adjustment starts at 100 elements and grows by 50 per
+** attempt; otherwise the stored array size is used as is.
+*/
+tuning=(locfourierstruct->adjust==0);
+if(tuning)
+    locfourierstruct->arraysize=100L;
+
+for(;;)
+{
+    abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+            &systemerror);
+    if(systemerror)
+    {   ReportError(errorcontext,systemerror);
+        ErrorExit();
+    }
+
+    bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
+            &systemerror);
+    if(systemerror)
+    {   ReportError(errorcontext,systemerror);
+        FreeMemory((void *)abase,&systemerror);
+        ErrorExit();
+    }
+
+    /* No adjustment needed: just allocate the arrays, and go */
+    if(!tuning)
+        break;
+
+    /*
+    ** Do a trial iteration. If it took longer than the permitted
+    ** minimum we keep these arrays.
+    */
+    if(DoFPUTransIteration(abase,bbase,
+            locfourierstruct->arraysize)>global_min_ticks)
+        break;
+
+    /*
+    ** Make bigger arrays and try again.
+    */
+    FreeMemory((farvoid *)abase,&systemerror);
+    FreeMemory((farvoid *)bbase,&systemerror);
+    locfourierstruct->arraysize+=50L;
+}
+
+/*
+** All's well if we get here. Repeatedly perform integration
+** tests until the accumulated time is greater than the
+** # of seconds requested. One pass yields arraysize A terms
+** and arraysize-1 B terms, hence 2*arraysize-1 per pass.
+*/
+do {
+    accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
+    iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
+} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home.
+** Also set adjustment flag to indicate no adjust code needed.
+*/
+FreeMemory((farvoid *)abase,&systemerror);
+FreeMemory((farvoid *)bbase,&systemerror);
+
+locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
+
+if(locfourierstruct->adjust==0)
+    locfourierstruct->adjust=1;
+
+return;
+}
+
+/************************
+** DoFPUTransIteration **
+*************************
+** Perform an iteration of the FPU Transcendental/trigonometric
+** benchmark: calculate the first arraysize fourier coefficients
+** of the function (x+1)^x on the interval 0,2 and return the
+** # of ticks that took.
+** abase[0..arraysize-1] and bbase[1..arraysize-1] are written;
+** bbase[0] is left untouched.
+** NOTE: The # of integration steps is fixed at 200.
+*/
+static ulong DoFPUTransIteration(fardouble *abase,      /* A coeffs. */
+    fardouble *bbase,                                   /* B coeffs. */
+    ulong arraysize)                                    /* # of coeffs */
+{
+/*
+** Fundamental frequency: ( 2 * pi ) / period...and since the
+** period is 2, omega is simply pi.
+*/
+const double omega=(double)3.1415926535897932;
+unsigned long n;                /* Coefficient index */
+unsigned long stopwatch;        /* Stopwatch value */
+
+stopwatch=StartStopwatch();
+
+/*
+** A[0] is half the plain integral of the function
+** (no omega * n needed).
+*/
+abase[0]=TrapezoidIntegrate((double)0.0,(double)2.0,200,
+        (double)0.0,0)/(double)2.0;
+
+/*
+** Remaining terms. The 2/period factor outside the integral
+** can be ignored: the period is 2, so the factor is 1.
+*/
+for(n=1;n<arraysize;n++)
+{
+    /* Cosine (A) term */
+    abase[n]=TrapezoidIntegrate((double)0.0,(double)2.0,200,
+            omega * (double)n,1);
+
+    /* Sine (B) term */
+    bbase[n]=TrapezoidIntegrate((double)0.0,(double)2.0,200,
+            omega * (double)n,2);
+}
+#ifdef DEBUG
+{
+    int i;
+    printf("\nA[i]=\n");
+    for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
+    printf("\nB[i]=\n(undefined) ");
+    for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
+}
+#endif
+
+/*
+** All done, stop the stopwatch
+*/
+return(StopStopwatch(stopwatch));
+}
+
+/***********************
+** TrapezoidIntegrate **
+************************
+** Perform a simple trapezoid integration on the
+** function selected through thefunction().
+**  x0,x1 set the lower and upper bounds of the
+**   integration.
+**  nsteps sets the stepsize, (x1-x0)/nsteps.
+**  omegan is the fundamental frequency times
+**   the series member #
+**  select = 0 for the A[0] term, 1 for cosine terms, and
+**   2 for sine terms.
+** The two end points are weighted by one half. For nsteps
+** other than 1, nsteps-2 interior points are added, starting
+** at x0+stepsize.
+** Returns the value.
+*/
+static double TrapezoidIntegrate( double x0,    /* Lower bound */
+        double x1,                              /* Upper bound */
+        int nsteps,                             /* # of steps */
+        double omegan,                          /* omega * n */
+        int select)
+{
+double x=x0;            /* Independent variable */
+double dx;              /* Stepsize */
+double rvalue;          /* Return value */
+int remaining;          /* Interior points still to add */
+
+/*
+** Calculate stepsize
+*/
+dx=(x1 - x0) / (double)nsteps;
+
+/*
+** Start with half the value at the lower bound.
+*/
+rvalue=thefunction(x0,omegan,select)/(double)2.0;
+
+/*
+** Add the interior points.
+*/
+if(nsteps!=1)
+{
+    for(remaining=nsteps-2; remaining!=0; --remaining)
+    {
+        x+=dx;
+        rvalue+=thefunction(x,omegan,select);
+    }
+}
+
+/*
+** Finish computation: add half the value at the upper bound
+** and scale by the stepsize.
+*/
+rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
+
+return(rvalue);
+}
+
+/****************
+** thefunction **
+*****************
+** This routine evaluates the integrand used
+** in the Trapezoid integration.
+**  x is the independent variable
+**  omegan is omega * n
+**  select chooses the form:
+**   0 = (x+1)^x
+**   1 = (x+1)^x * cos(omegan * x)
+**   2 = (x+1)^x * sin(omegan * x)
+**  Any other select value yields 0.0.
+*/
+static double thefunction(double x,     /* Independent variable */
+        double omegan,                  /* Omega * term */
+        int select)                     /* Choose term */
+{
+double base;            /* (x+1)^x, common to all three forms */
+
+/*
+** Callers should never pass anything outside 0..2, but
+** return a defined value if they do.
+*/
+if(select<0 || select>2)
+    return(0.0);
+
+base=pow(x+(double)1.0,x);
+
+if(select==1)
+    return(base * cos(omegan * x));
+if(select==2)
+    return(base * sin(omegan * x));
+return(base);
+}
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*************
+** DoAssign **
+**************
+** Benchmark driver for the assignment algorithm.
+** The algorithm was adapted from the step by step guide found
+** in "Quantitative Decision Making for Business" (Gordon,
+** Pressman, and Cohn; Prentice-Hall)
+**
+** Settings are read from, and the result is written to,
+** global_assignstruct:
+**   adjust        0 = the self-adjustment pass still has to run
+**   numarrays     # of tableaus handled per timed iteration
+**   request_secs  the timed loop runs until this much time elapsed
+**   iterspersec   (output) iterations * numarrays per second
+**
+** NOTES:
+** 1. Even though the algorithm distinguishes between
+** ASSIGNROWS and ASSIGNCOLS, as though the two might
+** be different, it does presume a square matrix.
+** I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
+** This makes for some algorithmically-correct but
+** probably non-optimal constructs.
+**
+*/
+void DoAssign(void)
+{
+AssignStruct *locassignstruct; /* Local structure ptr */
+farlong *arraybase; /* Storage for all tableaus, back to back */
+char *errorcontext; /* Tag handed to ReportError */
+int systemerror; /* Status written by AllocateMemory/FreeMemory */
+ulong accumtime; /* Ticks accumulated by the timed loop */
+double iterations; /* # of timed DoAssignIteration calls */
+
+/*
+** Link to global structure
+*/
+locassignstruct=&global_assignstruct;
+
+/*
+** Set the error context string.
+*/
+errorcontext="CPU:Assignment";
+
+/*
+** See if we need to do self adjustment code.
+*/
+if(locassignstruct->adjust==0)
+{
+ /*
+ ** Self-adjustment code. The system begins by working on 1
+ ** array. If it does that in no time, then two arrays
+ ** are built. This process continues until
+ ** enough arrays are built to handle the tolerance.
+ */
+ locassignstruct->numarrays=1;
+ while(1)
+ {
+ /*
+ ** Allocate space for arrays
+ */
+ arraybase=(farlong *) AllocateMemory(sizeof(long)*
+ ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+
+ /*
+ ** Do an iteration of the assignment alg. If the
+ ** elapsed time is less than or equal to the permitted
+ ** minimum, then allocate for more arrays and
+ ** try again.
+ */
+ if(DoAssignIteration(arraybase,
+ locassignstruct->numarrays)>global_min_ticks)
+ break; /* We're ok...exit */
+
+ /*
+ ** Too fast to measure: release this buffer and retry
+ ** with one more array.
+ */
+ FreeMemory((farvoid *)arraybase, &systemerror);
+ locassignstruct->numarrays++;
+ }
+}
+else
+{ /*
+ ** Adjustment already done: allocate space for the
+ ** numarrays arrays already recorded in the structure.
+ */
+ arraybase=(farlong *)AllocateMemory(sizeof(long)*
+ ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
+ &systemerror);
+ if(systemerror)
+ { ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)arraybase,
+ &systemerror);
+ ErrorExit();
+ }
+}
+
+/*
+** All's well if we get here. Do the tests.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+/*
+** Keep iterating until the accumulated time reaches the
+** requested number of seconds. The loop always runs at least once.
+*/
+do {
+ accumtime+=DoAssignIteration(arraybase,
+ locassignstruct->numarrays);
+ iterations+=(double)1.0;
+} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)arraybase,&systemerror);
+
+/*
+** Each iteration handles numarrays tableaus, so the reported
+** rate is scaled by numarrays.
+*/
+locassignstruct->iterspersec=iterations *
+ (double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
+
+if(locassignstruct->adjust==0)
+ locassignstruct->adjust=1;
+
+return;
+
+}
+
+/**********************
+** DoAssignIteration **
+***********************
+** Run one timed iteration of the assignment test: refill all
+** numarrays tableaus found at arraybase (untimed), then solve
+** every one of them (timed).
+** Returns the number of ticks the solving took.
+*/
+static ulong DoAssignIteration(farlong *arraybase,
+                ulong numarrays)
+{
+longptr cursor;         /* Walks from one tableau to the next */
+ulong startticks;       /* Stopwatch reading at the start */
+ulong remaining;        /* Tableaus still to be solved */
+
+/*
+** Fill the arrays with the random cost table.  This happens
+** before the stopwatch starts.
+*/
+LoadAssignArrayWithRand(arraybase,numarrays);
+
+cursor.ptrs.p=arraybase;
+
+startticks=StartStopwatch();
+
+/*
+** Solve the tableaus one after another.  The cursor moves on by
+** exactly one tableau (ASSIGNROWS*ASSIGNCOLS elements) per pass.
+*/
+for(remaining=numarrays; remaining!=0; remaining--)
+{
+        Assignment(*cursor.ptrs.ap);
+        cursor.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+}
+
+return(StopStopwatch(startticks));
+}
+
+/****************************
+** LoadAssignArrayWithRand **
+*****************************
+** Fill numarrays assignment tableaus, stored back to back at
+** arraybase, with cost values.  The first tableau is filled by
+** LoadAssign; every following one is a copy of the first.
+*/
+static void LoadAssignArrayWithRand(farlong *arraybase,
+                ulong numarrays)
+{
+longptr first;          /* The tableau that receives the random fill */
+longptr dest;           /* The tableau currently being copied into */
+ulong copies;           /* Copies still to be made */
+
+first.ptrs.p=arraybase;
+dest.ptrs.p=arraybase;
+
+LoadAssign(*(first.ptrs.ap));
+
+/*
+** With zero or one array there is nothing to copy.
+*/
+if(numarrays<=1)
+        return;
+
+/*
+** Step the destination forward one tableau at a time and copy
+** the first tableau into it.
+*/
+for(copies=numarrays-1; copies!=0; copies--)
+{
+        dest.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
+        CopyToAssign(*(first.ptrs.ap),*(dest.ptrs.ap));
+}
+
+return;
+}
+
+/***************
+** LoadAssign **
+****************
+** Fill the ASSIGNROWS x ASSIGNCOLS array given by arraybase with
+** values obtained from abs_randwc((int32)5000000).
+** The random number generator is re-seeded with 13 first, so
+** every call produces the same table.
+*/
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
+{
+ushort i,j;
+
+/*
+** Reset random number generator so things repeat.
+*/
+randnum((int32)13);
+
+/*
+** The inner index walks the column dimension, so it is bounded
+** by ASSIGNCOLS (it used to be bounded by ASSIGNROWS, which only
+** worked because the two are required to be equal).
+*/
+for(i=0;i<ASSIGNROWS;i++)
+        for(j=0;j<ASSIGNCOLS;j++)
+                arraybase[i][j]=abs_randwc((int32)5000000);
+
+return;
+}
+
+/*****************
+** CopyToAssign **
+******************
+** Copy every element of arrayfrom into the same position of
+** arrayto.  Used to duplicate the initial array into all the
+** arrays that follow it.
+*/
+static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
+                farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
+{
+ushort row,col;
+
+for(row=0;row<ASSIGNROWS;row++)
+{
+        for(col=0;col<ASSIGNCOLS;col++)
+        {
+                arrayto[row][col]=arrayfrom[row][col];
+        }
+}
+
+return;
+}
+
+/***************
+** Assignment **
+****************
+** Solve one assignment tableau in place.  The costs in arraybase
+** are first reduced by calc_minimum_costs; after that
+** first_assignments and second_assignments alternate until
+** first_assignments reports ASSIGNROWS selections.
+*/
+static void Assignment(farlong arraybase[][ASSIGNCOLS])
+{
+short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
+
+calc_minimum_costs(arraybase);
+
+/*
+** Try to assign; whenever fewer than ASSIGNROWS selections come
+** out, revise the tableau and try again.
+*/
+for(;;)
+{
+        if(first_assignments(arraybase,assignedtableau)==ASSIGNROWS)
+                break;
+        second_assignments(arraybase,assignedtableau);
+}
+
+#ifdef DEBUG
+{
+        int row,col;
+        printf("\nColumn choices for each row\n");
+        for(row=0;row<ASSIGNROWS;row++)
+        {
+                printf("R%03d: ",row);
+                for(col=0;col<ASSIGNCOLS;col++)
+                        if(assignedtableau[row][col]==1)
+                                printf("%03d ",col);
+        }
+}
+#endif
+
+return;
+}
+
+/***********************
+** calc_minimum_costs **
+************************
+** Reduce the tableau in place: subtract from every row its
+** smallest element, then subtract from every column its
+** smallest element.
+*/
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
+{
+ushort row,col;         /* Index variables */
+long lowest;            /* Smallest value seen in the current line */
+
+/*
+** Row pass: find each row's minimum and take it off the row.
+*/
+for(row=0;row<ASSIGNROWS;row++)
+{
+        lowest=MAXPOSLONG;
+        for(col=0;col<ASSIGNCOLS;col++)
+        {
+                if(lowest>tableau[row][col])
+                        lowest=tableau[row][col];
+        }
+
+        for(col=0;col<ASSIGNCOLS;col++)
+                tableau[row][col]-=lowest;
+}
+
+/*
+** Column pass: same thing, stepping through the array column-wise.
+*/
+for(col=0;col<ASSIGNCOLS;col++)
+{
+        lowest=MAXPOSLONG;
+        for(row=0;row<ASSIGNROWS;row++)
+        {
+                if(lowest>tableau[row][col])
+                        lowest=tableau[row][col];
+        }
+
+        /*
+        ** A column whose minimum is already zero needs no
+        ** subtraction, so skip the write loop for it.
+        */
+        if(lowest==0)
+                continue;
+
+        for(row=0;row<ASSIGNROWS;row++)
+                tableau[row][col]-=lowest;
+}
+
+return;
+}
+
+/**********************
+** first_assignments **
+***********************
+** Do first assignments.
+** tableau[] holds the (already reduced) costs and is only read.
+** assignedtableau[] is cleared and then rebuilt on every call.
+** It holds a set of values that indicate the assignment of a
+** value, or its elimination.
+** The values are:
+** 0 = Item is neither assigned nor eliminated.
+** 1 = Item is assigned
+** 2 = Item is eliminated
+** Returns the number of selections made. If this equals
+** the number of rows, then an optimum has been determined.
+*/
+static int first_assignments(long tableau[][ASSIGNCOLS],
+ short assignedtableau[][ASSIGNCOLS])
+{
+ushort i,j,k; /* Index variables */
+ushort numassigns; /* # of assignments */
+ushort totnumassigns; /* Total # of assignments */
+ushort numzeros; /* # of zeros in row */
+int selected=0; /* Flag used to indicate selection */
+
+/*
+** Clear the assignedtableau, setting all members to show that
+** no one is yet assigned, eliminated, or anything.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ for(j=0;j<ASSIGNCOLS;j++)
+ assignedtableau[i][j]=0;
+
+totnumassigns=0;
+do {
+ /* numassigns counts the assignments made in this sweep only */
+ numassigns=0;
+ /*
+ ** Step through rows. Count the zeros in the row that are
+ ** still unmarked (neither assigned nor eliminated). If there
+ ** is exactly one, mark that as an assigned row/col. Eliminate
+ ** other zeros in the same column.
+ */
+ for(i=0;i<ASSIGNROWS;i++)
+ { numzeros=0;
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(tableau[i][j]==0L)
+ if(assignedtableau[i][j]==0)
+ { numzeros++;
+ selected=j;
+ }
+ if(numzeros==1)
+ { numassigns++;
+ totnumassigns++;
+ assignedtableau[i][selected]=1;
+ for(k=0;k<ASSIGNROWS;k++)
+ if((k!=i) &&
+ (tableau[k][selected]==0))
+ assignedtableau[k][selected]=2;
+ }
+ }
+ /*
+ ** Step through columns, doing same as above. Here "selected"
+ ** is a row index, and the zeros that get eliminated are the
+ ** other ones in the selected row.
+ */
+ for(j=0;j<ASSIGNCOLS;j++)
+ { numzeros=0;
+ for(i=0;i<ASSIGNROWS;i++)
+ if(tableau[i][j]==0L)
+ if(assignedtableau[i][j]==0)
+ { numzeros++;
+ selected=i;
+ }
+ if(numzeros==1)
+ { numassigns++;
+ totnumassigns++;
+ assignedtableau[selected][j]=1;
+ for(k=0;k<ASSIGNCOLS;k++)
+ if((k!=j) &&
+ (tableau[selected][k]==0))
+ assignedtableau[selected][k]=2;
+ }
+ }
+ /*
+ ** Repeat until no more assignments to be made.
+ */
+} while(numassigns!=0);
+
+/*
+** See if we can leave at this point.
+*/
+if(totnumassigns==ASSIGNROWS) return(totnumassigns);
+
+/*
+** Now step through the array by row. If you find any unassigned
+** zeros, pick the first in the row. Eliminate all zeros from
+** that same row & column. This occurs if there are multiple optima...
+** possibly.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{ selected=-1; /* -1 = no unmarked zero found in this row */
+ for(j=0;j<ASSIGNCOLS;j++)
+ if((tableau[i][j]==0L) &&
+ (assignedtableau[i][j]==0))
+ { selected=j;
+ break;
+ }
+ if(selected!=-1)
+ { assignedtableau[i][selected]=1;
+ totnumassigns++;
+ for(k=0;k<ASSIGNCOLS;k++)
+ if((k!=selected) &&
+ (tableau[i][k]==0L))
+ assignedtableau[i][k]=2;
+ for(k=0;k<ASSIGNROWS;k++)
+ if((k!=i) &&
+ (tableau[k][selected]==0L))
+ assignedtableau[k][selected]=2;
+ }
+}
+
+return(totnumassigns);
+}
+
+/***********************
+** second_assignments **
+************************
+** This section of the algorithm creates the revised
+** tableau, and is difficult to explain. I suggest you
+** refer to the algorithm's source, mentioned in comments
+** toward the beginning of the program.
+**
+** In outline: rows and columns are flagged in linesrow[] and
+** linescol[], the smallest value among the cells in flagged
+** rows and unflagged columns is found, and that value is
+** subtracted from those cells and added to the cells in
+** unflagged rows and flagged columns.
+** tableau[] is modified in place; assignedtableau[] is only read.
+*/
+static void second_assignments(long tableau[][ASSIGNCOLS],
+ short assignedtableau[][ASSIGNCOLS])
+{
+int i,j; /* Indexes */
+short linesrow[ASSIGNROWS]; /* Per-row flag, 1 = row is checked */
+short linescol[ASSIGNCOLS]; /* Per-column flag, 1 = column is checked */
+long smallest; /* Holds smallest value */
+ushort numassigns; /* Number of assignments */
+ushort newrows; /* New rows to be considered */
+/*
+** Clear the linesrow and linescol arrays.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ linesrow[i]=0;
+for(i=0;i<ASSIGNCOLS;i++)
+ linescol[i]=0;
+
+/*
+** Scan rows, flag each row that has no assignment in it.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+{ numassigns=0;
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(assignedtableau[i][j]==1)
+ { numassigns++;
+ break;
+ }
+ if(numassigns==0) linesrow[i]=1;
+}
+
+/*
+** Propagate the flags until a pass flags no new row.
+*/
+do {
+
+ newrows=0;
+ /*
+ ** For each row checked above, scan for any zeros. If found,
+ ** check the associated column.
+ */
+ for(i=0;i<ASSIGNROWS;i++)
+ { if(linesrow[i]==1)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(tableau[i][j]==0)
+ linescol[j]=1;
+ }
+
+ /*
+ ** Now scan checked columns. If any contain assigned zeros, check
+ ** the associated row.
+ */
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]==1)
+ for(i=0;i<ASSIGNROWS;i++)
+ if((assignedtableau[i][j]==1) &&
+ (linesrow[i]!=1))
+ {
+ linesrow[i]=1;
+ newrows++;
+ }
+} while(newrows!=0);
+
+/*
+** linesrow[n]==0 indicate rows covered by imaginary line
+** linescol[n]==1 indicate cols covered by imaginary line
+** For all cells not covered by imaginary lines, determine smallest
+** value.
+*/
+smallest=MAXPOSLONG;
+for(i=0;i<ASSIGNROWS;i++)
+ if(linesrow[i]!=0)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]!=1)
+ if(tableau[i][j]<smallest)
+ smallest=tableau[i][j];
+
+/*
+** Subtract smallest from all cells in the above set.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ if(linesrow[i]!=0)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]!=1)
+ tableau[i][j]-=smallest;
+
+/*
+** Add smallest to all cells covered by two lines.
+*/
+for(i=0;i<ASSIGNROWS;i++)
+ if(linesrow[i]==0)
+ for(j=0;j<ASSIGNCOLS;j++)
+ if(linescol[j]==1)
+ tableau[i][j]+=smallest;
+
+return;
+}
+
+/********************
+** IDEA Encryption **
+*********************
+** IDEA - International Data Encryption Algorithm.
+** Based on code presented in Applied Cryptography by Bruce Schneier.
+** Which was based on code developed by Xuejia Lai and James L. Massey.
+** Other modifications made by Colin Plumb.
+**
+*/
+
+/***********
+** DoIDEA **
+************
+** Perform IDEA encryption. Note that we time encryption & decryption
+** time as being a single loop.
+**
+** Settings are read from, and the result is written to,
+** global_ideastruct:
+**   arraysize     size in bytes of each of the three buffers
+**   adjust        0 = the self-adjustment pass still has to run
+**   loops         # of encrypt/decrypt passes per timed iteration
+**   request_secs  the timed loop runs until this much time elapsed
+**   iterspersec   (output) passes per second
+*/
+void DoIDEA(void)
+{
+IDEAStruct *locideastruct; /* Loc pointer to global structure */
+int i;
+IDEAkey Z,DK; /* Encryption and decryption subkeys */
+u16 userkey[8]; /* The user key the subkeys are derived from */
+ulong accumtime; /* Ticks accumulated by the timed loop */
+double iterations; /* Total # of passes performed */
+char *errorcontext; /* Tag handed to ReportError */
+int systemerror; /* Status written by AllocateMemory/FreeMemory */
+faruchar *plain1; /* First plaintext buffer */
+faruchar *crypt1; /* Encryption buffer */
+faruchar *plain2; /* Second plaintext buffer */
+
+/*
+** Link to global data
+*/
+locideastruct=&global_ideastruct;
+
+/*
+** Set error context
+*/
+errorcontext="CPU:IDEA";
+
+/*
+** Re-init random-number generator.
+*/
+/* randnum(3L); */
+randnum((int32)3);
+
+/*
+** Build an encryption/decryption key
+*/
+for (i=0;i<8;i++)
+ /* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
+ userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
+for(i=0;i<KEYLEN;i++)
+ Z[i]=0;
+
+/*
+** Compute encryption/decryption subkeys
+*/
+en_key_idea(userkey,Z);
+de_key_idea(Z,DK);
+
+/*
+** Allocate memory for buffers. We'll make 3, called plain1,
+** crypt1, and plain2. It works like this:
+** plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
+** So, plain1 and plain2 should match.
+** Also, fill up plain1 with sample text.
+** On a failed allocation the buffers obtained so far are
+** released before ErrorExit() is called.
+*/
+plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+ ReportError(errorcontext,systemerror);
+ ErrorExit();
+}
+
+crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+ ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)plain1,&systemerror);
+ ErrorExit();
+}
+
+plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
+if(systemerror)
+{
+ ReportError(errorcontext,systemerror);
+ FreeMemory((farvoid *)plain1,&systemerror);
+ FreeMemory((farvoid *)crypt1,&systemerror);
+ ErrorExit();
+}
+/*
+** Note that we build the "plaintext" by simply loading
+** the array up with random numbers.
+*/
+for(i=0;i<locideastruct->arraysize;i++)
+ plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(locideastruct->adjust==0)
+{
+ /*
+ ** Do self-adjustment. This involves initializing the
+ ** # of loops and increasing the loop count until we
+ ** get a number of loops that we can use.
+ ** The count starts at 100 and grows by 10 until one
+ ** iteration takes more than global_min_ticks, or until
+ ** MAXIDEALOOPS is reached.
+ */
+ for(locideastruct->loops=100L;
+ locideastruct->loops<MAXIDEALOOPS;
+ locideastruct->loops+=10L)
+ if(DoIDEAIteration(plain1,crypt1,plain2,
+ locideastruct->arraysize,
+ locideastruct->loops,
+ Z,DK)>global_min_ticks) break;
+}
+
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+/*
+** Each call performs "loops" passes, so that is what gets
+** added to the iteration count.
+*/
+do {
+ accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
+ locideastruct->arraysize,
+ locideastruct->loops,Z,DK);
+ iterations+=(double)locideastruct->loops;
+} while(TicksToSecs(accumtime)<locideastruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)plain1,&systemerror);
+FreeMemory((farvoid *)crypt1,&systemerror);
+FreeMemory((farvoid *)plain2,&systemerror);
+locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(locideastruct->adjust==0)
+ locideastruct->adjust=1;
+
+return;
+
+}
+
+/********************
+** DoIDEAIteration **
+*********************
+** Time nloops passes of the IDEA test.  One pass encrypts the
+** whole of plain1 into crypt1 with key Z and then decrypts
+** crypt1 into plain2 with key DK, one block of 4 u16 words at
+** a time.
+** Returns the number of ticks elapsed.
+*/
+static ulong DoIDEAIteration(faruchar *plain1,
+                faruchar *crypt1,
+                faruchar *plain2,
+                ulong arraysize,
+                ulong nloops,
+                IDEAkey Z,
+                IDEAkey DK)
+{
+register ulong pass;            /* Pass counter */
+register ulong offset;          /* Byte offset of the current block */
+ulong startticks;               /* Stopwatch reading at the start */
+#ifdef DEBUG
+int status=0;
+#endif
+
+startticks=StartStopwatch();
+
+for(pass=0;pass<nloops;pass++)
+{
+        /* Encrypt: plain1 -> crypt1 */
+        for(offset=0;offset<arraysize;offset+=(sizeof(u16)*4))
+                cipher_idea((u16 *)(plain1+offset),(u16 *)(crypt1+offset),Z);
+
+        /* Decrypt: crypt1 -> plain2 */
+        for(offset=0;offset<arraysize;offset+=(sizeof(u16)*4))
+                cipher_idea((u16 *)(crypt1+offset),(u16 *)(plain2+offset),DK);
+}
+
+#ifdef DEBUG
+/* The round trip must reproduce the plaintext byte for byte. */
+for(offset=0;offset<arraysize;offset++)
+        if(plain1[offset]!=plain2[offset]){
+                printf("IDEA Error! \n");
+                status=1;
+        }
+if (status==0) printf("IDEA: OK\n");
+#endif
+
+return(StopStopwatch(startticks));
+}
+
+/********
+** mul **
+*********
+** Performs multiplication, modulo (2**16)+1. This code is structured
+** on the assumption that untaken branches are cheaper than taken
+** branches, and that the compiler doesn't schedule branches.
+**
+**  a, b  the two 16-bit factors.  When one of them is 0 the result
+**        is 1 minus the other one (truncated to 16 bits), which
+**        matches treating 0 as 2**16.
+** Returns the 16-bit product.
+*/
+static u16 mul(register u16 a, register u16 b)
+{
+register u32 p;
+if(a)
+{       if(b)
+        {       /*
+                ** Multiply in u32.  A plain a*b would promote both
+                ** u16 operands to (signed) int, and products above
+                ** INT_MAX would then be signed overflow, which is
+                ** undefined behavior.
+                */
+                p=(u32)a*(u32)b;
+                b=low16(p);
+                a=(u16)(p>>16);
+                /* low half minus high half, plus 1 on borrow */
+                return(b-a+(b<a));
+        }
+        else
+                return(1-a);
+}
+else
+        return(1-b);
+}
+
+/********
+** inv **
+*********
+** Compute multiplicative inverse of x, modulo (2**16)+1
+** using Euclid's GCD algorithm. It is unrolled twice
+** to avoid swapping the meaning of the registers. And
+** some subtracts are changed to adds.
+**
+**  x  the value to invert; 0 and 1 are returned unchanged.
+** Returns the inverse as a 16-bit value.
+*/
+static u16 inv(u16 x)
+{
+u16 t0, t1; /* The two running coefficients */
+u16 q, y; /* Current quotient and the second remainder */
+
+if(x<=1)
+ return(x); /* 0 and 1 are self-inverse */
+/* First division step, with the modulus 0x10001 as dividend */
+t1=0x10001 / x;
+y=0x10001 % x;
+if(y==1)
+ return(low16(1-t1));
+t0=1;
+/*
+** Each pass performs two division steps, one reducing x by y and
+** one reducing y by x. The loop ends as soon as either remainder
+** reaches 1.
+*/
+do {
+ q=x/y;
+ x=x%y;
+ t0+=q*t1;
+ if(x==1) return(t0);
+ q=y/x;
+ y=y%x;
+ t1+=q*t0;
+} while(y!=1);
+return(low16(1-t1));
+}
+
+/****************
+** en_key_idea **
+*****************
+** Compute IDEA encryption subkeys Z
+**
+**  userkey  8 words of key material (read only)
+**  Z        receives KEYLEN subkeys
+*/
+static void en_key_idea(u16 *userkey, u16 *Z)
+{
+int i,j;
+
+/*
+** The first 8 subkeys are the user key itself.
+*/
+for(j=0;j<8;j++)
+ Z[j]=*userkey++;
+/*
+** shifts
+** j keeps counting from 8 up to KEYLEN. Every further subkey is
+** built from two of the 8 words in the current group: one
+** shifted left by 9 bits OR'ed with the next one shifted right
+** by 7 bits. i is the position within the group; when it reaches
+** 8, "Z+=i&8" moves the base pointer on by 8 words and "i&=7"
+** wraps the position back to 0.
+*/
+for(i=0;j<KEYLEN;j++)
+{ i++;
+ Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
+ Z+=i&8;
+ i&=7;
+}
+return;
+}
+
+/****************
+** de_key_idea **
+*****************
+** Compute IDEA decryption subkeys DK from encryption
+** subkeys Z.
+**
+** Z is read front to back while a temporary key TT is filled
+** back to front. Subkeys are either copied, negated, or
+** inverted with inv(). TT is then copied into DK and wiped.
+*/
+static void de_key_idea(IDEAkey Z, IDEAkey DK)
+{
+IDEAkey TT; /* Temporary key, filled from its end */
+int j;
+u16 t1, t2, t3;
+u16 *p; /* Write position in TT, moves downwards */
+p=(u16 *)(TT+KEYLEN);
+
+/*
+** First group of four subkeys: inverted, negated, negated,
+** inverted.
+*/
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+
+/*
+** Groups of six subkeys. Two are copied as they are, the other
+** four are treated like the group above. Note that inside this
+** loop t2 is stored before t3, whereas the groups outside the
+** loop store t3 before t2.
+*/
+for(j=1;j<ROUNDS;j++)
+{ t1=*Z++;
+ *--p=*Z++;
+ *--p=t1;
+ t1=inv(*Z++);
+ t2=-*Z++;
+ t3=-*Z++;
+ *--p=inv(*Z++);
+ *--p=t2;
+ *--p=t3;
+ *--p=t1;
+}
+/*
+** Last group of six subkeys, stored in the same order as the
+** first group.
+*/
+t1=*Z++;
+*--p=*Z++;
+*--p=t1;
+t1=inv(*Z++);
+t2=-*Z++;
+t3=-*Z++;
+*--p=inv(*Z++);
+*--p=t3;
+*--p=t2;
+*--p=t1;
+/*
+** Copy and destroy temp copy
+*/
+for(j=0,p=TT;j<KEYLEN;j++)
+{ *DK++=*p;
+ *p++=0;
+}
+
+return;
+}
+
+/*
+** MUL(x,y)
+** This #define creates a macro that computes x=x*y modulo 0x10001.
+** Requires temps t16 and t32. Also requires y to be strictly 16
+** bits. Here, I am using the simplest form. May not be the
+** fastest. -- RG
+*/
+/* #define MUL(x,y) (x=mul(low16(x),y)) */
+
+/****************
+** cipher_idea **
+*****************
+** IDEA encryption/decryption algorithm.
+**
+**  in   the 4 words to process (read only)
+**  out  receives the 4 result words
+**  Z    the subkeys; which direction is performed depends only
+**       on the key that is passed in
+** MUL() is a macro defined outside this function; presumably it
+** replaces its first argument by the product modulo 0x10001.
+*/
+static void cipher_idea(u16 in[4],
+ u16 out[4],
+ register IDEAkey Z)
+{
+register u16 x1, x2, x3, x4, t1, t2;
+/* register u16 t16;
+register u16 t32; */
+int r=ROUNDS;
+
+x1=*in++;
+x2=*in++;
+x3=*in++;
+x4=*in;
+
+/*
+** ROUNDS rounds, each consuming six subkeys.
+*/
+do {
+ MUL(x1,*Z++);
+ x2+=*Z++;
+ x3+=*Z++;
+ MUL(x4,*Z++);
+
+ t2=x1^x3;
+ MUL(t2,*Z++);
+ t1=t2+(x2^x4);
+ MUL(t1,*Z++);
+ t2=t1+t2;
+
+ x1^=t1;
+ x4^=t2;
+
+ /* x2 and x3 trade places on the way into the next round */
+ t2^=x2;
+ x2=x3^t1;
+ x3=t2;
+} while(--r);
+/*
+** Output step, four more subkeys. x3 is written before x2.
+*/
+MUL(x1,*Z++);
+*out++=x1;
+*out++=x3+*Z++;
+*out++=x2+*Z++;
+MUL(x4,*Z);
+*out=x4;
+return;
+}
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/**************
+** DoHuffman **
+***************
+** Execute a huffman compression on a block of plaintext.
+** Note that (as with IDEA encryption) an iteration of the
+** Huffman test includes a compression AND a decompression.
+** Also, the compression cycle includes building the
+** Huffman tree.
+**
+** Settings are read from, and the result is written to,
+** global_huffstruct:
+**   arraysize     size in bytes of each of the three text buffers
+**   adjust        0 = the self-adjustment pass still has to run
+**   loops         # of passes per timed iteration
+**   request_secs  the timed loop runs until this much time elapsed
+**   iterspersec   (output) passes per second
+** hufftree and plaintextlen are not declared in this function;
+** they are set here and declared elsewhere in the file.
+*/
+void DoHuffman(void)
+{
+HuffStruct *lochuffstruct; /* Loc pointer to global data */
+char *errorcontext; /* Tag handed to ReportError */
+int systemerror; /* Status written by AllocateMemory/FreeMemory */
+ulong accumtime; /* Ticks accumulated by the timed loop */
+double iterations; /* Total # of passes performed */
+farchar *comparray; /* Compressed text */
+farchar *decomparray; /* Decompressed text */
+farchar *plaintext; /* Text to be compressed */
+
+/*
+** Link to global data
+*/
+lochuffstruct=&global_huffstruct;
+
+/*
+** Set error context.
+*/
+errorcontext="CPU:Huffman";
+
+/*
+** Allocate memory for the plaintext and the compressed text.
+** We'll be really pessimistic here, and allocate equal amounts
+** for both (though we know...well, we PRESUME) the compressed
+** stuff will take less than the plain stuff.
+** Also note that we'll build a 3rd buffer to decompress
+** into, and we preallocate space for the huffman tree.
+** (We presume that the Huffman tree will grow no larger
+** than 512 nodes. This is actually a super-conservative
+** estimate...but, who cares?)
+** On a failed allocation the buffers obtained so far are
+** released before ErrorExit() is called.
+*/
+plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ ErrorExit();
+}
+comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory(plaintext,&systemerror);
+ ErrorExit();
+}
+decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory(plaintext,&systemerror);
+ FreeMemory(comparray,&systemerror);
+ ErrorExit();
+}
+
+hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
+ &systemerror);
+if(systemerror)
+{ ReportError(errorcontext,systemerror);
+ FreeMemory(plaintext,&systemerror);
+ FreeMemory(comparray,&systemerror);
+ FreeMemory(decomparray,&systemerror);
+ ErrorExit();
+}
+
+/*
+** Build the plaintext buffer. Since we want this to
+** actually be able to compress, we'll use the
+** wordcatalog to build the plaintext stuff.
+*/
+/*
+** Reset random number generator so things repeat.
+** added by Uwe F. Mayer
+*/
+randnum((int32)13);
+/*
+** Fill all but the last byte with text; the last byte is
+** set to '\0'.
+*/
+create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
+plaintext[lochuffstruct->arraysize-1L]='\0';
+plaintextlen=lochuffstruct->arraysize;
+
+/*
+** See if we need to perform self adjustment loop.
+*/
+if(lochuffstruct->adjust==0)
+{
+ /*
+ ** Do self-adjustment. This involves initializing the
+ ** # of loops and increasing the loop count until we
+ ** get a number of loops that we can use.
+ ** The count starts at 100 and grows by 10 until one
+ ** iteration takes more than global_min_ticks, or until
+ ** MAXHUFFLOOPS is reached.
+ */
+ for(lochuffstruct->loops=100L;
+ lochuffstruct->loops<MAXHUFFLOOPS;
+ lochuffstruct->loops+=10L)
+ if(DoHuffIteration(plaintext,
+ comparray,
+ decomparray,
+ lochuffstruct->arraysize,
+ lochuffstruct->loops,
+ hufftree)>global_min_ticks) break;
+}
+
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+/*
+** Each call performs "loops" passes, so that is what gets
+** added to the iteration count.
+*/
+do {
+ accumtime+=DoHuffIteration(plaintext,
+ comparray,
+ decomparray,
+ lochuffstruct->arraysize,
+ lochuffstruct->loops,
+ hufftree);
+ iterations+=(double)lochuffstruct->loops;
+} while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+FreeMemory((farvoid *)plaintext,&systemerror);
+FreeMemory((farvoid *)comparray,&systemerror);
+FreeMemory((farvoid *)decomparray,&systemerror);
+FreeMemory((farvoid *)hufftree,&systemerror);
+lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(lochuffstruct->adjust==0)
+ lochuffstruct->adjust=1;
+
+}
+
+/*********************
+** create_text_line **
+**********************
+** Create a random line of text, stored at *dt. Exactly nchars
+** characters are written: words picked at random from the word
+** catalog, each followed by a blank, with the last word cut off
+** where the line is full. No terminating '\0' is written to dt.
+*/
+static void create_text_line(farchar *dt,
+ long nchars)
+{
+long charssofar; /* # of characters so far */
+long tomove; /* # of characters to move */
+char myword[40]; /* Local buffer for words */
+farchar *wordptr; /* Pointer to word from catalog */
+
+charssofar=0;
+
+do {
+/*
+** Grab a random word from the wordcatalog
+** NOTE(review): the word and its '\0' are copied into the
+** 40-byte myword buffer without a length check; this assumes
+** every catalog word is shorter than 40 characters -- confirm
+** against the catalog.
+*/
+/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
+wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
+MoveMemory((farvoid *)myword,
+ (farvoid *)wordptr,
+ (unsigned long)strlen(wordptr)+1);
+
+/*
+** Append a blank. It overwrites the terminating '\0', so from
+** here on the length of the word is known only through tomove.
+*/
+tomove=strlen(myword)+1;
+myword[tomove-1]=' ';
+
+/*
+** See how long it is. If its length+charssofar > nchars, we have
+** to trim it.
+*/
+if((tomove+charssofar)>nchars)
+ tomove=nchars-charssofar;
+/*
+** Attach the word to the current line. Increment counter.
+*/
+MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove);
+charssofar+=tomove;
+dt+=tomove;
+
+/*
+** If we're done, bail out. Otherwise, go get another word.
+*/
+} while(charssofar<nchars);
+
+return;
+}
+
+/**********************
+** create_text_block **
+***********************
+** Build a block of text made of lines of random words.  The
+** words come from the wordcatalog (which must be loaded before
+** you call this).
+**  tb         points to the memory where the text is to be built
+**  tblen      # of bytes to put into the text block
+**  maxlinlen  line lengths are drawn as
+**             abs_randwc(maxlinlen-6)+6
+** The last byte of every line is a '\n'.
+*/
+static void create_text_block(farchar *tb,
+                ulong tblen,
+                ushort maxlinlen)
+{
+ulong written;          /* # of bytes produced so far */
+ulong thisline;         /* Length of the line being produced */
+
+written=0L;
+do {
+        /*
+        ** Draw a line length and cut it down to whatever room
+        ** is left in the block.
+        */
+        thisline=abs_randwc(maxlinlen-6)+6;
+        if((thisline+written)>tblen)
+                thisline=tblen-written;
+
+        /*
+        ** A line of length 1 is nothing but its '\n'.
+        */
+        if(thisline>1)
+                create_text_line(tb,thisline);
+
+        /*
+        ** The last character of the line becomes the line end;
+        ** then step over the whole line.
+        */
+        tb[thisline-1]='\n';
+        tb+=thisline;
+
+        written+=thisline;
+
+} while(written<tblen);
+
+}
+
+/********************
+** DoHuffIteration **
+*********************
+** Perform the huffman benchmark. This routine
+** (a) Builds the huffman tree
+** (b) Compresses the text
+** (c) Decompresses the text and verifies correct decompression
+** (the verification is compiled in only when DEBUG is defined)
+**
+**  plaintext    the text to compress (read only)
+**  comparray    receives the compressed bit stream
+**  decomparray  receives the decompressed text
+**  arraysize    # of bytes in plaintext
+**  nloops       # of times (a)-(c) are repeated
+**  hufftree     storage for 512 tree nodes
+** Returns the number of ticks elapsed for all nloops passes.
+*/
+static ulong DoHuffIteration(farchar *plaintext,
+ farchar *comparray,
+ farchar *decomparray,
+ ulong arraysize,
+ ulong nloops,
+ huff_node *hufftree)
+{
+int i; /* Index */
+long j; /* Bigger index */
+int root; /* Pointer to huffman tree root */
+float lowfreq1, lowfreq2; /* Low frequency counters */
+int lowidx1, lowidx2; /* Indexes of low freq. elements */
+long bitoffset; /* Bit offset into text */
+long textoffset; /* Char offset into text */
+long maxbitoffset; /* Holds limit of bit offset */
+long bitstringlen; /* Length of bitstring */
+int c; /* Character from plaintext */
+char bitstring[30]; /* Holds bitstring */
+ulong elapsed; /* For stopwatch */
+#ifdef DEBUG
+int status=0;
+#endif
+
+/*
+** Start the stopwatch
+*/
+elapsed=StartStopwatch();
+
+/*
+** Do everything for nloops
+*/
+while(nloops--)
+{
+
+/*
+** Calculate the frequency of each byte value. Store the
+** results in what will become the "leaves" of the
+** Huffman tree. Interior nodes will be built in those
+** nodes greater than node #255.
+*/
+for(i=0;i<256;i++)
+{
+ hufftree[i].freq=(float)0.0;
+ hufftree[i].c=(unsigned char)i;
+}
+
+/*
+** NOTE(review): the byte is used as an index after a plain
+** (int) cast; this presumably relies on the text holding only
+** values that are non-negative as a char -- confirm.
+*/
+for(j=0;j<arraysize;j++)
+ hufftree[(int)plaintext[j]].freq+=(float)1.0;
+
+/* Turn the counts into fractions of the text length */
+for(i=0;i<256;i++)
+ if(hufftree[i].freq != (float)0.0)
+ hufftree[i].freq/=(float)arraysize;
+
+/* Reset the second half of the tree. Otherwise the loop below that
+** compares the frequencies up to index 512 makes no sense. Some
+** systems automatically zero out memory upon allocation, others (like
+** for example DEC Unix) do not. Depending on this the loop below gets
+** different data and different run times. On our alpha the data that
+** was arbitrarily assigned led to an underflow error at runtime. We
+** use that zeroed-out bits are in fact 0 as a float.
+** Uwe F. Mayer */
+bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
+/*
+** Build the huffman tree. First clear all the parent
+** pointers and left/right pointers. Also, discard all
+** nodes that have a frequency of true 0. */
+for(i=0;i<512;i++)
+{ if(hufftree[i].freq==(float)0.0)
+ hufftree[i].parent=EXCLUDED;
+ else
+ hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
+}
+
+/*
+** Go through the tree. Finding nodes of really low
+** frequency.
+** Only nodes whose parent field is negative take part in the
+** search; for this to skip the discarded nodes EXCLUDED is
+** presumably a non-negative value -- confirm at its definition.
+*/
+root=255; /* Starting root node-1 */
+while(1)
+{
+ /* 2.0 is above any frequency, since those are fractions */
+ lowfreq1=(float)2.0; lowfreq2=(float)2.0;
+ lowidx1=-1; lowidx2=-1;
+ /*
+ ** Find first lowest frequency.
+ */
+ for(i=0;i<=root;i++)
+ if(hufftree[i].parent<0)
+ if(hufftree[i].freq<lowfreq1)
+ { lowfreq1=hufftree[i].freq;
+ lowidx1=i;
+ }
+
+ /*
+ ** Did we find a lowest value? If not, the
+ ** tree is done.
+ */
+ if(lowidx1==-1) break;
+
+ /*
+ ** Find next lowest frequency
+ */
+ for(i=0;i<=root;i++)
+ if((hufftree[i].parent<0) && (i!=lowidx1))
+ if(hufftree[i].freq<lowfreq2)
+ { lowfreq2=hufftree[i].freq;
+ lowidx2=i;
+ }
+
+ /*
+ ** If we could only find one item, then that
+ ** item is surely the root, and (as above) the
+ ** tree is done.
+ */
+ if(lowidx2==-1) break;
+
+ /*
+ ** Attach the two new nodes to the current root, and
+ ** advance the current root.
+ */
+ root++; /* New root */
+ hufftree[lowidx1].parent=root;
+ hufftree[lowidx2].parent=root;
+ hufftree[root].freq=lowfreq1+lowfreq2;
+ hufftree[root].left=lowidx1;
+ hufftree[root].right=lowidx2;
+ hufftree[root].parent=-2; /* Show root */
+}
+
+/*
+** Huffman tree built...compress the plaintext
+*/
+bitoffset=0L; /* Initialize bit offset */
+for(i=0;i<arraysize;i++)
+{
+ c=(int)plaintext[i]; /* Fetch character */
+ /*
+ ** Build a bit string for byte c
+ ** The tree is climbed from the leaf up to the node marked
+ ** as root (parent==-2), noting '0' for a left child and '1'
+ ** for a right child.
+ ** NOTE(review): bitstring has room for 30 entries and the
+ ** climb is not bounded against that -- confirm codes cannot
+ ** get longer.
+ */
+ bitstringlen=0;
+ while(hufftree[c].parent!=-2)
+ { if(hufftree[hufftree[c].parent].left==c)
+ bitstring[bitstringlen]='0';
+ else
+ bitstring[bitstringlen]='1';
+ c=hufftree[c].parent;
+ bitstringlen++;
+ }
+
+ /*
+ ** Step backwards through the bit string, setting
+ ** bits in the compressed array as you go.
+ */
+ while(bitstringlen--)
+ { SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
+ bitoffset++;
+ }
+}
+
+/*
+** Compression done. Perform de-compression.
+** Starting at the root, each bit selects the left (0) or right
+** (nonzero) child until a node with left==-1 is reached; that
+** node's character is the next output byte.
+*/
+maxbitoffset=bitoffset;
+bitoffset=0;
+textoffset=0;
+do {
+ i=root;
+ while(hufftree[i].left!=-1)
+ { if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
+ i=hufftree[i].left;
+ else
+ i=hufftree[i].right;
+ bitoffset++;
+ }
+ decomparray[textoffset]=hufftree[i].c;
+
+#ifdef DEBUG
+ if(hufftree[i].c != plaintext[textoffset])
+ {
+ /* Show error */
+ printf("Error at textoffset %ld\n",textoffset);
+ status=1;
+ }
+#endif
+ textoffset++;
+} while(bitoffset<maxbitoffset);
+
+} /* End the big while(nloops--) from above */
+
+/*
+** All done
+*/
+#ifdef DEBUG
+ if (status==0) printf("Huffman: OK\n");
+#endif
+return(StopStopwatch(elapsed));
+}
+
+/***************
+** SetCompBit **
+****************
+** Set a bit in the compression array. The value of the
+** bit is set according to char bitchar.
+*/
+static void SetCompBit(u8 *comparray,
+        u32 bitoffset,
+        char bitchar)
+{
+/*
+** Bit k of the stream lives in byte k/8 at bit position k%8
+** (least-significant bit first).
+*/
+u8 *target=comparray+(bitoffset>>3);
+int mask=1<<(int)(bitoffset % 8);
+
+/*
+** The character '1' sets the bit; anything else clears it.
+*/
+if(bitchar=='1')
+    *target|=mask;
+else
+    *target&=~mask;
+}
+
+/***************
+** GetCompBit **
+****************
+** Return the bit value of a bit in the compression array.
+** Returns 0 if the bit is clear, nonzero otherwise.
+*/
+static int GetCompBit(u8 *comparray,
+        u32 bitoffset)
+{
+/*
+** Byte index is bitoffset/8; the bit within that byte is
+** bitoffset%8, counted from the least-significant end.
+*/
+int mask=1<<(int)(bitoffset % 8);
+
+/*
+** The result is the masked byte itself, so a set bit yields
+** the mask value (nonzero), not necessarily 1.
+*/
+return(mask & comparray[bitoffset>>3]);
+}
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+*********************************
+** This code is a modified version of the code
+** that was submitted to BYTE Magazine by
+** Maureen Caudill. It accompanied an article
+** that I CANNOT NOW RECALL.
+** The author's original heading/comment was
+** as follows:
+**
+** Backpropagation Network
+** Written by Maureen Caudill
+** in Think C 4.0 on a Macintosh
+**
+** (c) Maureen Caudill 1988-1991
+** This network will accept 5x7 input patterns
+** and produce 8 bit output patterns.
+** The source code may be copied or modified without restriction,
+** but no fee may be charged for its use.
+**
+** ++++++++++++++
+** I have modified the code so that it will work
+** on systems other than a Macintosh -- RG
+*/
+
+/***********
+** DoNNet **
+************
+** Perform the neural net benchmark.
+** Note that this benchmark is one of the few that
+** requires an input file. That file is "NNET.DAT" and
+** should be on the local directory (from which the
+** benchmark program is launched).
+*/
+void DoNNET(void)
+{
+NNetStruct *nn=&global_nnetstruct;  /* Shorthand for the global NNET settings */
+char *errorcontext="CPU:NNET";      /* Error context tag */
+ulong ticks;                        /* Stopwatch ticks accumulated so far */
+double cycles;                      /* Learning cycles completed so far */
+
+/*
+** The net is initialized from the random number generator and
+** the results are sensitive to that initial state, so the
+** generator is re-seeded with the same value before every
+** timed or trial run below.
+*/
+randnum((int32)3);
+
+/*
+** Load the input/output patterns once; they do not change
+** after loading.
+*/
+if(read_data_file()!=0)
+    ErrorExit();
+
+/*
+** Self-adjustment (first call only): grow the loop count one
+** step at a time until a single iteration takes longer than
+** global_min_ticks, or the MAXNNETLOOPS ceiling is reached.
+*/
+if(nn->adjust==0)
+{
+    nn->loops=1L;
+    while(nn->loops<MAXNNETLOOPS)
+    {
+        randnum((int32)3);
+        if(DoNNetIteration(nn->loops)>global_min_ticks)
+            break;
+        nn->loops++;
+    }
+}
+
+/*
+** Timed run: repeat iterations until the requested number of
+** seconds has been accumulated.
+*/
+ticks=0L;
+cycles=(double)0.0;
+do {
+    randnum((int32)3);      /* Gotta do this for Neural Net */
+    ticks+=DoNNetIteration(nn->loops);
+    cycles+=(double)nn->loops;
+} while(TicksToSecs(ticks)<nn->request_secs);
+
+/*
+** Publish the rate and remember that adjustment is done so it
+** is not repeated on the next call.
+*/
+nn->iterspersec=cycles / TicksToFracSecs(ticks);
+
+if(nn->adjust==0)
+    nn->adjust=1;
+}
+
+/********************
+** DoNNetIteration **
+*********************
+** Do a single iteration of the neural net benchmark.
+** By iteration, we mean a "learning" pass.
+*/
+static ulong DoNNetIteration(ulong nloops)
+{
+ulong stopwatch;    /* Stopwatch handle/ticks */
+int p;              /* Pattern index */
+
+/*
+** The weight randomization and change-array zeroing are timed
+** together with the learning itself, so the clock is started
+** and stopped only once for all nloops cycles.
+*/
+stopwatch=StartStopwatch();
+for(;nloops!=0;nloops--)
+{
+    randomize_wts();
+    zero_changes();
+    iteration_count=1;
+    numpasses=0;
+    learned=F;
+
+    /*
+    ** One pass presents every pattern once; keep passing until
+    ** check_out_error() reports something other than F.
+    */
+    do {
+        for(p=0;p<numpats;p++)
+        {
+            worst_error=0.0;    /* reset this every pass through data */
+            move_wt_changes();  /* last pass's wt changes become momentum */
+            do_forward_pass(p);
+            do_back_pass(p);
+            iteration_count++;
+        }
+        numpasses++;
+        learned=check_out_error();
+    } while(learned==F);
+#ifdef DEBUG
+printf("Learned in %d passes\n",numpasses);
+#endif
+}
+return(StopStopwatch(stopwatch));
+}
+
+/*************************
+** do_mid_forward(patt) **
+**************************
+** Process the middle layer's forward pass
+** The activation of middle layer's neurode is the weighted
+** sum of the inputs from the input pattern, with sigmoid
+** function applied to the inputs.
+**/
+static void do_mid_forward(int patt)
+{
+int hidden, src;
+double net;     /* weighted input sum for the current neurode */
+
+for (hidden=0; hidden<MID_SIZE; hidden++)
+{
+    net = 0.0;
+    for (src=0; src<IN_SIZE; src++)
+        net += mid_wts[hidden][src]*in_pats[patt][src];
+
+    /* squash with the sigmoid f(x) = 1/(1+exp(-x)) */
+    mid_out[hidden] = 1.0/(1.0+exp(-net));
+}
+}
+
+/*********************
+** do_out_forward() **
+**********************
+** process the forward pass through the output layer
+** The activation of the output layer is the weighted sum of
+** the inputs (outputs from middle layer), modified by the
+** sigmoid function.
+**/
+static void do_out_forward()
+{
+int unit, src;
+double net;     /* weighted sum of middle-layer outputs */
+
+for (unit=0; unit<OUT_SIZE; unit++)
+{
+    net = 0.0;
+    for (src=0; src<MID_SIZE; src++)
+        net += out_wts[unit][src]*mid_out[src];
+
+    /* squash with the sigmoid f(x) = 1/(1+exp(-x)) */
+    out_out[unit] = 1.0/(1.0+exp(-net));
+}
+}
+
+/*************************
+** display_output(patt) **
+**************************
+** Display the actual output vs. the desired output of the
+** network.
+** Once the training is complete, and the "learned" flag set
+** to TRUE, then display_output sends its output to both
+** the screen and to a text output file.
+**
+** NOTE: This routine has been disabled in the benchmark
+** version. -- RG
+**/
+/*
+void display_output(int patt)
+{
+int i;
+
+ fprintf(outfile,"\n Iteration # %d",iteration_count);
+ fprintf(outfile,"\n Desired Output: ");
+
+ for (i=0; i<OUT_SIZE; i++)
+ {
+ fprintf(outfile,"%6.3f ",out_pats[patt][i]);
+ }
+ fprintf(outfile,"\n Actual Output: ");
+
+ for (i=0; i<OUT_SIZE; i++)
+ {
+ fprintf(outfile,"%6.3f ",out_out[i]);
+ }
+ fprintf(outfile,"\n");
+ return;
+}
+*/
+
+/**********************
+** do_forward_pass() **
+***********************
+** control function for the forward pass through the network
+** NOTE: I have disabled the call to display_output() in
+** the benchmark version -- RG.
+**/
+static void do_forward_pass(int patt)
+{
+/*
+** Middle layer first (it reads the input pattern), then the
+** output layer (it reads the middle layer's activations).
+** The display_output(patt) call stays disabled in the benchmark.
+*/
+do_mid_forward(patt);
+do_out_forward();
+}
+
+/***********************
+** do_out_error(patt) **
+************************
+** Compute the error for the output layer neurodes.
+** This is simply Desired - Actual.
+**/
+static void do_out_error(int patt)
+{
+int unit;
+double diff;            /* signed error of the current neurode */
+double magnitude;       /* absolute value of diff */
+double worst = 0.0;     /* largest magnitude seen for this pattern */
+double total = 0.0;     /* sum of magnitudes for this pattern */
+
+for (unit=0; unit<OUT_SIZE; unit++)
+{
+    out_error[unit] = out_pats[patt][unit] - out_out[unit];
+
+    /*
+    ** While here, track the total and the worst absolute
+    ** error; these feed the "are we done yet" decision.
+    */
+    diff = out_error[unit];
+    magnitude = (diff < 0.0) ? -diff : diff;
+    total += magnitude;
+    if (magnitude > worst)
+        worst = magnitude;
+}
+avg_out_error[patt] = total/OUT_SIZE;
+tot_out_error[patt] = worst;
+}
+
+/***********************
+** worst_pass_error() **
+************************
+** Find the worst and average error in the pass and save it
+**/
+static void worst_pass_error()
+{
+int p;
+double peak = 0.0;      /* largest per-pattern worst error */
+double total = 0.0;     /* sum of per-pattern average errors */
+
+for (p=0; p<numpats; p++)
+{
+    if (tot_out_error[p] > peak)
+        peak = tot_out_error[p];
+    total += avg_out_error[p];
+}
+
+/* publish the pass-wide figures in the file-level variables */
+worst_error = peak;
+average_error = total/numpats;
+}
+
+/*******************
+** do_mid_error() **
+********************
+** Compute the error for the middle layer neurodes
+** This is based on the output errors computed above.
+** Note that the derivative of the sigmoid f(x) is
+** f'(x) = f(x)(1 - f(x))
+** Recall that f(x) is merely the output of the middle
+** layer neurode on the forward pass.
+**/
+static void do_mid_error()
+{
+int hidden, unit;
+double back;    /* output error propagated back to this neurode */
+double act;     /* this neurode's forward-pass activation */
+
+for (hidden=0; hidden<MID_SIZE; hidden++)
+{
+    back = 0.0;
+    for (unit=0; unit<OUT_SIZE; unit++)
+        back += out_wts[unit][hidden]*out_error[unit];
+
+    /*
+    ** Scale by the sigmoid derivative f'(I) = f(I)(1 - f(I)),
+    ** where f(I) is the activation stored in mid_out.
+    */
+    act = mid_out[hidden];
+    mid_error[hidden] = act*(1-act)*back;
+}
+}
+
+/*********************
+** adjust_out_wts() **
+**********************
+** Adjust the weights of the output layer. The error for
+** the output layer has been previously propagated back to
+** the middle layer.
+** Use the Delta Rule with momentum term to adjust the weights.
+**/
+static void adjust_out_wts()
+{
+int unit, src;
+double rate = BETA;         /* learning rate */
+double momentum = ALPHA;    /* momentum coefficient */
+double step;
+
+for (unit=0; unit<OUT_SIZE; unit++)
+    for (src=0; src<MID_SIZE; src++)
+    {
+        /* delta rule term ... */
+        step = rate * out_error[unit] * mid_out[src];
+        /* ... plus momentum from the previous pass's change */
+        step += momentum * out_wt_change[unit][src];
+
+        out_wts[unit][src] += step;
+
+        /* accumulate this pass's change for next pass's momentum */
+        out_wt_cum_change[unit][src] += step;
+    }
+}
+
+/*************************
+** adjust_mid_wts(patt) **
+**************************
+** Adjust the middle layer weights using the previously computed
+** errors.
+** We use the Generalized Delta Rule with momentum term
+**/
+static void adjust_mid_wts(int patt)
+{
+int hidden, src;
+double rate = BETA;         /* learning rate */
+double momentum = ALPHA;    /* momentum coefficient */
+double step;
+
+for (hidden=0; hidden<MID_SIZE; hidden++)
+    for (src=0; src<IN_SIZE; src++)
+    {
+        /* delta rule term driven by the input pattern ... */
+        step = rate * mid_error[hidden] * in_pats[patt][src];
+        /* ... plus momentum from the previous pass's change */
+        step += momentum * mid_wt_change[hidden][src];
+
+        mid_wts[hidden][src] += step;
+
+        /* accumulate this pass's change for next pass's momentum */
+        mid_wt_cum_change[hidden][src] += step;
+    }
+}
+
+/*******************
+** do_back_pass() **
+********************
+** Process the backward propagation of error through network.
+**/
+void do_back_pass(int patt)
+{
+/*
+** Errors are computed output layer first, then middle layer;
+** only after both are known are the weights of either layer
+** adjusted.
+*/
+do_out_error(patt);
+do_mid_error();
+adjust_out_wts();
+adjust_mid_wts(patt);
+}
+
+
+/**********************
+** move_wt_changes() **
+***********************
+** Move the weight changes accumulated last pass into the wt-change
+** array for use by the momentum term in this pass. Also zero out
+** the accumulating arrays after the move.
+**/
+static void move_wt_changes()
+{
+int row, col;
+
+/* middle layer: hand over accumulated changes, then reset them */
+for (row=0; row<MID_SIZE; row++)
+{
+    for (col=0; col<IN_SIZE; col++)
+    {
+        mid_wt_change[row][col] = mid_wt_cum_change[row][col];
+        mid_wt_cum_change[row][col] = 0.0;
+    }
+}
+
+/* output layer: same hand-over and reset */
+for (row=0; row<OUT_SIZE; row++)
+{
+    for (col=0; col<MID_SIZE; col++)
+    {
+        out_wt_change[row][col] = out_wt_cum_change[row][col];
+        out_wt_cum_change[row][col] = 0.0;
+    }
+}
+}
+
+/**********************
+** check_out_error() **
+***********************
+** Check to see if the error in the output layer is below
+** MARGIN*OUT_SIZE for all output patterns. If so, then
+** assume the network has learned acceptably well. This
+** is simply an arbitrary measure of how well the network
+** has learned -- many other standards are possible.
+**/
+static int check_out_error()
+{
+int verdict = T;    /* T until some check says otherwise */
+int blew_up = F;    /* set when a pattern's worst error reaches 16.0 */
+int p;
+
+worst_pass_error();     /* refresh worst_error / average_error */
+
+for (p=0; p<numpats; p++)
+{
+    /* pass-wide worst error still at or above STOP: not learned */
+    if (worst_error >= STOP)
+        verdict = F;
+    if (tot_out_error[p] >= 16.0)
+        blew_up = T;
+}
+
+/* an out-of-range error overrides the learned/not-learned verdict */
+return((blew_up == T) ? ERR : verdict);
+}
+
+
+/*******************
+** zero_changes() **
+********************
+** Zero out all the wt change arrays
+**/
+static void zero_changes()
+{
+int row, col;
+
+/* middle layer: current and accumulated changes */
+for (row=0; row<MID_SIZE; row++)
+    for (col=0; col<IN_SIZE; col++)
+    {
+        mid_wt_change[row][col] = 0.0;
+        mid_wt_cum_change[row][col] = 0.0;
+    }
+
+/* output layer: current and accumulated changes */
+for (row=0; row<OUT_SIZE; row++)
+    for (col=0; col<MID_SIZE; col++)
+    {
+        out_wt_change[row][col] = 0.0;
+        out_wt_cum_change[row][col] = 0.0;
+    }
+}
+
+
+/********************
+** randomize_wts() **
+*********************
+** Initialize the weights in the middle and output layers to
+** random values between -0.25..+0.25
+** Function rand() returns a value between 0 and 32767.
+**
+** NOTE: Had to make alterations to how the random numbers were
+** created. -- RG.
+**/
+static void randomize_wts()
+{
+int neurode,i;
+double value;
+
+/*
+** Following not used in benchmark version -- RG
+**
+** printf("\n Please enter a random number seed (1..32767): ");
+** scanf("%d", &i);
+** srand(i);
+*/
+
+/*
+** Middle layer: draw from abs_randwc(100000), scale by 1/100000,
+** shift by -0.5 and halve. (Presumably abs_randwc(n) yields
+** 0..n-1, which would give -0.25..+0.25 -- confirm in MISC.)
+*/
+for (neurode = 0; neurode<MID_SIZE; neurode++)
+{
+ for(i=0; i<IN_SIZE; i++)
+ {
+ /* value=(double)abs_randwc(100000L); */
+ value=(double)abs_randwc((int32)100000);
+ value=value/(double)100000.0 - (double) 0.5;
+ mid_wts[neurode][i] = value/2;
+ }
+}
+/*
+** Output layer: same recipe, except for the divisor below.
+*/
+for (neurode=0; neurode<OUT_SIZE; neurode++)
+{
+ for(i=0; i<MID_SIZE; i++)
+ {
+ /* value=(double)abs_randwc(100000L); */
+ value=(double)abs_randwc((int32)100000);
+ /*
+ ** NOTE(review): the divisor here is 10000.0, whereas the
+ ** middle-layer loop above divides by 100000.0. With the
+ ** same random range this produces output weights outside
+ ** the documented -0.25..+0.25 interval. Left unchanged
+ ** because altering it would change benchmark results --
+ ** confirm whether this is intentional.
+ */
+ value=value/(double)10000.0 - (double) 0.5;
+ out_wts[neurode][i] = value/2;
+ }
+}
+
+return;
+}
+
+
+/*********************
+** read_data_file() **
+**********************
+** Read in the input data file and store the patterns in
+** in_pats and out_pats.
+** The format for the data file is as follows:
+**
+** line# data expected
+** ----- ------------------------------
+** 1 In-X-size,in-y-size,out-size
+** 2 number of patterns in file
+** 3 1st X row of 1st input pattern
+** 4.. following rows of 1st input pattern
+** in-x+2 y-out pattern
+** 1st X row of 2nd pattern
+** etc.
+**
+** Each row of data is separated by commas or spaces.
+** The data is expected to be ascii text corresponding to
+** either a +1 or a 0.
+**
+** Sample input for a 1-pattern file (The comments to the
+** right may NOT be in the file unless more sophisticated
+** parsing of the input is done.):
+**
+** 5,7,8 input is 5x7 grid, output is 8 bits
+** 1 one pattern in file
+** 0,1,1,1,0 beginning of pattern for "O"
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,1
+** 1,0,0,0,0
+** 0,1,1,1,0
+** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111
+**
+** Clearly, this simple scheme can be expanded or enhanced
+** any way you like.
+**
+** Returns -1 if any file error occurred, otherwise 0.
+**/
+static int read_data_file()
+{
+FILE *infile;
+
+int xinsize,yinsize,youtsize;
+int patt, element, i, row;
+int vals_read;
+int val1,val2,val3,val4,val5,val6,val7,val8;
+int status = -1;    /* Pessimistic until the whole file has been read */
+
+/* printf("\n Opening and retrieving data from file."); */
+
+infile = fopen(inpath, "r");
+if (infile == NULL)
+{
+    printf("\n CPU:NNET--error in opening file!");
+    return -1 ;
+}
+
+/*
+** From here on every exit goes through "done" so that the
+** file is closed on error paths as well.
+*/
+vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize);
+if (vals_read != 3)
+{
+    printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
+    goto done;
+}
+
+/*
+** Each input row is read as exactly five values stored at
+** row*xinsize .. row*xinsize+4, so the last row must end
+** inside the IN_SIZE elements of a pattern. Reject sizes that
+** would write past that (the range checks come first so the
+** multiplication cannot overflow).
+*/
+if (xinsize < 0 || xinsize > IN_SIZE || yinsize > IN_SIZE ||
+    (yinsize > 0 && (yinsize-1)*xinsize+5 > IN_SIZE))
+{
+    printf("\n CPU:NNET -- input size %d x %d does not fit the network",
+        xinsize,yinsize);
+    goto done;
+}
+
+vals_read=fscanf(infile,"%d",&numpats);
+if (vals_read !=1)
+{
+    printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
+    goto done;
+}
+if (numpats > MAXPATS)
+    numpats = MAXPATS;
+
+for (patt=0; patt<numpats; patt++)
+{
+    /*
+    ** Input grid: yinsize rows of five values each.
+    */
+    for (row = 0; row<yinsize; row++)
+    {
+        vals_read = fscanf(infile,"%d %d %d %d %d",
+            &val1, &val2, &val3, &val4, &val5);
+        if (vals_read != 5)
+        {
+            printf ("\n CPU:NNET -- failure in reading input!");
+            goto done;
+        }
+        element=row*xinsize;
+
+        in_pats[patt][element] = (double) val1; element++;
+        in_pats[patt][element] = (double) val2; element++;
+        in_pats[patt][element] = (double) val3; element++;
+        in_pats[patt][element] = (double) val4; element++;
+        in_pats[patt][element] = (double) val5; element++;
+    }
+
+    /*
+    ** Clamp every input to the 0.1 .. 0.9 range.
+    */
+    for (i=0;i<IN_SIZE; i++)
+    {
+        if (in_pats[patt][i] >= 0.9)
+            in_pats[patt][i] = 0.9;
+        if (in_pats[patt][i] <= 0.1)
+            in_pats[patt][i] = 0.1;
+    }
+
+    /*
+    ** Desired output: eight values. A short read would leave
+    ** some of val1..val8 unset, so it is treated as an error
+    ** like a short input row.
+    */
+    vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d",
+        &val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
+    if (vals_read != 8)
+    {
+        printf ("\n CPU:NNET -- failure in reading input!");
+        goto done;
+    }
+
+    element = 0;
+    out_pats[patt][element] = (double) val1; element++;
+    out_pats[patt][element] = (double) val2; element++;
+    out_pats[patt][element] = (double) val3; element++;
+    out_pats[patt][element] = (double) val4; element++;
+    out_pats[patt][element] = (double) val5; element++;
+    out_pats[patt][element] = (double) val6; element++;
+    out_pats[patt][element] = (double) val7; element++;
+    out_pats[patt][element] = (double) val8; element++;
+}
+
+status = 0;
+
+done:
+/* printf("\n Closing the input file now. "); */
+fclose(infile);
+return(status);
+}
+
+/*********************
+** initialize_net() **
+**********************
+** Do all the initialization stuff before beginning
+*/
+/*
+static int initialize_net()
+{
+int err_code;
+
+randomize_wts();
+zero_changes();
+err_code = read_data_file();
+iteration_count = 1;
+return(err_code);
+}
+*/
+
+/**********************
+** display_mid_wts() **
+***********************
+** Display the weights on the middle layer neurodes
+** NOTE: This routine is not used in the benchmark
+** test -- RG
+**/
+/* static void display_mid_wts()
+{
+int neurode, weight, row, col;
+
+fprintf(outfile,"\n Weights of Middle Layer neurodes:");
+
+for (neurode=0; neurode<MID_SIZE; neurode++)
+{
+ fprintf(outfile,"\n Mid Neurode # %d",neurode);
+ for (row=0; row<IN_Y_SIZE; row++)
+ {
+ fprintf(outfile,"\n ");
+ for (col=0; col<IN_X_SIZE; col++)
+ {
+ weight = IN_X_SIZE * row + col;
+ fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
+ }
+ }
+}
+return;
+}
+*/
+/**********************
+** display_out_wts() **
+***********************
+** Display the weights on the output layer neurodes
+** NOTE: This code is not used in the benchmark
+** test -- RG
+*/
+/* void display_out_wts()
+{
+int neurode, weight;
+
+ fprintf(outfile,"\n Weights of Output Layer neurodes:");
+
+ for (neurode=0; neurode<OUT_SIZE; neurode++)
+ {
+ fprintf(outfile,"\n Out Neurode # %d \n",neurode);
+ for (weight=0; weight<MID_SIZE; weight++)
+ {
+ fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
+ }
+ }
+ return;
+}
+*/
+
+/***********************
+** LU DECOMPOSITION **
+** (Linear Equations) **
+************************
+** These routines come from "Numerical Recipes in Pascal".
+** Note that, as in the assignment algorithm, though we
+** separately define LUARRAYROWS and LUARRAYCOLS, the two
+** must be the same value (this routine depends on a square
+** matrix).
+*/
+
+/*********
+** DoLU **
+**********
+** Perform the LU decomposition benchmark.
+*/
+void DoLU(void)
+{
+LUStruct *loclustruct; /* Local pointer to global data */
+char *errorcontext;
+int systemerror;
+fardouble *a;
+fardouble *b;
+fardouble *abase;
+fardouble *bbase;
+LUdblptr ptra;
+int n;
+int i;
+ulong accumtime;
+double iterations;
+
+/*
+** Link to global data
+*/
+loclustruct=&global_lustruct;
+
+/*
+** Set error context.
+*/
+errorcontext="FPU:LU";
+
+/*
+** No work arrays are held yet. Keeping these NULL whenever
+** nothing is allocated lets LUFreeMem() skip them safely.
+*/
+abase=(fardouble *)NULL;
+bbase=(fardouble *)NULL;
+
+/*
+** Our first step is to build a "solvable" problem. This
+** will become the "seed" set that all others will be
+** derived from. (I.E., we'll simply copy these arrays
+** into the others.)
+** Each allocation is checked; on failure only the buffers
+** already obtained are released before bailing out.
+*/
+a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
+        &systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    ErrorExit();
+}
+b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
+        &systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    FreeMemory((farvoid *)a,&systemerror);
+    ErrorExit();
+}
+n=LUARRAYROWS;
+
+/*
+** We need to allocate a temp vector that is used by the LU
+** algorithm. This removes the allocation routine from the
+** timing.
+*/
+LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
+        &systemerror);
+if(systemerror)
+{   ReportError(errorcontext,systemerror);
+    FreeMemory((farvoid *)a,&systemerror);
+    FreeMemory((farvoid *)b,&systemerror);
+    ErrorExit();
+}
+
+/*
+** Build a problem to be solved.
+*/
+ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */
+build_problem(*ptra.ptrs.ap,n,b);
+
+/*
+** Now that we have a problem built, see if we need to do
+** auto-adjust. If so, repeatedly call the DoLUIteration routine,
+** increasing the number of solutions per iteration as you go.
+*/
+if(loclustruct->adjust==0)
+{
+    loclustruct->numarrays=0;
+    for(i=1;i<=MAXLUARRAYS;i++)
+    {
+        abase=(fardouble *)AllocateMemory(sizeof(double) *
+            LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
+        if(systemerror)
+        {   ReportError(errorcontext,systemerror);
+            LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+            ErrorExit();
+        }
+        bbase=(fardouble *)AllocateMemory(sizeof(double) *
+            LUARRAYROWS*(i+1),&systemerror);
+        if(systemerror)
+        {   ReportError(errorcontext,systemerror);
+            LUFreeMem(a,b,abase,(fardouble *)NULL);
+            ErrorExit();
+        }
+        if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
+        {   loclustruct->numarrays=i;
+            break;
+        }
+        /*
+        ** Not enough arrays...free them all and try again.
+        ** The pointers are cleared so that nothing later can
+        ** release (or use) these buffers a second time.
+        */
+        FreeMemory((farvoid *)abase,&systemerror);
+        FreeMemory((farvoid *)bbase,&systemerror);
+        abase=(fardouble *)NULL;
+        bbase=(fardouble *)NULL;
+    }
+    /*
+    ** Were we able to do it? If not, the work arrays have
+    ** already been released above, so abase/bbase are NULL here.
+    */
+    if(loclustruct->numarrays==0)
+    {   printf("FPU:LU -- Array limit reached\n");
+        LUFreeMem(a,b,abase,bbase);
+        ErrorExit();
+    }
+}
+else
+{   /*
+    ** Don't need to adjust -- just allocate the proper
+    ** number of arrays and proceed.
+    */
+    abase=(fardouble *)AllocateMemory(sizeof(double) *
+        LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
+        &systemerror);
+    if(systemerror)
+    {   ReportError(errorcontext,systemerror);
+        LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
+        ErrorExit();
+    }
+    bbase=(fardouble *)AllocateMemory(sizeof(double) *
+        LUARRAYROWS*loclustruct->numarrays,&systemerror);
+    if(systemerror)
+    {
+        ReportError(errorcontext,systemerror);
+        LUFreeMem(a,b,abase,(fardouble *)NULL);
+        ErrorExit();
+    }
+}
+/*
+** All's well if we get here. Do the test.
+*/
+accumtime=0L;
+iterations=(double)0.0;
+
+do {
+    accumtime+=DoLUIteration(a,b,abase,bbase,
+        loclustruct->numarrays);
+    iterations+=(double)loclustruct->numarrays;
+} while(TicksToSecs(accumtime)<loclustruct->request_secs);
+
+/*
+** Clean up, calculate results, and go home. Be sure to
+** show that we don't have to rerun adjustment code.
+*/
+loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
+
+if(loclustruct->adjust==0)
+    loclustruct->adjust=1;
+
+LUFreeMem(a,b,abase,bbase);
+return;
+}
+
+/**************
+** LUFreeMem **
+***************
+** Release memory associated with LU benchmark.
+*/
+static void LUFreeMem(fardouble *a, fardouble *b,
+        fardouble *abase,fardouble *bbase)
+{
+int ignored;    /* FreeMemory() status; not inspected here */
+
+/*
+** The seed matrix, the seed vector and the shared scratch
+** vector LUtempvv are always released.
+*/
+FreeMemory((farvoid *)a,&ignored);
+FreeMemory((farvoid *)b,&ignored);
+FreeMemory((farvoid *)LUtempvv,&ignored);
+
+/*
+** The work arrays are optional: callers pass NULL for any
+** that were never allocated.
+*/
+if(abase!=(fardouble *)NULL)
+    FreeMemory((farvoid *)abase,&ignored);
+if(bbase!=(fardouble *)NULL)
+    FreeMemory((farvoid *)bbase,&ignored);
+}
+
+/******************
+** DoLUIteration **
+*******************
+** Perform an iteration of the LU decomposition benchmark.
+** An iteration refers to the repeated solution of several
+** identical matrices.
+*/
+static ulong DoLUIteration(fardouble *a,fardouble *b,
+        fardouble *abase, fardouble *bbase,
+        ulong numarrays)
+{
+LUdblptr caster;    /* Views a flat buffer as a 2D array */
+fardouble *mat;     /* Current matrix inside abase */
+fardouble *rhs;     /* Current right-hand side inside bbase */
+ulong ticks;
+ulong k,idx;
+
+/*
+** Untimed setup: stamp a fresh copy of the seed problem
+** (a and b) into each of the numarrays work slots.
+*/
+for(k=0;k<numarrays;k++)
+{
+    mat=abase+k*LUARRAYROWS*LUARRAYCOLS;
+    rhs=bbase+k*LUARRAYROWS;
+    for(idx=0;idx<LUARRAYROWS*LUARRAYCOLS;idx++)
+        mat[idx]=a[idx];
+    for(idx=0;idx<LUARRAYROWS;idx++)
+        rhs[idx]=b[idx];
+}
+
+/*
+** Timed section: solve every copy in turn.
+*/
+ticks=StartStopwatch();
+for(k=0;k<numarrays;k++)
+{
+    mat=abase+k*LUARRAYROWS*LUARRAYCOLS;
+    rhs=bbase+k*LUARRAYROWS;
+    caster.ptrs.p=mat;
+    lusolve(*caster.ptrs.ap,LUARRAYROWS,rhs);
+}
+
+return(StopStopwatch(ticks));
+}
+
+/******************
+** build_problem **
+*******************
+** Constructs a solvable set of linear equations. It does this by
+** creating an identity matrix, then loading the solution vector
+** with random numbers. After that, the identity matrix and
+** solution vector are randomly "scrambled". Scrambling is
+** done by (a) randomly selecting a row and multiplying that
+** row by a random number and (b) adding one randomly-selected
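+** row to another. (Step (a) is currently commented out in the
+** routine body, so only step (b) is actually performed.)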
+*/
+static void build_problem(double a[][LUARRAYCOLS],
+ int n,
+ double b[LUARRAYROWS])
+{
+long i,j,k,k1; /* Indexes */
+double rcon; /* Random constant */
+
+/*
+** Reset random number generator
+** (fixed seed, so the random draws below start from the same
+** generator state on every call).
+*/
+/* randnum(13L); */
+randnum((int32)13);
+
+/*
+** Build a diagonal matrix with random nonzero-offset entries
+** (abs_randwc(1000)+1) on the diagonal and zeros elsewhere.
+** We'll also use this as a chance to load the solution
+** vector b with abs_randwc(100)+1 values.
+*/
+for(i=0;i<n;i++)
+{ /* b[i]=(double)(abs_randwc(100L)+1L); */
+ b[i]=(double)(abs_randwc((int32)100)+(int32)1);
+ for(j=0;j<n;j++)
+ if(i==j)
+ /* a[i][j]=(double)(abs_randwc(1000L)+1L); */
+ a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
+ else
+ a[i][j]=(double)0.0;
+}
+
+#ifdef DEBUG
+/* Show each equation of the unscrambled system: b[i]/a[i][i] */
+printf("Problem:\n");
+for(i=0;i<n;i++)
+{
+/*
+ for(j=0;j<n;j++)
+ printf("%6.2f ",a[i][j]);
+*/
+ printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
+/*
+ printf("\n");
+*/
+}
+#endif
+
+/*
+** Scramble. Do this 8n times. See comment above for
+** a description of the scrambling process.
+*/
+
+for(i=0;i<8*n;i++)
+{
+ /*
+ ** Pick a row and a random constant. Multiply
+ ** all elements in the row by the constant.
+ ** (This step is disabled: the code below is commented out.)
+ */
+ /* k=abs_randwc((long)n);
+ rcon=(double)(abs_randwc(20L)+1L);
+ for(j=0;j<n;j++)
+ a[k][j]=a[k][j]*rcon;
+ b[k]=b[k]*rcon;
+*/
+ /*
+ ** Pick two random rows and add second to
+ ** first. The sign is +1 when k<k1 and -1 otherwise, so
+ ** the operation is sometimes a subtraction. Nothing is
+ ** done when both picks land on the same row.
+ */
+ /* k=abs_randwc((long)n); */
+ /* k1=abs_randwc((long)n); */
+ k=abs_randwc((int32)n);
+ k1=abs_randwc((int32)n);
+ if(k!=k1)
+ {
+ if(k<k1) rcon=(double)1.0;
+ else rcon=(double)-1.0;
+ for(j=0;j<n;j++)
+ a[k][j]+=a[k1][j]*rcon;;
+ b[k]+=b[k1]*rcon;
+ }
+}
+
+return;
+}
+
+
+/***********
+** ludcmp **
+************
+** From the procedure of the same name in "Numerical Recipes in Pascal",
+** by Press, Flannery, Teukolsky, and Vetterling.
+** Given an nxn matrix a[], this routine replaces it by the LU
+** decomposition of a rowwise permutation of itself. a[] and n
+** are input. a[] is output, modified as follows:
+** -- --
+** | b(1,1) b(1,2) b(1,3)... |
+** | a(2,1) b(2,2) b(2,3)... |
+** | a(3,1) a(3,2) b(3,3)... |
+** | a(4,1) a(4,2) a(4,3)... |
+** | ... |
+** -- --
+**
+** Where the b(i,j) elements form the upper triangular matrix of the
+** LU decomposition, and the a(i,j) elements form the lower triangular
+** elements. The LU decomposition is calculated so that we don't
+** need to store the a(i,i) elements (which would have laid along the
+** diagonal and would have all been 1).
+**
+** indx[] is an output vector that records the row permutation
+** effected by the partial pivoting; d is output as +/-1 depending
+** on whether the number of row interchanges was even or odd,
+** respectively.
+** Returns 0 if matrix singular, else returns 1.
+*/
+static int ludcmp(double a[][LUARRAYCOLS],
+ int n,
+ int indx[],
+ int *d)
+{
+
+double big; /* Holds largest element value */
+double sum;
+double dum; /* Holds dummy value */
+int i,j,k; /* Indexes */
+int imax=0; /* Holds max index value */
+double tiny; /* A really small number */
+
+tiny=(double)1.0e-20;
+
+*d=1; /* No interchanges yet */
+
+/*
+** Implicit scaling: for every row, store the reciprocal of its
+** largest absolute element in the file-level vector LUtempvv.
+** A row of all zeros means the matrix is singular.
+*/
+for(i=0;i<n;i++)
+{ big=(double)0.0;
+ for(j=0;j<n;j++)
+ if((double)fabs(a[i][j]) > big)
+ big=fabs(a[i][j]);
+ /* Bail out on singular matrix */
+ if(big==(double)0.0) return(0);
+ LUtempvv[i]=1.0/big;
+}
+
+/*
+** Crout's algorithm...loop over columns.
+*/
+for(j=0;j<n;j++)
+{ /* Elements above the diagonal in column j */
+ if(j!=0)
+ for(i=0;i<j;i++)
+ { sum=a[i][j];
+ if(i!=0)
+ for(k=0;k<i;k++)
+ sum-=(a[i][k]*a[k][j]);
+ a[i][j]=sum;
+ }
+ /*
+ ** Diagonal and below in column j; while computing them,
+ ** remember the row (imax) with the largest scaled
+ ** magnitude as the pivot candidate. The >= comparison
+ ** means a later row wins ties.
+ */
+ big=(double)0.0;
+ for(i=j;i<n;i++)
+ { sum=a[i][j];
+ if(j!=0)
+ for(k=0;k<j;k++)
+ sum-=a[i][k]*a[k][j];
+ a[i][j]=sum;
+ dum=LUtempvv[i]*fabs(sum);
+ if(dum>=big)
+ { big=dum;
+ imax=i;
+ }
+ }
+ if(j!=imax) /* Interchange rows if necessary */
+ { for(k=0;k<n;k++)
+ { dum=a[imax][k];
+ a[imax][k]=a[j][k];
+ a[j][k]=dum;
+ }
+ *d=-*d; /* Change parity of d */
+ dum=LUtempvv[imax];
+ LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
+ LUtempvv[j]=dum;
+ }
+ indx[j]=imax; /* Record which row was swapped into position j */
+ /*
+ ** If the pivot element is zero, the matrix is singular
+ ** (at least as far as the precision of the machine
+ ** is concerned.) We'll take the original author's
+ ** recommendation and replace 0.0 with "tiny".
+ */
+ if(a[j][j]==(double)0.0)
+ a[j][j]=tiny;
+
+ /* Divide the sub-diagonal part of column j by the pivot */
+ if(j!=(n-1))
+ { dum=1.0/a[j][j];
+ for(i=j+1;i<n;i++)
+ a[i][j]=a[i][j]*dum;
+ }
+}
+
+return(1);
+}
+
+/***********
+** lubksb **
+************
+** Also from "Numerical Recipes in Pascal".
+** This routine solves the set of n linear equations A X = B.
+** Here, a[][] is input, not as the matrix A, but as its
+** LU decomposition, created by the routine ludcmp().
+** Indx[] is input as the permutation vector returned by ludcmp().
+** b[] is input as the right-hand side and returns the
+** solution vector X.
+** a[], n, and indx are not modified by this routine and
+** can be left in place for different values of b[].
+** This routine takes into account the possibility that b will
+** begin with many zero elements, so it is efficient for use in
+** matrix inversion.
+*/
+static void lubksb( double a[][LUARRAYCOLS],
+ int n,
+ int indx[LUARRAYROWS],
+ double b[LUARRAYROWS])
+{
+
+int i,j; /* Indexes */
+int ip; /* "pointer" into indx */
+int ii; /* -1 until a nonzero element of b[] has been seen */
+double sum;
+
+/*
+** ii starts at -1; once a nonzero element of b[] has been
+** met it holds that element's index (which may be 0).
+** We now do the forward substitution. The only wrinkle
+** is to unscramble the permutation as we go.
+*/
+ii=-1;
+for(i=0;i<n;i++)
+{ ip=indx[i];
+ /* Undo the row interchange recorded for step i */
+ sum=b[ip];
+ b[ip]=b[i];
+ if(ii!=-1)
+ for(j=ii;j<i;j++)
+ sum=sum-a[i][j]*b[j];
+ else
+ /*
+ ** If a nonzero element is encountered, we have
+ ** to do the sums in the loop above.
+ */
+ if(sum!=(double)0.0)
+ ii=i;
+ b[i]=sum;
+}
+/*
+** Do backsubstitution: walk the rows from last to first,
+** subtracting the already-solved unknowns and dividing by
+** the diagonal element.
+*/
+for(i=(n-1);i>=0;i--)
+{
+ sum=b[i];
+ if(i!=(n-1))
+ for(j=(i+1);j<n;j++)
+ sum=sum-a[i][j]*b[j];
+ b[i]=sum/a[i][i];
+}
+return;
+}
+
+/************
+** lusolve **
+*************
+** Solve a linear set of equations: A x = b
+** Original matrix A will be destroyed by this operation.
+** Returns 0 if matrix is singular, 1 otherwise.
+*/
+static int lusolve(double a[][LUARRAYCOLS],
+        int n,
+        double b[LUARRAYROWS])
+{
+int perm[LUARRAYROWS];  /* Row permutation produced by ludcmp() */
+int parity;             /* Interchange parity from ludcmp(); unused here */
+#ifdef DEBUG
+int row;
+#endif
+
+/*
+** Factor in place; a zero return means the matrix is singular
+** and b[] is left untouched.
+*/
+if(ludcmp(a,n,perm,&parity)==0)
+    return(0);
+
+/*
+** Forward and back substitution overwrite b[] with the solution.
+*/
+lubksb(a,n,perm,b);
+
+#ifdef DEBUG
+printf("Solution:\n");
+for(row=0;row<n;row++)
+    printf("%6.2f\t",b[row]);
+printf("\n");
+#endif
+
+return(1);
+}
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/nbench1.h b/benchmarks/nbench/nbench-byte-2.2.3/nbench1.h
new file mode 100644
index 0000000..13a5907
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/nbench1.h
@@ -0,0 +1,428 @@
+/*
+** nbench1.h
+** Header for nbench1.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** DEFINES
+*/
+/* #define DEBUG */
+
+/*
+** EXTERNALS
+*/
+extern ulong global_min_ticks;
+
+extern SortStruct global_numsortstruct;
+extern SortStruct global_strsortstruct;
+extern BitOpStruct global_bitopstruct;
+extern EmFloatStruct global_emfloatstruct;
+extern FourierStruct global_fourierstruct;
+extern AssignStruct global_assignstruct;
+extern IDEAStruct global_ideastruct;
+extern HuffStruct global_huffstruct;
+extern NNetStruct global_nnetstruct;
+extern LUStruct global_lustruct;
+
+/* External PROTOTYPES */
+/*extern unsigned long abs_randwc(unsigned long num);*/ /* From MISC */
+/*extern long randnum(long lngval);*/
+extern int32 randwc(int32 num);
+extern u32 abs_randwc(u32 num);
+extern int32 randnum(int32 lngval);
+
+extern farvoid *AllocateMemory(unsigned long nbytes, /* From SYSSPEC */
+ int *errorcode);
+extern void FreeMemory(farvoid *mempointer,
+ int *errorcode);
+extern void MoveMemory(farvoid *destination,
+ farvoid *source, unsigned long nbytes);
+extern void ReportError(char *context, int errorcode);
+extern void ErrorExit();
+extern unsigned long StartStopwatch();
+extern unsigned long StopStopwatch(unsigned long startticks);
+extern unsigned long TicksToSecs(unsigned long tickamount);
+extern double TicksToFracSecs(unsigned long tickamount);
+
+/*****************
+** NUMERIC SORT **
+*****************/
+
+/*
+** PROTOTYPES
+*/
+void DoNumSort(void);
+static ulong DoNumSortIteration(farlong *arraybase,
+ ulong arraysize,
+ uint numarrays);
+static void LoadNumArrayWithRand(farlong *array,
+ ulong arraysize,
+ uint numarrays);
+static void NumHeapSort(farlong *array,
+ ulong bottom,
+ ulong top);
+static void NumSift(farlong *array,
+ ulong i,
+ ulong j);
+
+
+/****************
+** STRING SORT **
+*****************
+*/
+
+
+/*
+** PROTOTYPES
+*/
+void DoStringSort(void);
+static ulong DoStringSortIteration(faruchar *arraybase,
+ uint numarrays,
+ ulong arraysize);
+static farulong *LoadStringArray(faruchar *strarray,
+ uint numarrays,
+ ulong *strings,
+ ulong arraysize);
+static void stradjust(farulong *optrarray,
+ faruchar *strarray,
+ ulong nstrings,
+ ulong i,
+ uchar l);
+static void StrHeapSort(farulong *optrarray,
+ faruchar *strarray,
+ ulong numstrings,
+ ulong bottom,
+ ulong top);
+static int str_is_less(farulong *optrarray,
+ faruchar *strarray,
+ ulong numstrings,
+ ulong a,
+ ulong b);
+static void strsift(farulong *optrarray,
+ faruchar *strarray,
+ ulong numstrings,
+ ulong i,
+ ulong j);
+
+/************************
+** BITFIELD OPERATIONS **
+*************************
+*/
+
+/*
+** PROTOTYPES
+*/
+void DoBitops(void);
+static ulong DoBitfieldIteration(farulong *bitarraybase,
+ farulong *bitoparraybase,
+ long bitoparraysize,
+ ulong *nbitops);
+static void ToggleBitRun(farulong *bitmap,
+ ulong bit_addr,
+ ulong nbits,
+ uint val);
+static void FlipBitRun(farulong *bitmap,
+ ulong bit_addr,
+ ulong nbits);
+
+/****************************
+** EMULATED FLOATING POINT **
+****************************/
+typedef struct /* One number in the emulated floating point format */
+{
+ u8 type; /* Indicates NORMAL, SUBNORMAL, etc. */
+ u8 sign; /* Mantissa sign */
+ short exp; /* Signed exponent...no bias */
+ u16 mantissa[INTERNAL_FPF_PRECISION]; /* Mantissa: INTERNAL_FPF_PRECISION words of type u16 */
+} InternalFPF;
+
+/*
+** PROTOTYPES
+*/
+void DoEmFloat(void);
+
+/*
+** EXTERNALS
+*/
+extern void SetupCPUEmFloatArrays(InternalFPF *abase,
+ InternalFPF *bbase, InternalFPF *cbase,
+ ulong arraysize);
+extern ulong DoEmFloatIteration(InternalFPF *abase,
+ InternalFPF *bbase, InternalFPF *cbase,
+ ulong arraysize, ulong loops);
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/*
+** PROTOTYPES
+*/
+void DoFourier(void);
+static ulong DoFPUTransIteration(fardouble *abase,
+ fardouble *bbase,
+ ulong arraysize);
+static double TrapezoidIntegrate(double x0,
+ double x1,
+ int nsteps,
+ double omegan,
+ int select);
+static double thefunction(double x,
+ double omegan,
+ int select);
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*
+** DEFINES
+*/
+
+#define ASSIGNROWS 101L
+#define ASSIGNCOLS 101L
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Two pointer views sharing storage in one union */
+ union {
+ long *p; /* Plain pointer to long */
+ long (*ap)[ASSIGNROWS][ASSIGNCOLS]; /* Pointer to an ASSIGNROWS x ASSIGNCOLS array of long */
+ } ptrs;
+} longptr;
+
+/*
+** PROTOTYPES
+*/
+void DoAssign(void);
+static ulong DoAssignIteration(farlong *arraybase,
+ ulong numarrays);
+static void LoadAssignArrayWithRand(farlong *arraybase,
+ ulong numarrays);
+static void LoadAssign(farlong arraybase[][ASSIGNCOLS]);
+static void CopyToAssign(farlong arrayfrom[][ASSIGNCOLS],
+ long arrayto[][ASSIGNCOLS]);
+static void Assignment(farlong arraybase[][ASSIGNCOLS]);
+static void calc_minimum_costs(long tableau[][ASSIGNCOLS]);
+static int first_assignments(long tableau[][ASSIGNCOLS],
+ short assignedtableau[][ASSIGNCOLS]);
+static void second_assignments(long tableau[][ASSIGNCOLS],
+ short assignedtableau[][ASSIGNCOLS]);
+
+/********************
+** IDEA ENCRYPTION **
+********************/
+
+/*
+** DEFINES
+*/
+#define IDEAKEYSIZE 16
+#define IDEABLOCKSIZE 8
+#define ROUNDS 8
+#define KEYLEN (6*ROUNDS+4)
+
+/*
+** MACROS
+*/
+#define low16(x) ((x) & 0x0FFFF)
+#define MUL(x,y) (x=mul(low16(x),y))
+
+
+typedef u16 IDEAkey[KEYLEN];
+
+/*
+** PROTOTYPES
+*/
+void DoIDEA(void);
+static ulong DoIDEAIteration(faruchar *plain1,
+ faruchar *crypt1, faruchar *plain2,
+ ulong arraysize, ulong nloops,
+ IDEAkey Z, IDEAkey DK);
+static u16 mul(register u16 a, register u16 b);
+static u16 inv(u16 x);
+static void en_key_idea(u16 userkey[8], IDEAkey Z);
+static void de_key_idea(IDEAkey Z, IDEAkey DK);
+static void cipher_idea(u16 in[4], u16 out[4], IDEAkey Z);
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/*
+** DEFINES
+*/
+#define EXCLUDED 32000L /* Big positive value */
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* One node of the Huffman tree (see hufftree) */
+ uchar c; /* Byte value */
+ float freq; /* Frequency */
+ int parent; /* Parent node (an int, presumably an index -- confirm) */
+ int left; /* Left pointer = 0 */
+ int right; /* Right pointer = 1 */
+} huff_node;
+
+/*
+** GLOBALS
+*/
+static huff_node *hufftree; /* The huffman tree */
+static long plaintextlen; /* Length of plaintext */
+
+/*
+** PROTOTYPES
+*/
+void DoHuffman();
+static void create_text_line(farchar *dt,long nchars);
+static void create_text_block(farchar *tb, ulong tblen,
+ ushort maxlinlen);
+static ulong DoHuffIteration(farchar *plaintext,
+ farchar *comparray, farchar *decomparray,
+ ulong arraysize, ulong nloops, huff_node *hufftree);
+static void SetCompBit(u8 *comparray, u32 bitoffset, char bitchar);
+static int GetCompBit(u8 *comparray, u32 bitoffset);
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+********************************/
+
+/*
+** DEFINES
+*/
+#define T 1 /* TRUE */
+#define F 0 /* FALSE */
+#define ERR -1
+#define MAXPATS 10 /* max number of patterns in data file */
+#define IN_X_SIZE 5 /* number of neurodes/row of input layer */
+#define IN_Y_SIZE 7 /* number of neurodes/col of input layer */
+#define IN_SIZE 35 /* equals IN_X_SIZE*IN_Y_SIZE */
+#define MID_SIZE 8 /* number of neurodes in middle layer */
+#define OUT_SIZE 8 /* number of neurodes in output layer */
+#define MARGIN 0.1 /* how near to 1,0 do we have to come to stop? */
+#define BETA 0.09 /* beta learning constant */
+#define ALPHA 0.09 /* momentum term constant */
+#define STOP 0.1 /* when worst_error less than STOP, training is done */
+
+/*
+** GLOBALS
+*/
+double mid_wts[MID_SIZE][IN_SIZE]; /* middle layer weights */
+double out_wts[OUT_SIZE][MID_SIZE]; /* output layer weights */
+double mid_out[MID_SIZE]; /* middle layer output */
+double out_out[OUT_SIZE]; /* output layer output */
+double mid_error[MID_SIZE]; /* middle layer errors */
+double out_error[OUT_SIZE]; /* output layer errors */
+double mid_wt_change[MID_SIZE][IN_SIZE]; /* storage for last wt change */
+double out_wt_change[OUT_SIZE][MID_SIZE]; /* storage for last wt change */
+double in_pats[MAXPATS][IN_SIZE]; /* input patterns */
+double out_pats[MAXPATS][OUT_SIZE]; /* desired output patterns */
+double tot_out_error[MAXPATS]; /* measure of whether net is done */
+double out_wt_cum_change[OUT_SIZE][MID_SIZE]; /* accumulated wt changes */
+double mid_wt_cum_change[MID_SIZE][IN_SIZE]; /* accumulated wt changes */
+
+double worst_error; /* worst error each pass through the data */
+double average_error; /* average error each pass through the data */
+double avg_out_error[MAXPATS]; /* average error each pattern */
+
+int iteration_count; /* number of passes thru network so far */
+int numpats; /* number of patterns in data file */
+int numpasses; /* number of training passes through data file */
+int learned; /* flag--if TRUE, network has learned all patterns */
+
+/*
+** The Neural Net test requires an input data file.
+** The name is specified here.
+*/
+char *inpath="NNET.DAT";
+
+/*
+** PROTOTYPES
+*/
+void DoNNET(void);
+static ulong DoNNetIteration(ulong nloops);
+static void do_mid_forward(int patt);
+static void do_out_forward();
+void display_output(int patt);
+static void do_forward_pass(int patt);
+static void do_out_error(int patt);
+static void worst_pass_error();
+static void do_mid_error();
+static void adjust_out_wts();
+static void adjust_mid_wts();
+static void do_back_pass(int patt);
+static void move_wt_changes();
+static int check_out_error();
+static void zero_changes();
+static void randomize_wts();
+static int read_data_file();
+/* static int initialize_net(); */
+
+/***********************
+** LU DECOMPOSITION **
+** (Linear Equations) **
+***********************/
+
+/*
+** DEFINES
+*/
+
+#define LUARRAYROWS 101L
+#define LUARRAYCOLS 101L
+
+/*
+** TYPEDEFS
+*/
+typedef struct /* Two pointer views sharing storage in one union */
+{ union
+ { fardouble *p; /* Plain pointer to fardouble */
+ fardouble (*ap)[][LUARRAYCOLS]; /* Pointer to an array of LUARRAYCOLS-wide rows; row count unspecified */
+ } ptrs;
+} LUdblptr;
+
+/*
+** GLOBALS
+*/
+fardouble *LUtempvv;
+
+/*
+** PROTOTYPES
+*/
+void DoLU(void);
+static void LUFreeMem(fardouble *a, fardouble *b,
+ fardouble *abase, fardouble *bbase);
+static ulong DoLUIteration(fardouble *a, fardouble *b,
+ fardouble *abase, fardouble *bbase,
+ ulong numarrays);
+static void build_problem( double a[][LUARRAYCOLS],
+ int n, double b[LUARRAYROWS]);
+static int ludcmp(double a[][LUARRAYCOLS],
+ int n, int indx[], int *d);
+static void lubksb(double a[][LUARRAYCOLS],
+ int n, int indx[LUARRAYROWS],
+ double b[LUARRAYROWS]);
+static int lusolve(double a[][LUARRAYCOLS],
+ int n, double b[LUARRAYROWS]);
+
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/nmglobal.h b/benchmarks/nbench/nbench-byte-2.2.3/nmglobal.h
new file mode 100644
index 0000000..ffa2649
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/nmglobal.h
@@ -0,0 +1,522 @@
+/*
+** nmglobal.h
+** Global definitions for native mode benchmarks.
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+** 10/95 - Added memory array & alignment -- RG
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+#define LINUX
+//#define DEBUG
+
+/* is this a 64 bit architecture? If so, this will define LONG64 */
+#include "pointer.h"
+
+/*
+** SYSTEM DEFINES
+*/
+
+/* +++ MEMORY +++ */
+
+/*
+** You must define ONLY ONE of the following identifiers
+** to specify the mechanism for allocating memory:
+** MALLOCMEM
+** DOS16MEM
+** MACMEM
+*/
+
+/*
+** Define MALLOCMEM to use the standard malloc() call for
+** memory. This is the default for most systems.
+*/
+#define MALLOCMEM
+
+/*
+** Define DOS16MEM if you're running in the old 16-bit segmented
+** model. This enables some fruity memory management routines
+** required for that model. NOT defining this assumes that
+** you're running in an environment that allows malloc() to
+** get > 64K chunks of memory.
+*/
+/* #define DOS16MEM */
+
+/* Define MACMEM to use the Mac's GetPtr call to allocate
+** memory (instead of malloc()).
+*/
+/* #define MACMEM */
+
+/* +++ TIMING +++ */
+/*
+** You must define ONLY ONE of the following identifiers to pick
+** the timing routine used.
+** CLOCKWCPS
+** CLOCKWCT
+** MACTIMEMGR
+** WIN31TIMER
+*/
+
+/*
+** Define CLOCKWCPS if you are using the clock() routine and the
+** constant used as the divisor to determine seconds is
+** CLOCKS_PER_SEC. This is the default in most cases.
+*/
+#define CLOCKWCPS
+
+/*
+** Define CLOCKWCT if you are using the clock() routine and the
+** constant used as the divisor to determine seconds is CLK_TCK
+*/
+/* #define CLOCKWCT */
+
+/*
+** Define MACTIMEMGR to use the Mac Time manager routines.
+** You'll need to be running at least system 6.0.3 or
+** better...extended time manager is recommended (system 7 or
+** better).
+*/
+/* #define MACTIMEMGR */
+
+/*
+** Define WIN31TIMER to use the timing routines in TOOLHELP.DLL.
+** Gets accuracy down to the millisecond.
+*/
+/* #define WIN31TIMER */
+
+/* +++ MISCELLANEOUS +++ */
+
+/*
+** Define DOS16 if you'll be compiling under DOS in 16-bit
+** (non DOS-extended) mode. This will enable proper definitions
+** for the far*** typedefs
+*/
+/* #define DOS16 */
+
+/*
+** Define MAC if you're compiling on a Macintosh. This
+** does a number of things:
+** includes unix.h
+** Incorporates code to mimic the command line via either
+** the console library (Symantec/Think) or the SIOUX
+** library (Code Warrior).
+*/
+/* #define MAC */
+
+/*
+** Define LONG64 if your compiler emits 64-bit longs.
+** This is typically true of Alpha compilers on Unix
+** systems...though, who knows, this may change in the
+** future. I MOVED THIS DEFINITION INTO THE FILE pointer.h. DO NOT
+** DEFINE IT HERE. IT WILL AUTOMATICALLY BE DEFINED IF NECESSARY.
+** Uwe F. Mayer, Dec 15, 1996, Nov 15, 1997
+*/
+/* #define LONG64 */
+
+/*
+** Define MACCWPROF if you are profiling on the Mac using
+** Code Warrior. This enables code that turns off the
+** profiler in the event of an error exit.
+*/
+/* #define MACCWPROF */
+
+#ifdef MAC
+#include <unix.h>
+#endif
+
+/*
+** ERROR CODES
+*/
+#define ERROR_MEMORY 1
+#define ERROR_MEMARRAY_FULL 2
+#define ERROR_MEMARRAY_NFOUND 3
+#define ERROR_FILECREATE 10
+#define ERROR_FILEREAD 11
+#define ERROR_FILEWRITE 12
+#define ERROR_FILEOPEN 13
+#define ERROR_FILESEEK 14
+
+/*
+** MINIMUM_TICKS
+**
+** This sets the default number of minimum ticks.
+** It can, of course, be overridden by the input
+** command file.
+** This ultimately gets loaded into the variable
+** global_min_ticks, which specifies the minimum
+** number of ticks that must take place between
+** a StartStopwatch() and StopStopwatch() call.
+** The idea is to reduce error buildup.
+*/
+#define MINIMUM_TICKS 60
+
+/*
+** MINIMUM_SECONDS
+**
+** Minimum number of seconds to run each test.
+*/
+#define MINIMUM_SECONDS 5
+
+/*
+** MAXPOSLONG
+**
+** This is the maximum positive long.
+*/
+#ifdef LONG64
+#define MAXPOSLONG 0x7FFFFFFFFFFFFFFFL
+#else
+#define MAXPOSLONG 0x7FFFFFFFL
+#endif
+
+/*
+** OTHER DEFINES
+*/
+#ifndef MAC
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/*
+** Memory array size. Used in SYSSPEC for keeping track
+** of re-aligned memory.
+*/
+#define MEM_ARRAY_SIZE 20
+
+/*
+** TYPEDEFS
+*/
+#define ulong unsigned long
+#define uchar unsigned char
+#define uint unsigned int
+#define ushort unsigned short
+/*
+typedef unsigned char uchar;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned long ulong;
+*/
+/*
+** The 'farxxx' typedefs were added in deference to DOS, which
+** requires far pointers to handle some of the bigger
+** memory structures. Other systems will simply
+** map 'farxxx' to 'xxx'
+*/
+#ifdef DOS16
+typedef void huge farvoid;
+typedef double huge fardouble;
+typedef long huge farlong;
+typedef unsigned long huge farulong;
+typedef char huge farchar;
+typedef unsigned char huge faruchar;
+
+#else
+
+typedef void farvoid;
+typedef double fardouble;
+typedef long farlong;
+typedef unsigned long farulong;
+typedef char farchar;
+typedef unsigned char faruchar;
+
+#endif
+
+/*
+** The following typedefs are used when element size
+** is critical. You'll have to alter these for
+** your specific platform/compiler.
+*/
+typedef unsigned char u8; /* Unsigned 8-bits */
+typedef unsigned short u16; /* Unsigned 16 bits */
+#ifdef LONG64
+typedef unsigned int u32; /* Unsigned 32 bits */
+typedef int int32; /* Signed 32 bit integer */
+#else
+typedef unsigned long u32; /* Unsigned 32 bits */
+typedef long int32; /* Signed 32 bit integer */
+#endif
+
+/*****************
+** NUMERIC SORT **
+*****************/
+/*
+** DEFINES
+*/
+
+/*
+** The following constant, NUMNUMARRAYS (no, it is not a
+** Peter Sellers joke) is the maximum number of arrays
+** that can be built by the numeric sorting benchmark
+** before it gives up. This maximum is dependent on the
+** amount of memory in the system.
+*/
+/*#define NUMNUMARRAYS 1000*/
+#define NUMNUMARRAYS 10000
+
+/*
+** The following constant NUMARRAYSIZE determines the
+** default # of elements in each numeric array. Ordinarily
+** this is something you shouldn't fool with, though as
+** with most of the constants here, it is adjustable.
+*/
+#define NUMARRAYSIZE 8111L
+
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the numeric and string sort tests */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* # of seconds requested */
+ double sortspersec; /* # of sort iterations per sec */
+ ushort numarrays; /* # of arrays */
+ ulong arraysize; /* # of elements in array (string sort: presumably bytes, cf. STRINGARRAYSIZE -- confirm) */
+} SortStruct;
+
+/****************
+** STRING SORT **
+*****************
+** Note: The string sort benchmark uses the same structure to
+** communicate parameters as does the numeric sort benchmark.
+** (i.e., SortStruct...see above.)
+*/
+
+/*
+** DEFINES
+*/
+/*
+** The following constant STRINGARRAYSIZE determines
+** the default # of bytes allocated to each string array.
+** Though the actual size can be pre-set from the command
+** file, this constant should be left unchanged.
+*/
+#define STRINGARRAYSIZE 8111L
+
+/************************
+** BITFIELD OPERATIONS **
+*************************
+*/
+
+/*
+** DEFINES
+*/
+
+/*
+** Following field sets the size of the bitfield array (in longs).
+*/
+#ifdef LONG64
+#define BITFARRAYSIZE 16384L
+#else
+#define BITFARRAYSIZE 32768L
+#endif
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the bitfield operations test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* # of seconds requested */
+ double bitopspersec; /* # of bitfield ops per sec */
+ ulong bitoparraysize; /* Total # of bitfield ops */
+ ulong bitfieldarraysize; /* Bit field array size (BITFARRAYSIZE is given in longs) */
+} BitOpStruct;
+
+/****************************
+** EMULATED FLOATING POINT **
+****************************/
+/*
+** DEFINES
+*/
+#define INTERNAL_FPF_PRECISION 4
+
+/*
+** The following constant is the maximum number of loops
+** of the emulated floating point test that the system
+** will allow before flagging an error. This is not a
+** critical constant, and can be altered if your system is
+** a real barn-burner.
+*/
+/*#define CPUEMFLOATLOOPMAX 50000L*/
+#define CPUEMFLOATLOOPMAX 500000L
+
+/*
+** Set size of array
+*/
+#define EMFARRAYSIZE 3000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the emulated floating point test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* # of seconds requested */
+ ulong arraysize; /* Size of array */
+ ulong loops; /* Loops per iteration */
+ double emflops; /* Results */
+} EmFloatStruct;
+
+/*************************
+** FOURIER COEFFICIENTS **
+*************************/
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the Fourier coefficients test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* # of requested seconds */
+ ulong arraysize; /* Size of coeff. arrays */
+ double fflops; /* Results */
+} FourierStruct;
+
+/*************************
+** ASSIGNMENT ALGORITHM **
+*************************/
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the assignment algorithm test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* Requested # of seconds */
+ ulong numarrays; /* # of arrays */
+ double iterspersec; /* Results: iterations per second */
+} AssignStruct;
+
+/********************
+** IDEA ENCRYPTION **
+********************/
+
+/*
+** DEFINES
+*/
+/* Following constant defines the max number of loops the
+** system will attempt. Keeps things from going off into the
+** weeds. */
+/*#define MAXIDEALOOPS 50000L*/
+#define MAXIDEALOOPS 500000L
+
+/*
+** Following constant sets the size of the arrays.
+** NOTE: For the IDEA algorithm to work properly, this
+** number MUST be some multiple of 8.
+*/
+#define IDEAARRAYSIZE 4000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the IDEA encryption test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* Requested # of seconds */
+ ulong arraysize; /* Size of array */
+ ulong loops; /* # of times to convert */
+ double iterspersec; /* Results: iterations per second */
+} IDEAStruct;
+
+
+/************************
+** HUFFMAN COMPRESSION **
+************************/
+
+/*
+** DEFINES
+*/
+/*
+** MAXHUFFLOOPS
+**
+** This constant specifies the maximum number of Huffman
+** compression loops the system will try for. This keeps
+** the test from going off into the weeds. This is not
+** a critical constant, and can be increased if your
+** system is a real barn-burner.
+*/
+/*#define MAXHUFFLOOPS 50000L*/
+#define MAXHUFFLOOPS 500000L
+
+/*
+** Following constant sets the size of the arrays to
+** be compressed/uncompressed.
+*/
+#define HUFFARRAYSIZE 5000L
+
+/*
+** TYPEDEFS
+*/
+
+typedef struct { /* Parameters and results for the Huffman compression test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* Requested # of seconds */
+ ulong arraysize; /* Size of array */
+ ulong loops; /* # of times to compress/decompress */
+ double iterspersec; /* Results: iterations per second */
+} HuffStruct;
+
+/********************************
+** BACK PROPAGATION NEURAL NET **
+********************************/
+
+/*
+** MAXNNETLOOPS
+**
+** This constant sets the max number of loops through the neural
+** net that the system will attempt before giving up. This
+** is not a critical constant. You can alter it if your system
+** has sufficient horsepower.
+*/
+/*#define MAXNNETLOOPS 50000L*/
+#define MAXNNETLOOPS 500000L
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the back propagation neural net test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* Requested # of seconds */
+ ulong loops; /* # of times to learn */
+ double iterspersec; /* Results: iterations per second */
+} NNetStruct;
+
+/***********************
+** LU DECOMPOSITION **
+** (Linear Equations) **
+***********************/
+
+/*
+** MAXLUARRAYS
+**
+** This sets the upper limit on the number of arrays
+** that the benchmark will attempt to build before
+** flagging an error. It is not a critical constant, and
+** may be increased if your system has the horsepower.
+*/
+/*#define MAXLUARRAYS 1000*/
+#define MAXLUARRAYS 10000
+
+/*
+** TYPEDEFS
+*/
+typedef struct { /* Parameters and results for the LU decomposition test */
+ int adjust; /* Set adjust code */
+ ulong request_secs; /* Requested # of seconds */
+ ulong numarrays; /* # of arrays */
+ double iterspersec; /* Results: iterations per second */
+} LUStruct;
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/pointer.c b/benchmarks/nbench/nbench-byte-2.2.3/pointer.c
new file mode 100644
index 0000000..f4de577
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/pointer.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+int main(){ /* Report the size of long (presumably used to decide LONG64 -- confirm against the Makefile) */
+ printf("%d",(int)sizeof(long)); /* sizeof(long) as a decimal number, no trailing newline */
+ return(0);
+}
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/pointer.h b/benchmarks/nbench/nbench-byte-2.2.3/pointer.h
new file mode 100644
index 0000000..249480a
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/pointer.h
@@ -0,0 +1 @@
+//#define LONG64
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c
new file mode 100644
index 0000000..7727abe
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**Linux rokka-laptop 2.6.32-29-generic #58-Ubuntu SMP Fri Feb 11 20:52:10 UTC 20\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5) \n");
+output_string(buffer);
+sprintf(buffer,"**libc: \n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: Wed Mar 16 02:20:48 EET 2011\n");
+output_string(buffer);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.example b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.example
new file mode 100644
index 0000000..db650f0
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.example
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**Linux mimi 2.0.31 #5 Thu Oct 23 10:02:08 CDT 1997 i486\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: gcc version 2.7.2.3\n");
+output_string(buffer);
+sprintf(buffer,"**libc: libc.so.5.4.38\n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: Thu Nov 20 10:04:43 CST 1997\n");
+output_string(buffer);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.template b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.template
new file mode 100644
index 0000000..c1a986c
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.c.template
@@ -0,0 +1,10 @@
+sprintf(buffer,"**System used for compilation:\n");
+output_string(buffer);
+sprintf(buffer,"**%SYSTEM%\n");
+output_string(buffer);
+sprintf(buffer,"**C compiler: %CCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"**libc: %LIBCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"**Date of compilation: %DATE%\n");
+output_string(buffer);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.sh b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.sh
new file mode 100755
index 0000000..57754fe
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfo.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+# the arguments of this script are the compiler name and flags
+
+# try to solve a chicken-and-egg problem on SunOS
+# ucb's test program does not handle -L like the other test programs
+# let's try to find another implementation
+if test -x /bin/test; then
+ TEST=/bin/test;
+else
+ if test -x /usr/bin/test; then
+ TEST=/usr/bin/test;
+ else
+ # cross your fingers that it's not like ucb test
+ TEST=test;
+ fi
+fi
+
+compiler=`echo $* | sed -e 's/-static//g' -e 's/-Bstatic//g'`
+if $TEST `basename $1` = "gcc" && ($compiler -v) >/dev/null 2>&1 ; then
+# Cygwin writes more than one line with "version" in it
+ gccversion=`$compiler -v 2>&1 | sed -e "/version/!d" | tail -n 1`
+else
+ gccversion="$1"
+fi
+
+libcversion=""
+if ($* hello.c -o hello) >/dev/null 2>&1; then
+ ldd_output=`(ldd hello) 2>&1`
+ libcversion=`echo $ldd_output | sed -e 's/.*static.*/static/' \
+ -e 's/.*not a dynamic.*/static/'`
+ if $TEST "$libcversion" = "static" ; then
+ if ($compiler hello.c -o hello) >/dev/null 2>&1; then
+ if (ldd hello) >/dev/null 2>/dev/null; then
+ libcversion=`(ldd hello) 2>&1`
+ libcversion=`echo $libcversion | sed -e '/libc/!d'\
+ -e 's/^[ ]*//' \
+ -e 's/.*=>[ ][ ]*\([^ ]*\).*/\1/'`
+ # remember the current directory
+ current=`pwd`
+ while $TEST -L "$libcversion" && ! $TEST "$libcversion" = "" ; do
+ libcitself=`basename $libcversion`
+ libpath=`echo $libcversion | sed -e "s/$libcitself$//"`
+ if $TEST -d "$libpath" ; then
+ cd $libpath
+ fi
+ if ls $libcitself >/dev/null 2>/dev/null ; then
+ libcversion=`ls -l $libcitself | \
+ sed -e 's/.*->[ ][ ]*\(.*\)$/\1/'`
+ else
+ # something must have gone wrong, let's bail out
+ libcversion=""
+ fi
+ done
+ # return to the current directory
+ cd $current
+ fi
+ fi
+ else
+ libcversion=""
+ fi
+fi
+
+rm -f sysinfo.crm sysinfoc.c hello
+
+# this bombs out on Ultrix which expects "cut -d"
+
+compsystem=`uname -a | cut -b 1-78`
+compdate=`date|cut -b1-55`
+
+# let's hope that ctrl-c is not part of any string here
+# this also will barf later if " is in any of the strings
+
+for i in sysinfo.c sysinfoc.c ; do
+ sed -e "s%CCVERSION%$gccversion" -e "s%LIBCVERSION%$libcversion"\
+ -e "s%SYSTEM%$compsystem" -e "s%DATE%$compdate"\
+ ${i}.template > $i
+done
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c b/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c
new file mode 100644
index 0000000..e5902d3
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler : gcc version 4.4.3 (Ubuntu 4.4.3-4ubuntu5) \n");
+output_string(buffer);
+sprintf(buffer,"libc : \n");
+output_string(buffer);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.example b/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.example
new file mode 100644
index 0000000..7da71ac
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.example
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler : gcc version 2.7.2.3\n");
+output_string(buffer);
+sprintf(buffer,"libc : libc.so.5.4.38\n");
+output_string(buffer);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.template b/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.template
new file mode 100644
index 0000000..922a5de
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysinfoc.c.template
@@ -0,0 +1,4 @@
+sprintf(buffer,"C compiler : %CCVERSION%\n");
+output_string(buffer);
+sprintf(buffer,"libc : %LIBCVERSION%\n");
+output_string(buffer);
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysspec.c b/benchmarks/nbench/nbench-byte-2.2.3/sysspec.c
new file mode 100644
index 0000000..cdaddec
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysspec.c
@@ -0,0 +1,894 @@
+
+#include <rtems.h>
+
+/*
+** sysspec.c
+** System-specific routines.
+**
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95;10/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/***********************************
+** SYSTEM-SPECIFIC ROUTINES **
+************************************
+**
+** These are the routines that provide functions that are
+** system-specific. If the benchmarks are to be ported
+** to new hardware/new O.S., this is the first place to
+** start.
+*/
+#include "sysspec.h"
+
+#ifdef DOS16
+#include <io.h>
+#include <fcntl.h>
+#include <sys\stat.h>
+#endif
+/*********************************
+** MEMORY MANAGEMENT ROUTINES **
+*********************************/
+
+
+/****************************
+** AllocateMemory
+** This routine returns a void pointer to a memory
+** block. The size of the memory block is given in bytes
+** as the first argument. This routine also returns an
+** error code in the second argument.
+** 10/95 Update:
+** Added an associative array for memory alignment reasons.
+** mem_array[2][MEM_ARRAY_SIZE]
+** mem_array[0][n] = Actual address (from malloc)
+** mem_array[1][n] = Aligned address
+** Currently, mem_array[][] is only used if you use malloc;
+** it is not used for the 16-bit DOS and MAC versions.
+*/
+farvoid *AllocateMemory(unsigned long nbytes,   /* # of bytes to alloc */
+                int *errorcode)                 /* Returned error code */
+{
+
+#ifdef DOS16MEM
+union REGS registers;
+unsigned short nparas;          /* # of paragraphs */
+
+/*
+** Set # of paragraphs to nbytes/16 +1.  The +1 is a
+** slop factor.
+*/
+nparas=(unsigned short)(nbytes/16L) + 1;
+
+/*
+** Set incoming registers.
+*/
+registers.h.ah=0x48;            /* Allocate memory */
+registers.x.bx=nparas;          /* # of paragraphs */
+
+
+intdos(&registers,&registers);  /* Call DOS */
+
+/*
+** See if things succeeded.
+*/
+if(registers.x.cflag)
+{       printf("error: %d Lgst: %d\n",registers.x.ax,registers.x.bx);
+        *errorcode=ERROR_MEMORY;
+        return((farvoid *)NULL);
+}
+
+/*
+** Create a farvoid pointer to return.
+*/
+*errorcode=0;
+return((farvoid *)MK_FP(registers.x.ax,0));
+
+#endif
+
+#ifdef MACMEM
+/*
+** For MAC CodeWarrior, we'll use the MacOS NewPtr call
+*/
+farvoid *returnval;
+returnval=(farvoid *)NewPtr((Size)nbytes);
+if(returnval==(farvoid *)NULL)
+        *errorcode=ERROR_MEMORY;
+else
+        *errorcode=0;
+return(returnval);
+#endif
+
+#ifdef MALLOCMEM
+/*
+** Everyone else: malloc, over-allocating by 2*global_align bytes so
+** that the address handed back can be shifted for alignment.  The
+** (true, adjusted) address pair is recorded with AddMemArray.
+*/
+farvoid *returnval;             /* Return value */
+ulong true_addr;                /* True address */
+ulong adj_addr;                 /* Adjusted address */
+
+returnval=(farvoid *)malloc((size_t)(nbytes+2L*(long)global_align));
+if(returnval==(farvoid *)NULL)
+{
+        /*
+        ** Out of memory.  Bail out now: a NULL block must not be
+        ** entered in mem_array, and the alignment arithmetic below
+        ** can turn address 0 into a non-NULL pointer.
+        */
+        *errorcode=ERROR_MEMORY;
+        return((farvoid *)NULL);
+}
+*errorcode=0;
+
+adj_addr=true_addr=(ulong)returnval;
+
+/*
+** No alignment requested: hand back the malloc pointer as-is.
+*/
+if(global_align==0)
+{
+        if(AddMemArray(true_addr, adj_addr))
+                *errorcode=ERROR_MEMARRAY_FULL;
+        return(returnval);
+}
+
+if(global_align==1)
+{
+        /* Bump an even address by one so the result is odd. */
+        if(true_addr%2==0) adj_addr++;
+}
+else
+{
+        /*
+        ** Round up to a multiple of global_align, then step past an
+        ** address that is also a multiple of 2*global_align.
+        */
+        while(adj_addr%global_align!=0) ++adj_addr;
+        if(adj_addr%(global_align*2)==0) adj_addr+=global_align;
+}
+returnval=(void *)adj_addr;
+if(AddMemArray(true_addr,adj_addr))
+        *errorcode=ERROR_MEMARRAY_FULL;
+return(returnval);
+#endif
+
+}
+
+
+/****************************
+** FreeMemory
+** This is the reverse of AllocateMemory. The memory
+** block passed in is freed. Should an error occur,
+** that error is returned in errorcode.
+*/
+void FreeMemory(farvoid *mempointer, /* Pointer to memory block */
+ int *errorcode)
+{
+
+#ifdef DOS16MEM
+/*
+** 16-bit DOS VERSION!!
+*/
+unsigned int segment;
+unsigned int offset;
+union REGS registers;
+struct SREGS sregisters;
+
+/*
+** First get the segment/offset of the farvoid pointer.
+*/
+segment=FP_SEG(mempointer);
+offset=FP_OFF(mempointer);
+
+/*
+** Align the segment properly. For as long as offset >= 16,
+** subtract 16 from offset and add 1 to segment.
+*/
+while(offset>=16)
+{ offset-=16;
+ segment++;
+}
+
+/*
+** Build the call to DOS
+*/
+registers.h.ah=0x49; /* Free memory */
+sregisters.es=segment;
+
+intdosx(&registers,&registers,&sregisters);
+
+/*
+** Check for error
+*/
+if(registers.x.cflag)
+{ *errorcode=ERROR_MEMORY;
+ return;
+}
+
+*errorcode=0;
+return;
+#endif
+
+#ifdef MACMEM
+/*
+** Mac version: release the block with DisposPtr. No failure is
+** reported on this path; errorcode is always set to 0.
+*/
+DisposPtr((Ptr)mempointer);
+*errorcode=0;
+return;
+#endif
+
+#ifdef MALLOCMEM
+/*
+** malloc version: mempointer is the adjusted address, so look it up
+** in mem_array to recover the true address that must go to free().
+*/
+ulong adj_addr, true_addr;
+
+/* Locate item in memory array */
+adj_addr=(ulong)mempointer;
+if(RemoveMemArray(adj_addr, &true_addr))
+{ *errorcode=ERROR_MEMARRAY_NFOUND;
+ return;
+}
+/* Entry found and removed; free the original malloc pointer. */
+mempointer=(void *)true_addr;
+free(mempointer);
+*errorcode=0;
+return;
+#endif
+}
+
+/****************************
+** MoveMemory
+** Moves n bytes from a to b. Handles overlap.
+** In most cases, this is just a memmove operation.
+** But, not in DOS....noooo....
+*/
+void MoveMemory( farvoid *destination,  /* Destination address */
+                farvoid *source,        /* Source address */
+                unsigned long nbytes)
+{
+#ifndef DOS16MEM
+        /* Everything except 16-bit DOS: the C library memmove does the job. */
+        memmove(destination, source, nbytes);
+#else
+        /* 16-bit DOS: far pointers go through the dedicated helper. */
+        FarDOSmemmove( destination, source, nbytes);
+#endif
+}
+
+#ifdef DOS16MEM
+
+/****************************
+** FarDOSmemmove
+** Performs the same function as memmove for DOS when
+** the arrays are defined with far pointers.
+*/
+void FarDOSmemmove(farvoid *destination,        /* Destination pointer */
+                farvoid *source,                /* Source pointer */
+                unsigned long nbytes)           /* # of bytes to move */
+{
+unsigned char huge *uchsource;  /* Temp source */
+unsigned char huge *uchdest;    /* Temp destination */
+unsigned long saddr;            /* Source "true" address */
+unsigned long daddr;            /* Destination "true" address */
+
+
+/*
+** Get unsigned char pointer equivalents
+*/
+uchsource=(unsigned char huge *)source;
+uchdest=(unsigned char huge *)destination;
+
+/*
+** Calculate true address of source and destination and
+** compare.  Widen the segment to unsigned long BEFORE
+** multiplying; otherwise segment*16 is evaluated at int width
+** and, with a 16-bit int, loses the upper bits of the address,
+** which could select the wrong copy direction below.
+*/
+saddr=(unsigned long)FP_SEG(source)*16UL
+        + (unsigned long)FP_OFF(source);
+daddr=(unsigned long)FP_SEG(destination)*16UL
+        + (unsigned long)FP_OFF(destination);
+
+if(saddr > daddr)
+{
+        /*
+        ** Source is greater than destination.
+        ** Use a series of standard move operations.
+        ** We'll move 65535 bytes at a time.
+        */
+        while(nbytes>=65535L)
+        {       _fmemmove((farvoid *)uchdest,
+                        (farvoid *)uchsource,
+                        (size_t) 65535);
+                uchsource+=65535;       /* Advance pointers */
+                uchdest+=65535;
+                nbytes-=65535;
+        }
+
+        /*
+        ** Move remaining bytes
+        */
+        if(nbytes!=0L)
+                _fmemmove((farvoid *)uchdest,
+                        (farvoid *)uchsource,
+                        (size_t)(nbytes & 0xFFFF));
+
+}
+else
+{
+        /*
+        ** Destination is greater than (or equal to) source.
+        ** Advance pointers to the end of their
+        ** respective blocks.
+        */
+        uchsource+=nbytes;
+        uchdest+=nbytes;
+
+        /*
+        ** Again, move 65535 bytes at a time.  However,
+        ** "back" the pointers up before doing the
+        ** move.
+        */
+        while(nbytes>=65535L)
+        {
+                uchsource-=65535;
+                uchdest-=65535;
+                _fmemmove((farvoid *)uchdest,
+                        (farvoid *)uchsource,
+                        (size_t) 65535);
+                nbytes-=65535;
+        }
+
+        /*
+        ** Move remaining bytes.
+        */
+        if(nbytes!=0L)
+        {       uchsource-=nbytes;
+                uchdest-=nbytes;
+                _fmemmove((farvoid *)uchdest,
+                        (farvoid *)uchsource,
+                        (size_t)(nbytes & 0xFFFF));
+        }
+}
+return;
+}
+#endif
+
+/***********************************
+** MEMORY ARRAY HANDLING ROUTINES **
+***********************************/
+/****************************
+** InitMemArray
+** Initialize the memory array. This simply amounts to
+** setting mem_array_ents to zero, indicating that there
+** isn't anything in the memory array.
+*/
+void InitMemArray(void)
+{
+        /* Zero entries in use means the memory array is empty. */
+        mem_array_ents = 0;
+}
+
+/***************************
+** AddMemArray
+** Append one (true, adjusted) address pair to the memory array.
+**  true_addr is stored in mem_array[0][n]
+**  adj_addr  is stored in mem_array[1][n]
+** Returns 0 if ok
+**  -1 if the array already holds MEM_ARRAY_SIZE entries
+*/
+int AddMemArray(ulong true_addr,
+                ulong adj_addr)
+{
+        int slot=mem_array_ents;        /* Next unused column */
+
+        if(slot<MEM_ARRAY_SIZE)
+        {
+                mem_array[0][slot]=true_addr;
+                mem_array[1][slot]=adj_addr;
+                mem_array_ents=slot+1;
+                return(0);
+        }
+
+        /* No room left. */
+        return(-1);
+}
+
+/*************************
+** RemoveMemArray
+** Given an adjusted address value (mem_array[1][n]), locate
+** the entry and remove it from the mem_array.
+** Also returns the associated true address.
+** Returns 0 if ok
+** -1 if not found.
+*/
+int RemoveMemArray(ulong adj_addr,ulong *true_addr)
+{
+        int hit;        /* Index of the matching entry */
+        int k;          /* Index used while closing the gap */
+
+        /* Linear search along the adjusted-address row. */
+        for(hit=0;hit<mem_array_ents;hit++)
+                if(mem_array[1][hit]==adj_addr)
+                        break;
+
+        /* Ran off the end: the address was never registered. */
+        if(hit>=mem_array_ents)
+                return(-1);
+
+        /* Hand the true address back before the entry is overwritten. */
+        *true_addr=mem_array[0][hit];
+
+        /* Shift every later pair down one column. */
+        for(k=hit+1;k<mem_array_ents;k++)
+        {
+                mem_array[0][k-1]=mem_array[0][k];
+                mem_array[1][k-1]=mem_array[1][k];
+        }
+        mem_array_ents--;
+        return(0);
+}
+
+/**********************************
+** FILE HANDLING ROUTINES **
+**********************************/
+
+/****************************
+** CreateFile
+** This routine accepts a filename for an argument and
+** creates that file in the current directory (unless the
+** name contains a path that overrides the current directory).
+** Note that the routine does not OPEN the file.
+** If the file exists, it is truncated to length 0.
+*/
+void CreateFile(char *filename,
+                int *errorcode)
+{
+
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+int fhandle;            /* File handle used internally */
+
+fhandle=open(filename,O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
+
+if(fhandle==-1)
+{
+        /* Nothing was opened, so there is nothing to close. */
+        *errorcode=ERROR_FILECREATE;
+        return;
+}
+*errorcode=0;
+
+/*
+** Since all we're doing here is creating the file,
+** go ahead and close it.
+*/
+close(fhandle);
+
+return;
+#endif
+
+#ifdef LINUX
+FILE *fhandle;            /* File handle used internally */
+
+fhandle=fopen(filename,"w");
+
+if(fhandle==NULL)
+{
+        /*
+        ** Creation failed.  Return before the fclose below:
+        ** fclose must not be handed a null stream.
+        */
+        *errorcode=ERROR_FILECREATE;
+        return;
+}
+*errorcode=0;
+
+/*
+** Since all we're doing here is creating the file,
+** go ahead and close it.
+*/
+fclose(fhandle);
+
+return;
+#endif
+}
+
+/****************************
+** bmOpenFile
+** Opens the file given by fname, returning its handle.
+** If an error occurs, returns its code in errorcode.
+** The file is opened in read-write exclusive mode.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+int bmOpenFile(char *fname,             /* File name */
+                int *errorcode)         /* Error code returned */
+{
+        /* Binary, read/write; no O_CREAT is passed. */
+        int fd=open(fname,O_BINARY | O_RDWR, S_IREAD | S_IWRITE);
+
+        /* -1 from open() is the failure indication. */
+        *errorcode=(fd==-1) ? ERROR_FILEOPEN : 0;
+
+        return(fd);
+}
+#endif
+
+
+#ifdef LINUX
+
+FILE *bmOpenFile(char *fname,           /* File name */
+                int *errorcode)         /* Error code returned */
+{
+        /*
+        ** "w+" opens for update; per the C library it creates the
+        ** file, or truncates it if it already exists.
+        */
+        FILE *stream=fopen(fname,"w+");
+
+        /* A null stream is the failure indication. */
+        *errorcode=(stream==NULL) ? ERROR_FILEOPEN : 0;
+
+        return(stream);
+}
+#endif
+
+
+/****************************
+** CloseFile
+** Closes the file identified by fhandle.
+** A more innocuous routine there never was.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!!
+*/
+void CloseFile(int fhandle,             /* File handle */
+                int *errorcode)         /* Returned error code */
+{
+        /* The result of close() is not inspected; 0 is always reported. */
+        (void)close(fhandle);
+        *errorcode=0;
+}
+#endif
+#ifdef LINUX
+void CloseFile(FILE *fhandle,           /* File handle */
+                int *errorcode)         /* Returned error code */
+{
+        /* The result of fclose() is not inspected; 0 is always reported. */
+        (void)fclose(fhandle);
+        *errorcode=0;
+}
+#endif
+
+/****************************
+** readfile
+** Read bytes from an opened file. This routine
+** is a combination seek-and-read.
+** Note that this routine expects the offset to be from
+** the beginning of the file.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+void readfile(int fhandle,              /* File handle */
+        unsigned long offset,           /* Offset into file */
+        unsigned long nbytes,           /* # of bytes to read */
+        void *buffer,                   /* Buffer to read into */
+        int *errorcode)                 /* Returned error code */
+{
+        /* Position first; a failed seek abandons the read. */
+        if(lseek(fhandle,(long)offset,SEEK_SET)==-1L)
+        {
+                *errorcode=ERROR_FILESEEK;
+                return;
+        }
+
+        /* Single read() call; the byte count is masked to 16 bits. */
+        if(read(fhandle,buffer,(unsigned)(nbytes & 0xFFFF))==-1)
+                *errorcode=ERROR_FILEREAD;
+        else
+                *errorcode=0;
+}
+#endif
+#ifdef LINUX
+void readfile(FILE *fhandle,            /* File handle */
+        unsigned long offset,           /* Offset into file */
+        unsigned long nbytes,           /* # of bytes to read */
+        void *buffer,                   /* Buffer to read into */
+        int *errorcode)                 /* Returned error code */
+{
+
+size_t nelems;          /* Expected return code from read */
+size_t readcode;        /* Actual return code from read */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Seek to the proper offset.  fseek signals failure with any
+** nonzero value, so test against 0 rather than -1.
+*/
+if(fseek(fhandle,(long)offset,SEEK_SET)!=0)
+{       *errorcode=ERROR_FILESEEK;
+        return;
+}
+
+/*
+** Do the read.  A short count is reported as a read error.
+** NOTE(review): the 0xFFFF mask mirrors the DOS variant and limits
+** one call to 65535 bytes -- confirm callers never ask for more.
+*/
+nelems=(size_t)(nbytes & 0xFFFF);
+readcode=fread(buffer,(size_t)1,nelems,fhandle);
+if(readcode!=nelems)
+        *errorcode=ERROR_FILEREAD;
+
+return;
+}
+#endif
+
+/****************************
+** writefile
+** writes bytes to an opened file. This routine is
+** a combination seek-and-write.
+** Note that this routine expects the offset to be from
+** the beginning of the file.
+*/
+#ifdef DOS16
+/*
+** DOS VERSION!!
+*/
+
+void writefile(int fhandle,             /* File handle */
+        unsigned long offset,           /* Offset into file */
+        unsigned long nbytes,           /* # of bytes to write */
+        void *buffer,                   /* Buffer to write from */
+        int *errorcode)                 /* Returned error code */
+{
+        /* Position first; a failed seek abandons the write. */
+        if(lseek(fhandle,(long)offset,SEEK_SET)==-1L)
+        {
+                *errorcode=ERROR_FILESEEK;
+                return;
+        }
+
+        /* Single write() call; the byte count is masked to 16 bits. */
+        if(write(fhandle,buffer,(unsigned)(nbytes & 0xFFFF))==-1)
+                *errorcode=ERROR_FILEWRITE;
+        else
+                *errorcode=0;
+}
+#endif
+
+#ifdef LINUX
+
+void writefile(FILE *fhandle,           /* File handle */
+        unsigned long offset,           /* Offset into file */
+        unsigned long nbytes,           /* # of bytes to write */
+        void *buffer,                   /* Buffer to write from */
+        int *errorcode)                 /* Returned error code */
+{
+
+size_t nelems;          /* Expected return code from write */
+size_t writecode;       /* Actual return code from write */
+
+/*
+** Presume success.
+*/
+*errorcode=0;
+
+/*
+** Seek to the proper offset.  fseek signals failure with any
+** nonzero value, so test against 0 rather than -1.
+*/
+if(fseek(fhandle,(long)offset,SEEK_SET)!=0)
+{       *errorcode=ERROR_FILESEEK;
+        return;
+}
+
+/*
+** Do the write.  fwrite returns the number of elements written,
+** so it is a SHORT count (not a matching one) that means failure.
+** NOTE(review): the 0xFFFF mask mirrors the DOS variant and limits
+** one call to 65535 bytes -- confirm callers never ask for more.
+*/
+nelems=(size_t)(nbytes & 0xFFFF);
+writecode=fwrite(buffer,(size_t)1,nelems,fhandle);
+if(writecode!=nelems)
+        *errorcode=ERROR_FILEWRITE;
+
+return;
+}
+#endif
+
+
+/********************************
+** ERROR HANDLING ROUTINES **
+********************************/
+
+/****************************
+** ReportError
+** Report error message condition.
+*/
+void ReportError(char *errorcontext,    /* Error context string */
+                int errorcode)          /* Error code number */
+{
+        /* Banner and caller-supplied context first... */
+        printf("ERROR CONDITION\nContext: %s\n",errorcontext);
+
+        /* ...then the numeric code (no trailing newline is printed). */
+        printf("Code: %d",errorcode);
+}
+
+/****************************
+** ErrorExit
+** Performs an exit from an error condition.
+*/
+void ErrorExit()
+{
+#if defined(MACCWPROF) && __profile__
+        /*
+        ** Mac/MetroWerks profiling build (11/17/94 RG): the profiler
+        ** has to be turned off before leaving.
+        */
+        ProfilerTerm();
+#endif
+
+        /* Nothing fancier for now: terminate with status 1. */
+        exit(1);
+}
+
+/*****************************
+** STOPWATCH ROUTINES **
+*****************************/
+
+/****************************
+** StartStopwatch
+** Starts a software stopwatch. Returns the first value of
+** the stopwatch in ticks.
+*/
+unsigned long StartStopwatch()
+{
+#if defined(MACTIMEMGR)
+        /*
+        ** Mac Code Warrior: install and prime the Time Manager task.
+        ** The value returned here is only a dummy.
+        */
+        InsTime((QElemPtr)&myTMTask);
+        PrimeTime((QElemPtr)&myTMTask,-MacHSTdelay);
+        return((unsigned long)1);
+#elif defined(WIN31TIMER)
+        /* Win 3.x: the timer yields a DWORD, coaxed into a long. */
+        _Call16(lpfn,"p",&win31tinfo);
+        return((unsigned long)win31tinfo.dwmsSinceStart);
+#else
+        /* Default: the C library clock() reading. */
+        return((unsigned long)clock());
+#endif
+}
+
+/****************************
+** StopStopwatch
+** Stops the software stopwatch. Expects as an input argument
+** the stopwatch start time.
+*/
+unsigned long StopStopwatch(unsigned long startticks)
+{
+#if defined(MACTIMEMGR)
+        /*
+        ** Mac Code Warrior: startticks is ignored; the result is
+        ** in microseconds.
+        */
+        RmvTime((QElemPtr)&myTMTask);
+        return((unsigned long)(MacHSTdelay+myTMTask.tmCount-MacHSTohead));
+#elif defined(WIN31TIMER)
+        /* Win 3.x: re-query the timer and subtract the start reading. */
+        _Call16(lpfn,"p",&win31tinfo);
+        return((unsigned long)win31tinfo.dwmsSinceStart-startticks);
+#else
+        /* Default: current clock() reading minus the start reading. */
+        return((unsigned long)clock()-startticks);
+#endif
+}
+
+/****************************
+** TicksToSecs
+** Converts ticks to seconds. Converts ticks to integer
+** seconds, discarding any fractional amount.
+*/
+unsigned long TicksToSecs(unsigned long tickamount)
+{
+#ifdef CLOCKWCT
+return((unsigned long)(tickamount/CLK_TCK));
+#endif
+
+#ifdef MACTIMEMGR
+/* +++ MAC time manager version (using timer in microseconds) +++ */
+return((unsigned long)(tickamount/1000000));
+#endif
+
+#ifdef CLOCKWCPS
+/*
+** Everybody else: the stopwatch readings come from clock(), whose
+** unit is 1/CLOCKS_PER_SEC seconds, so divide by that macro.
+** NOTE(review): this branch used a literal 10000 while
+** TicksToFracSecs used 1000 -- confirm the target's clock() really
+** counts in CLOCKS_PER_SEC units.
+*/
+return((unsigned long)(tickamount/(unsigned long)CLOCKS_PER_SEC));
+#endif
+
+#ifdef WIN31TIMER
+/* Each tick is 840 nanoseconds */
+return((unsigned long)(tickamount/1000L));
+#endif
+
+}
+
+/****************************
+** TicksToFracSecs
+** Converts ticks to fractional seconds. In other words,
+** this returns the exact conversion from ticks to
+** seconds.
+*/
+double TicksToFracSecs(unsigned long tickamount)
+{
+#ifdef CLOCKWCT
+return((double)tickamount/(double)CLK_TCK);
+#endif
+
+#ifdef MACTIMEMGR
+/* +++ MAC time manager version +++ */
+return((double)tickamount/(double)1000000);
+#endif
+
+#ifdef CLOCKWCPS
+/*
+** Everybody else: the stopwatch readings come from clock(), whose
+** unit is 1/CLOCKS_PER_SEC seconds, so divide by that macro.
+** NOTE(review): this branch used a literal 1000 while TicksToSecs
+** used 10000 -- confirm the target's clock() really counts in
+** CLOCKS_PER_SEC units.
+*/
+return((double)tickamount/(double)CLOCKS_PER_SEC);
+#endif
+
+#ifdef WIN31TIMER
+/* Using 840 nanosecond ticks */
+return((double)tickamount/(double)1000);
+#endif
+}
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/sysspec.h b/benchmarks/nbench/nbench-byte-2.2.3/sysspec.h
new file mode 100644
index 0000000..ba57a96
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/sysspec.h
@@ -0,0 +1,168 @@
+/*
+** sysspec.h
+** Header file for sysspec.c
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Standard includes
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+
+#include "nmglobal.h"
+
+#if !defined(MAC) && !defined(OSX)
+#include <malloc.h>
+#endif
+
+
+/*
+** System-specific includes
+*/
+
+#ifdef DOS16MEM
+#include "dos.h"
+#endif
+
+/* #include "time.h"
+#include "io.h"
+#include "fcntl.h"
+#include "sys\stat.h" */
+/* Removed for MSVC++
+#include "alloc.h"
+*/
+
+/*
+** MAC Time Manager routines (from Code Warrior)
+*/
+#ifdef MACTIMEMGR
+#include <memory.h>
+#include <lowmem.h>
+#include <Types.h>
+#include <Timer.h>
+extern struct TMTask myTMTask;
+extern long MacHSTdelay,MacHSTohead;
+#endif
+
+/*
+** Windows 3.1 timer defines
+*/
+#ifdef WIN31TIMER
+#include <windows.h>
+#include <toolhelp.h>
+/*
+** State used by the stopwatch routines in sysspec.c when WIN31TIMER
+** is set: lpfn is the routine invoked through _Call16 and win31tinfo
+** receives the timer reading (dwmsSinceStart).
+** NOTE(review): these are definitions, not extern declarations, so
+** each file including this header gets its own -- confirm that only
+** one translation unit includes it in WIN31TIMER builds.
+*/
+TIMERINFO win31tinfo;
+HANDLE hThlp;
+FARPROC lpfn;
+#endif
+
+/**************
+** EXTERNALS **
+**************/
+/* Row 0 holds true (malloc) addresses, row 1 the adjusted ones. */
+extern ulong mem_array[2][MEM_ARRAY_SIZE];
+/* Number of column pairs of mem_array currently in use. */
+extern int mem_array_ents;
+/* Alignment setting consulted by AllocateMemory; 0 means no adjustment. */
+extern int global_align;
+
+/****************************
+** FUNCTION PROTOTYPES **
+****************************/
+
+/* Allocate nbytes; the status is returned through errorcode. */
+farvoid *AllocateMemory(unsigned long nbytes,
+ int *errorcode);
+
+/* Release a block obtained from AllocateMemory. */
+void FreeMemory(farvoid *mempointer,
+ int *errorcode);
+
+/* Move nbytes from source to destination. */
+void MoveMemory( farvoid *destination,
+ farvoid *source,
+ unsigned long nbytes);
+
+#ifdef DOS16MEM
+/* Far-pointer flavour of the move, used by MoveMemory under DOS16MEM. */
+void FarDOSmemmove(farvoid *destination,
+ farvoid *source,
+ unsigned long nbytes);
+#endif
+
+/* Mark the memory array as empty. */
+void InitMemArray(void);
+
+/* Record a (true, adjusted) address pair; 0 if ok, -1 if the array is full. */
+int AddMemArray(ulong true_addr, ulong adj_addr);
+
+/* Remove the pair matching adj_addr and return its true address; -1 if absent. */
+int RemoveMemArray(ulong adj_addr,ulong *true_addr);
+
+/* Print the error context string and the numeric code. */
+void ReportError(char *context, int errorcode);
+
+/* Terminate the program with exit status 1. */
+void ErrorExit();
+
+/* Create (or truncate) the named file without leaving it open. */
+void CreateFile(char *filename,
+ int *errorcode);
+
+/* File routines, DOS flavour: files are identified by an int handle. */
+#ifdef DOS16
+int bmOpenFile(char *fname,
+ int *errorcode);
+
+void CloseFile(int fhandle,
+ int *errorcode);
+
+void readfile(int fhandle,
+ unsigned long offset,
+ unsigned long nbytes,
+ void *buffer,
+ int *errorcode);
+
+void writefile(int fhandle,
+ unsigned long offset,
+ unsigned long nbytes,
+ void *buffer,
+ int *errorcode);
+#endif
+
+/* File routines, stdio flavour: files are identified by a FILE pointer. */
+#ifdef LINUX
+FILE *bmOpenFile(char *fname,
+ int *errorcode);
+
+void CloseFile(FILE *fhandle,
+ int *errorcode);
+
+void readfile(FILE *fhandle,
+ unsigned long offset,
+ unsigned long nbytes,
+ void *buffer,
+ int *errorcode);
+
+void writefile(FILE *fhandle,
+ unsigned long offset,
+ unsigned long nbytes,
+ void *buffer,
+ int *errorcode);
+
+#endif
+
+/* Start timing; returns the initial stopwatch reading in ticks. */
+unsigned long StartStopwatch();
+
+/* Stop timing; takes the reading returned by StartStopwatch. */
+unsigned long StopStopwatch(unsigned long startticks);
+
+/* Convert ticks to whole seconds, discarding the fraction. */
+unsigned long TicksToSecs(unsigned long tickamount);
+
+/* Convert ticks to seconds as a double. */
+double TicksToFracSecs(unsigned long tickamount);
+
+
diff --git a/benchmarks/nbench/nbench-byte-2.2.3/wordcat.h b/benchmarks/nbench/nbench-byte-2.2.3/wordcat.h
new file mode 100644
index 0000000..9f18b42
--- /dev/null
+++ b/benchmarks/nbench/nbench-byte-2.2.3/wordcat.h
@@ -0,0 +1,81 @@
+/*
+** wordcat.h
+** Word catalog
+** BYTEmark (tm)
+** BYTE's Native Mode Benchmarks
+** Rick Grehan, BYTE Magazine
+**
+** Creation:
+** Revision: 3/95
+**
+** DISCLAIMER
+** The source, executable, and documentation files that comprise
+** the BYTEmark benchmarks are made available on an "as is" basis.
+** This means that we at BYTE Magazine have made every reasonable
+** effort to verify that the there are no errors in the source and
+** executable code. We cannot, however, guarantee that the programs
+** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
+** no claims in regard to the fitness of the source code, executable
+** code, and documentation of the BYTEmark.
+** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
+** of McGraw-Hill cannot be held responsible for any damages resulting
+** from the use of this code or the results obtained from using
+** this code.
+*/
+
+/*
+** Word catalog
+*/
+/* Number of entries in wordcatarray; the initializer below lists exactly this many. */
+#define WORDCATSIZE 50
+
+/*
+** The word list itself. Entries are not unique ("that" appears twice).
+** NOTE(review): the array is defined, not just declared, in this
+** header -- confirm it is included from a single source file only.
+*/
+char *wordcatarray[WORDCATSIZE] =
+{ "Hello",
+ "He",
+ "Him",
+ "the",
+ "this",
+ "that",
+ "though",
+ "rough",
+ "cough",
+ "obviously",
+ "But",
+ "but",
+ "bye",
+ "begin",
+ "beginning",
+ "beginnings",
+ "of",
+ "our",
+ "ourselves",
+ "yourselves",
+ "to",
+ "together",
+ "togetherness",
+ "from",
+ "either",
+ "I",
+ "A",
+ "return",
+ "However",
+ "that",
+ "example",
+ "yet",
+ "quickly",
+ "all",
+ "if",
+ "were",
+ "includes",
+ "always",
+ "never",
+ "not",
+ "small",
+ "returns",
+ "set",
+ "basic",
+ "Entered",
+ "with",
+ "used",
+ "shown",
+ "you",
+ "know" };