summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jeffrey O. Hill <hill@wombat.lanl.gov> 2013-02-05 18:03:30 +0100
committer Sebastian Huber <sebastian.huber@embedded-brains.de> 2013-03-11 17:57:41 +0100
commit d8e44ecaf82b9063a7e556f2e040069d446c3ac6 (patch)
tree 22744f7adc06977384670e188fc7eef1efa7ccff
parent RTEMS: Delete ChangeLog files. (diff)
download rtems-d8e44ecaf82b9063a7e556f2e040069d446c3ac6.tar.bz2
nios2: Add optimized IP checksum support
-rw-r--r-- cpukit/libnetworking/machine/in_cksum.h 69
-rw-r--r-- cpukit/libnetworking/netinet/in_cksum.c 4
-rw-r--r-- cpukit/libnetworking/netinet/in_cksum_nios2.h 292
3 files changed, 365 insertions, 0 deletions
diff --git a/cpukit/libnetworking/machine/in_cksum.h b/cpukit/libnetworking/machine/in_cksum.h
index e9a4ea4027..e625726232 100644
--- a/cpukit/libnetworking/machine/in_cksum.h
+++ b/cpukit/libnetworking/machine/in_cksum.h
@@ -1,4 +1,16 @@
/*
+ * Nios II version by Jeffrey O. Hill
+ *
+ * Copyright 2012. Los Alamos National Security, LLC.
+ * The Nios II specific part was produced under U.S. Government contract
+ * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL),
+ * which is operated by Los Alamos National Security, LLC for
+ * the U.S. Department of Energy. The U.S. Government has rights
+ * to use, reproduce, and distribute this software. NEITHER THE
+ * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY
+ * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR
+ * THE USE OF THIS SOFTWARE.
+ *
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
@@ -208,6 +220,63 @@ in_cksum_hdr(const struct ip *ip)
__tmpsum = (int)ntohs(ip->ip_sum) + 256; \
ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \
} while(0)
+
+/*
+ * Optimized version for the Altera Nios II softcore
+ */
+#elif defined ( __GNUC__ ) && defined ( __nios2__ )
+
+/*
+ * Add two 32-bit words using ones' complement (end-around carry)
+ * arithmetic.
+ *
+ * a, b: the 32-bit operands.
+ * Returns a + b modulo 2^32, plus one if that addition wrapped.
+ *
+ * The asm statement is a pure function of its inputs, so it is not
+ * marked volatile: the compiler may schedule, combine or discard it
+ * like any other arithmetic.
+ */
+static inline uint32_t _NIOS2_Add_ones_complement ( const uint32_t a,
+                                                     const uint32_t b )
+{
+    uint32_t sum;
+    uint32_t C;
+    __asm__ (
+        " add %0, %2, %3 \n" /* sum <= a + b (mod 2^32) */
+        " cmpltu %1, %0, %2 \n" /* C <= 1 if the add wrapped (sum < a) */
+        " add %0, %1, %0 \n" /* sum <= sum + C (end-around carry) */
+        : "=&r"(sum), "=&r"(C)
+        : "r"(a), "r"(b)
+    );
+    return sum;
+}
+
+/*
+ * Fold a 32-bit ones' complement accumulator into 16 bits.
+ *
+ * a: 32-bit partial sum.
+ * Returns the upper half-word of a plus the lower half-word of a,
+ * with the carry out of that 16-bit addition added back in.
+ *
+ * The asm statement is a pure function of its input, so it is not
+ * marked volatile.
+ */
+static inline uint16_t _NIOS2_Add_ones_complement_word_halves
+    ( const uint32_t a )
+{
+    uint16_t sum;
+    uint32_t tmp;
+    __asm__ (
+        " roli %1, %2, 16 \n" /* tmp <= a with its half-words swapped */
+        " add %1, %2, %1 \n" /* upper half of tmp <= hi + lo + carry of the lower-half add */
+        " srli %0, %1, 16 \n" /* sum <= upper half of tmp */
+        : "=&r"(sum),"=&r"(tmp)
+        : "r"(a)
+    );
+    return sum;
+}
+
+/*
+ * Checksum the first five 32-bit words (20 bytes) found at pHdrIP.
+ *
+ * pHdrIP: header to checksum; it is read as an array of uint32_t, so
+ *         it is assumed to be 32-bit aligned (TODO confirm for all
+ *         callers).
+ * Returns the 16-bit ones' complement of the ones' complement sum of
+ * those five words.
+ */
+static __inline u_int in_cksum_hdr ( const struct ip * pHdrIP )
+{
+    const uint32_t * const words = ( const uint32_t * ) pHdrIP;
+
+    /* words[0] .. words[4] are accumulated in order, innermost first */
+    const uint32_t total =
+        _NIOS2_Add_ones_complement (
+            _NIOS2_Add_ones_complement (
+                _NIOS2_Add_ones_complement (
+                    _NIOS2_Add_ones_complement ( words[0], words[1] ),
+                    words[2] ),
+                words[3] ),
+            words[4] );
+
+    /* fold to 16 bits, then invert those 16 bits */
+    uint32_t folded = _NIOS2_Add_ones_complement_word_halves ( total );
+    folded ^= 0xffff;
+    return folded;
+}
+
+/*
+ * Adjust the stored header checksum in place: convert ip_sum to host
+ * byte order, add 256, add the bits above bit 15 back into the value,
+ * and store the low 16 bits in network byte order.
+ *
+ * NOTE(review): presumably the incremental update that goes with a
+ * TTL decrement when forwarding - confirm against the callers.
+ */
+static __inline void in_cksum_update ( struct ip * pHdrIP )
+{
+    const uint32_t bumped = ( uint32_t ) ntohs ( pHdrIP->ip_sum ) + 256u;
+    const uint32_t wrapped = bumped + ( bumped >> 16u );
+
+    pHdrIP->ip_sum = htons ( ( uint16_t ) wrapped );
+}
+
/*
* Here is the generic, portable, inefficient algorithm.
*/
diff --git a/cpukit/libnetworking/netinet/in_cksum.c b/cpukit/libnetworking/netinet/in_cksum.c
index dfe39b1d7f..ec7e49cd82 100644
--- a/cpukit/libnetworking/netinet/in_cksum.c
+++ b/cpukit/libnetworking/netinet/in_cksum.c
@@ -57,6 +57,10 @@
#include "in_cksum_powerpc.h"
+#elif (defined(__GNUC__) && defined(__nios2__))
+
+#include "in_cksum_nios2.h"
+
#else
#include <stdio.h> /* for puts */
diff --git a/cpukit/libnetworking/netinet/in_cksum_nios2.h b/cpukit/libnetworking/netinet/in_cksum_nios2.h
new file mode 100644
index 0000000000..8382f1117a
--- /dev/null
+++ b/cpukit/libnetworking/netinet/in_cksum_nios2.h
@@ -0,0 +1,292 @@
+
+/*
+ * Altera Nios II Internet (ones' complement) checksum computation
+ *
+ * Author: Jeffrey O. Hill
+ *
+ * Copyright 2012. Los Alamos National Security, LLC.
+ * This material was produced under U.S. Government contract
+ * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL),
+ * which is operated by Los Alamos National Security, LLC for
+ * the U.S. Department of Energy. The U.S. Government has rights
+ * to use, reproduce, and distribute this software. NEITHER THE
+ * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY
+ * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR
+ * THE USE OF THIS SOFTWARE.
+ *
+ * COPYRIGHT (c) 1989-2012.
+ * On-Line Applications Research Corporation (OAR).
+ *
+ * Copyright (c) 1997 Mark Brinicombe
+ * Copyright (c) 1997 Causality Limited
+ *
+ * Copyright (c) 1995 Zubin Dittia.
+ * Copyright (c) 1995 Matthew R. Green.
+ * Copyright (c) 1994 Charles M. Hannum.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Based on the arm / sparc version, but mostly using inline
+ * functions in place of nasty macros.
+ *
+ * It would be a great idea to detect at runtime whether the
+ * Nios II has a user-defined (custom) instruction that computes
+ * the checksum and, if so, invoke it here (we could call a
+ * function in the BSP).
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+/*
+ * Fixed-size accumulators. _NIOS2_Add_ones_complement_<N> adds the
+ * N bytes (N / 4 consecutive 32-bit words) starting at pWd to sum,
+ * lowest address first, and returns the new sum. Each larger helper
+ * is two back-to-back calls of the next smaller one, so after
+ * inlining the words are still added one by one in address order.
+ */
+
+/* 4 bytes: one word */
+static inline uint32_t _NIOS2_Add_ones_complement_4
+( uint32_t sum, const uint32_t * const pWd )
+{
+    return _NIOS2_Add_ones_complement ( sum, *pWd );
+}
+
+/* 8 bytes: two words */
+static inline uint32_t _NIOS2_Add_ones_complement_8
+( uint32_t sum, const uint32_t * const pWd )
+{
+    sum = _NIOS2_Add_ones_complement_4 ( sum, pWd );
+    return _NIOS2_Add_ones_complement_4 ( sum, pWd + 1 );
+}
+
+/* 16 bytes: four words */
+static inline uint32_t _NIOS2_Add_ones_complement_16
+( uint32_t sum, const uint32_t * const pWd )
+{
+    sum = _NIOS2_Add_ones_complement_8 ( sum, pWd );
+    return _NIOS2_Add_ones_complement_8 ( sum, pWd + 2 );
+}
+
+/* 32 bytes: eight words */
+static inline uint32_t _NIOS2_Add_ones_complement_32
+( uint32_t sum, const uint32_t * const pWd )
+{
+    sum = _NIOS2_Add_ones_complement_16 ( sum, pWd );
+    return _NIOS2_Add_ones_complement_16 ( sum, pWd + 4 );
+}
+
+/* 64 bytes: sixteen words */
+static inline uint32_t _NIOS2_Add_ones_complement_64
+( uint32_t sum, const uint32_t * const pWd )
+{
+    sum = _NIOS2_Add_ones_complement_32 ( sum, pWd );
+    return _NIOS2_Add_ones_complement_32 ( sum, pWd + 8 );
+}
+
+/*
+ * Partially fold a 32-bit accumulator.
+ *
+ * a: 32-bit partial sum.
+ * Returns (a >> 16) + (a & 0xffff). The result is at most 0x1fffe,
+ * so the caller can shift it left by 8 and add further 8/16-bit
+ * quantities without overflowing 32 bits.
+ *
+ * The asm statement is a pure function of its input, so it is not
+ * marked volatile.
+ */
+static inline uint32_t _NIOS2_Reduce_checksum ( uint32_t a )
+{
+    uint32_t tmp;
+    __asm__ (
+        " srli %1, %0, 16 \n" /* tmp = a >> 16 */
+        " andi %0, %0, 0xffff \n" /* a = a & 0xffff */
+        " add %0, %0, %1 \n" /* a = a + tmp */
+        : "+&r"(a), "=&r"(tmp)
+    );
+    return a;
+}
+
+/* Paste two tokens together to form a single identifier. */
+#define combineTokens( A, B ) A ## B
+
+/*
+ * If at least N bytes remain (mlen >= N), add the next N bytes at w
+ * to sum with _NIOS2_Add_ones_complement_<N>, then consume them.
+ *
+ * N must be a literal for which such a helper exists (4, 8, 16, 32
+ * or 64) because it is pasted into the helper's name. The macro
+ * reads and modifies the caller's locals `mlen`, `sum` and `w`.
+ *
+ * Wrapped in do { } while ( 0 ) so that it behaves as one statement,
+ * including in front of an `else`. The cast keeps the const
+ * qualifier of `w`.
+ */
+#define ADD_AND_ADVANCE( N ) \
+do { \
+    if ( mlen >= ( N ) ) { \
+        sum = combineTokens ( _NIOS2_Add_ones_complement_, N ) \
+            ( sum, ( const uint32_t * ) w ); \
+        mlen -= ( N ); \
+        w += ( N ); \
+    } \
+} while ( 0 )
+
+/*
+ * Internet checksum over part of an mbuf chain.
+ *
+ * m:   first mbuf to process; the chain is followed through m_next.
+ * off: number of bytes to skip at the start of the first non-empty
+ *      mbuf; later mbufs are processed from their first byte.
+ * len: number of bytes to checksum.
+ * sum: initial 32-bit partial sum to continue from.
+ *
+ * Returns the folded 16-bit sum with its 16 bits inverted.
+ *
+ * The walk ends when either the chain or len is exhausted; running
+ * out of mbufs before len reaches zero is not reported.
+ *
+ * Single bytes are handled by shifting the running sum left by 8
+ * and toggling byte_swapped, so that the 16/32-bit loads that follow
+ * can be added directly. An outstanding swap is undone by one more
+ * shift before the final fold.
+ */
+static int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
+{
+    const uint8_t * w;
+    int mlen = 0;
+    int byte_swapped = 0;
+
+    for (; m && len; m = m->m_next)
+    {
+        if (m->m_len == 0)
+            continue;
+        w = mtod(m, u_char *) + off;
+        mlen = m->m_len - off;
+        off = 0;
+        /* never consume more than the caller asked for */
+        if (len < mlen)
+            mlen = len;
+        len -= mlen;
+
+        /*
+         * Ensure that we're aligned on a word boundary here so
+         * that we can do 32 bit operations below.
+         */
+        if ((3 & (uint32_t)w) != 0)
+        {
+            /* make room for the shift and the additions below */
+            sum = _NIOS2_Reduce_checksum ( sum );
+            if ((1 & (uint32_t)w) != 0 && mlen >= 1)
+            {
+                /* odd address: consume one byte */
+                sum <<= 8u;
+                sum += *w << 8u;
+                byte_swapped ^= 1;
+                w += 1;
+                mlen -= 1;
+            }
+            if ((2 & (uint32_t)w) != 0 && mlen >= 2)
+            {
+                /* 16-bit but not 32-bit aligned: consume a half-word */
+                sum += *(const uint16_t *)w;
+                w += 2;
+                mlen -= 2;
+            }
+        }
+
+        /*
+         * instead of using a loop, process in unrolled chunks
+         */
+        while ( mlen >= 64 )
+        {
+            sum = _NIOS2_Add_ones_complement_64
+                ( sum, ( const uint32_t * ) w );
+            mlen -= 64;
+            w += 64;
+        }
+        ADD_AND_ADVANCE ( 32 );
+        ADD_AND_ADVANCE ( 16 );
+        ADD_AND_ADVANCE ( 8 );
+        ADD_AND_ADVANCE ( 4 );
+
+        /* at most three bytes of this mbuf are left */
+        if ( mlen > 0 )
+        {
+            sum = _NIOS2_Reduce_checksum ( sum );
+            if ( mlen >= 2 )
+            {
+                sum += *(const uint16_t *)w;
+                w += 2;
+                mlen -= 2;
+            }
+            if ( mlen == 1 )
+            {
+                /* trailing single byte: same treatment as a leading one */
+                sum <<= 8u;
+                sum += *w << 8u;
+                byte_swapped ^= 1;
+            }
+        }
+    }
+    if ( byte_swapped )
+    {
+        /* undo the outstanding 8-bit shift */
+        sum = _NIOS2_Reduce_checksum ( sum );
+        sum <<= 8u;
+    }
+    /* fold to 16 bits, then invert those 16 bits */
+    sum = _NIOS2_Add_ones_complement_word_halves ( sum );
+    sum ^= 0xffff;
+    return sum;
+}
+
+/*
+ * Checksum the first len bytes of the mbuf chain m, starting at the
+ * first byte of the chain and with an initial sum of zero.
+ *
+ * Returns the value computed by in_cksum_internal().
+ */
+int
+in_cksum ( struct mbuf *m, int len )
+{
+    const int start_offset = 0;
+    const u_int initial_sum = 0;
+
+    return in_cksum_internal ( m, start_offset, len, initial_sum );
+}
+
+/*
+ * Checksum len bytes of the mbuf chain m starting at byte offset
+ * off, optionally preceded by a pseudo header.
+ *
+ * m:   mbuf chain; when nxt != 0 its first mbuf is read as a
+ *      struct ip to obtain the source and destination addresses.
+ * nxt: value stored in the pseudo header's ih_pr field; 0 means no
+ *      pseudo header is included in the sum.
+ * off: offset of the first byte to checksum within the chain.
+ * len: number of bytes to checksum; also stored (in network byte
+ *      order) in the pseudo header's ih_len field.
+ *
+ * Returns the value computed by in_cksum_internal().
+ *
+ * Panics, only when nxt != 0, if off is negative or smaller than
+ * sizeof(struct ipovly), if the first mbuf holds less than a
+ * struct ip, or if struct ipovly is not 20 bytes long.
+ */
+int
+in4_cksum (
+    struct mbuf *m,
+    u_int8_t nxt,
+    int off,
+    int len )
+{
+    u_int sum = 0;
+
+    if ( nxt != 0 )
+    {
+        struct ipovly ipov;
+        const uint32_t * pseudo;
+
+        /* pseudo header */
+        /*
+         * off is signed while sizeof yields an unsigned value: test
+         * for a negative offset explicitly, otherwise it would be
+         * converted to a huge unsigned number and pass the check.
+         */
+        if (off < 0 || (u_int) off < sizeof(struct ipovly))
+            panic("in4_cksum: offset too short");
+        if (m->m_len < sizeof(struct ip))
+            panic("in4_cksum: bad mbuf chain");
+
+        bzero(&ipov, sizeof(ipov));
+        ipov.ih_len = htons(len);
+        ipov.ih_pr = nxt;
+        ipov.ih_src = mtod(m, struct ip *)->ip_src;
+        ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+
+        /* the two calls below consume exactly 16 + 4 bytes */
+        if ( sizeof(ipov) != 20 )
+            panic( "in4_cksum: sizeof(ipov) != 20" );
+        /* NOTE(review): assumes ipov is 32-bit aligned - confirm */
+        pseudo = (const uint32_t *) &ipov;
+        sum = _NIOS2_Add_ones_complement_16 ( sum, pseudo );
+        sum = _NIOS2_Add_ones_complement_4 ( sum, pseudo + 4 );
+    }
+    /* skip unnecessary part */
+    while (m && off > 0)
+    {
+        if (m->m_len > off)
+            break;
+        off -= m->m_len;
+        m = m->m_next;
+    }
+    return (in_cksum_internal(m, off, len, sum));
+}
+
+