From d8e44ecaf82b9063a7e556f2e040069d446c3ac6 Mon Sep 17 00:00:00 2001 From: "Jeffrey O. Hill" Date: Tue, 5 Feb 2013 18:03:30 +0100 Subject: nios2: Add optimized IP checksum support --- cpukit/libnetworking/machine/in_cksum.h | 69 ++++++ cpukit/libnetworking/netinet/in_cksum.c | 4 + cpukit/libnetworking/netinet/in_cksum_nios2.h | 292 ++++++++++++++++++++++++++ 3 files changed, 365 insertions(+) create mode 100644 cpukit/libnetworking/netinet/in_cksum_nios2.h diff --git a/cpukit/libnetworking/machine/in_cksum.h b/cpukit/libnetworking/machine/in_cksum.h index e9a4ea4027..e625726232 100644 --- a/cpukit/libnetworking/machine/in_cksum.h +++ b/cpukit/libnetworking/machine/in_cksum.h @@ -1,4 +1,16 @@ /* + * Nios II version by Jeffrey O. Hill + * + * Copyright 2012. Los Alamos National Security, LLC. + * The Nios II specific part was produced under U.S. Government contract + * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL), + * which is operated by Los Alamos National Security, LLC for + * the U.S. Department of Energy. The U.S. Government has rights + * to use, reproduce, and distribute this software. NEITHER THE + * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY + * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR + * THE USE OF THIS SOFTWARE. + * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. 
*
@@ -208,6 +220,63 @@ in_cksum_hdr(const struct ip *ip)
 	__tmpsum = (int)ntohs(ip->ip_sum) + 256; \
 	ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \
 } while(0)
+
+/*
+ * Optimized version for the Altera Nios II softcore
+ */
+#elif defined ( __GNUC__ ) && defined ( __nios2__ )
+/* Returns a + b with the carry out of bit 31 added back into the result. */
+static inline uint32_t _NIOS2_Add_ones_complement ( const uint32_t a,
+                                                    const uint32_t b )
+{
+  uint32_t sum;
+  uint32_t C; /* set by cmpltu to (sum < a), i.e. 1 when a + b wrapped */
+  __asm__ __volatile__ (
+    " add %0, %2, %3 \n" /* sum <= a + b */
+    " cmpltu %1, %0, %2 \n" /* C <= carryBit32 */
+    " add %0, %1, %0 \n" /* sum <= sum + C */
+    : "=&r"(sum), "=&r"(C) /* '&': outputs are written while inputs are still live */
+    : "r"(a), "r"(b)
+  );
+  return sum;
+}
+/* Folds the two 16-bit halves of a into one 16-bit sum with end-around carry. */
+static inline uint16_t _NIOS2_Add_ones_complement_word_halves
+  ( const uint32_t a )
+{
+  uint16_t sum;
+  uint32_t tmp;
+  __asm__ __volatile__ (
+    " roli %1, %2, 16 \n" /* tmp <= a rotate left 16 */
+    " add %1, %2, %1 \n" /* tmp <= a + tmp + carryBit16 */
+    " srli %0, %1, 16 \n" /* sum <= tmp shift right 16 */
+    : "=&r"(sum),"=&r"(tmp)
+    : "r"(a)
+  );
+  return sum; /* upper 16 bits of a + rotate(a, 16) */
+}
+/* Sums the five 32-bit words at pHdrIP, folds to 16 bits and inverts the result. */
+static __inline u_int in_cksum_hdr ( const struct ip * pHdrIP )
+{
+  const uint32_t * const pWd = ( const uint32_t * ) pHdrIP; /* assumes 32-bit alignment - TODO confirm */
+  uint32_t sum = pWd[0];
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[4] ); /* 20 bytes read in total; NOTE(review): presumably no IP options - confirm */
+  sum = _NIOS2_Add_ones_complement_word_halves ( sum ); /* fold 32 bits down to 16 */
+  sum ^= 0xffff; /* invert the low 16 bits */
+  return sum;
+}
+/* Adds 256 to the host-order ip_sum, folds the carry back in and stores it again. */
+static __inline void in_cksum_update ( struct ip * pHdrIP )
+{
+  uint32_t __tmpsum = ntohs ( pHdrIP->ip_sum ); /* checksum in host byte order */
+  __tmpsum += 256u; /* same constant as the macro variant above; presumably a TTL decrement - confirm */
+  __tmpsum += __tmpsum >> 16u; /* add the carry out of bit 15 back in */
+  pHdrIP->ip_sum = htons ( ( uint16_t ) __tmpsum ); /* truncate to 16 bits, store in network order */
+}
+
 /*
  * Here is the generic, portable, inefficient algorithm.
*/ diff --git a/cpukit/libnetworking/netinet/in_cksum.c b/cpukit/libnetworking/netinet/in_cksum.c index dfe39b1d7f..ec7e49cd82 100644 --- a/cpukit/libnetworking/netinet/in_cksum.c +++ b/cpukit/libnetworking/netinet/in_cksum.c @@ -57,6 +57,10 @@ #include "in_cksum_powerpc.h" +#elif (defined(__GNUC__) && defined(__nios2__)) + +#include "in_cksum_nios2.h" + #else #include <stdio.h> /* for puts */ diff --git a/cpukit/libnetworking/netinet/in_cksum_nios2.h b/cpukit/libnetworking/netinet/in_cksum_nios2.h new file mode 100644 index 0000000000..8382f1117a --- /dev/null +++ b/cpukit/libnetworking/netinet/in_cksum_nios2.h @@ -0,0 +1,292 @@ + +/* + * Altera Nios2 Internet checksum computation + * + * Author: Jeffrey O. Hill + * + * Copyright 2012. Los Alamos National Security, LLC. + * This material was produced under U.S. Government contract + * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL), + * which is operated by Los Alamos National Security, LLC for + * the U.S. Department of Energy. The U.S. Government has rights + * to use, reproduce, and distribute this software. NEITHER THE + * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY + * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR + * THE USE OF THIS SOFTWARE. + * + * COPYRIGHT (c) 1989-2012. + * On-Line Applications Research Corporation (OAR). + * + * Copyright (c) 1997 Mark Brinicome + * Copyright (c) 1997 Causality Limited + * + * Copyright (c) 1995 Zubin Dittia. + * Copyright (c) 1995 Matthew R. Green. + * Copyright (c) 1994 Charles M. Hannum. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Based on the arm / sparc version, but using instead + * mostly inline functions in place of naaasty macros. + * + * It would be a great idea to somehow detect at runtime + * that the Nios2 has a user defined instruction that + * computes the CRC and invoke it here (we could call a + * function in the BSP). + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Checksum routine for Internet Protocol family headers. 
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+static inline uint32_t _NIOS2_Add_ones_complement_64 /* adds pWd[0..15] (64 bytes) into sum */
+( uint32_t sum, const uint32_t * const pWd )
+{
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[4] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[5] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[6] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[7] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[8] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[9] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[10] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[11] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[12] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[13] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[14] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[15] );
+}
+/* Adds pWd[0..7] (32 bytes) into sum; the name suffix is a byte count. */
+static inline uint32_t _NIOS2_Add_ones_complement_32
+( uint32_t sum, const uint32_t * const pWd )
+{
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[4] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[5] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[6] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[7] );
+}
+/* Adds pWd[0..3] (16 bytes) into sum. */
+static inline uint32_t _NIOS2_Add_ones_complement_16
+( uint32_t sum, const uint32_t * const pWd )
+{
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[3] );
+}
+/* Adds pWd[0..1] (8 bytes) into sum. */
+static inline uint32_t _NIOS2_Add_ones_complement_8
+( uint32_t sum, const uint32_t * const pWd )
+{
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[1] );
+}
+/* Adds the single word pWd[0] (4 bytes) into sum. */
+static inline uint32_t _NIOS2_Add_ones_complement_4
+( uint32_t sum, const uint32_t * const pWd )
+{
+  return _NIOS2_Add_ones_complement ( sum, pWd[0] );
+}
+/* Returns (a & 0xffff) + (a >> 16); the result can still be wider than 16 bits. */
+static inline uint32_t _NIOS2_Reduce_checksum ( uint32_t a )
+{
+  uint32_t tmp;
+  __asm__ __volatile__ (
+    " srli %1, %0, 16 \n" /* tmp = a >> 16 */
+    " andi %0, %0, 0xffff \n" /* a = a & 0xffff */
+    " add %0, %0, %1 \n" /* a = a + tmp */
+    : "+&r"(a), "=&r"(tmp) /* a is both read and written in place */
+  );
+  return a;
+}
+/* Pastes A and B into one token; used to build the helper name from a byte count. */
+#define combineTokens( A, B ) A ## B
+/* Sums N bytes at w when mlen >= N, using the caller's sum, mlen and w; expands to a bare if. */
+#define ADD_AND_ADVANCE( N ) \
+if ( mlen >= N ) { \
+  sum = combineTokens ( _NIOS2_Add_ones_complement_, N ) \
+    ( sum, ( uint32_t * ) w ); \
+  mlen -= N; \
+  w += N; \
+}
+/* Checksums up to len bytes of mbuf chain m, starting off bytes into its first non-empty mbuf. */
+static int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum) /* sum: initial partial sum */
+{
+  const uint8_t * w; /* read position inside the current mbuf */
+  int mlen = 0; /* bytes still to be summed from the current mbuf */
+  int byte_swapped = 0; /* toggled each time a single odd byte is folded in */
+
+  for (; m && len; m = m->m_next) /* NOTE(review): a chain shorter than len ends the loop silently - confirm */
+  {
+    if (m->m_len == 0)
+      continue;
+    w = mtod(m, u_char *) + off;
+    mlen = m->m_len - off;
+    off = 0; /* the offset is applied once only */
+    if (len < mlen)
+      mlen = len;
+    len -= mlen;
+
+    /*
+     * Ensure that we're aligned on a word boundary here so
+     * that we can do 32 bit operations below.
+     */
+    if ((3 & (uint32_t)w) != 0)
+    {
+      sum = _NIOS2_Reduce_checksum ( sum ); /* leave headroom for the shift and plain adds below */
+      if ((1 & (uint32_t)w) != 0 && mlen >= 1)
+      {
+        sum <<= 8u; /* move the running sum up one byte */
+        sum += *w << 8u; /* add the single byte in bits 8..15 */
+        byte_swapped ^= 1;
+        w += 1;
+        mlen -= 1;
+      }
+      if ((2 & (uint32_t)w) != 0 && mlen >= 2)
+      {
+        sum += *(uint16_t *)w; /* one 16-bit load brings w to a 4-byte boundary */
+        w += 2;
+        mlen -= 2;
+      }
+    }
+
+    /*
+     * instead of using a loop, process in unrolled chunks
+     */
+    while ( mlen >= 64 )
+    {
+      sum = _NIOS2_Add_ones_complement_64
+        ( sum, ( uint32_t * ) w );
+      mlen -= 64;
+      w += 64;
+    }
+    ADD_AND_ADVANCE ( 32 );
+    ADD_AND_ADVANCE ( 16 );
+    ADD_AND_ADVANCE ( 8 );
+    ADD_AND_ADVANCE ( 4 );
+
+    if ( mlen > 0 ) /* 1 to 3 bytes are left in this mbuf */
+    {
+      sum = _NIOS2_Reduce_checksum ( sum );
+      if ( mlen >= 2 )
+      {
+        sum += *(uint16_t *)w;
+        w += 2;
+        mlen -= 2;
+      }
+      if ( mlen == 1 )
+      {
+        sum <<= 8u; /* same odd-byte handling as in the alignment step above */
+        sum += *w << 8u;
+        byte_swapped ^= 1;
+      }
+    }
+  }
+  if ( byte_swapped )
+  {
+    sum = _NIOS2_Reduce_checksum ( sum );
+    sum <<= 8u; /* one more byte shift before the final fold */
+  }
+  sum = _NIOS2_Add_ones_complement_word_halves ( sum ); /* fold to 16 bits */
+  sum ^= 0xffff; /* invert the low 16 bits */
+  return sum;
+}
+/* Checksums up to len bytes of chain m from offset 0 with an initial sum of 0. */
+int
+in_cksum (
+  struct mbuf *m,
+  int len )
+{
+  return in_cksum_internal ( m, 0, len, 0 );
+}
+/* Checksums len bytes at offset off in chain m; when nxt != 0 a pseudo header is summed first. */
+int
+in4_cksum (
+  struct mbuf *m,
+  u_int8_t nxt, /* stored in the pseudo header's ih_pr field; 0 skips the pseudo header */
+  int off, /* byte offset into the chain where summing starts */
+  int len ) /* bytes to sum; also stored (network order) in ih_len */
+{
+  u_int sum = 0;
+
+  if ( nxt != 0 )
+  {
+    struct ipovly ipov;
+    /* pseudo header */
+    if (off < sizeof(struct ipovly)) /* NOTE(review): int vs. unsigned compare; a negative off converts to a large value and passes */
+      panic("in4_cksum: offset too short");
+    if (m->m_len < sizeof(struct ip))
+      panic("in4_cksum: bad mbuf chain");
+
+    bzero(&ipov, sizeof(ipov));
+    ipov.ih_len = htons(len);
+    ipov.ih_pr = nxt;
+    ipov.ih_src = mtod(m, struct ip *)->ip_src; /* addresses are copied from the start of the first mbuf */
+    ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+    u_char * w = (u_char *)&ipov;
+
+    if ( sizeof(ipov) != 20 ) /* the two calls below sum exactly 16 + 4 bytes */
+      panic( "in4_cksum: sizeof(ipov) != 20" );
+    sum = _NIOS2_Add_ones_complement_16 ( sum, (uint32_t *) w );
+    w += 16;
+    sum = _NIOS2_Add_ones_complement_4 ( sum, (uint32_t *) w );
+  }
+  /* skip unnecessary part */
+  while (m && off > 0)
+  {
+    if (m->m_len > off)
+      break; /* off falls inside this mbuf */
+    off -= m->m_len;
+    m = m->m_next;
+  }
+  return (in_cksum_internal(m, off, len, sum));
+}
+
+
-- 
cgit v1.2.3