From 4de817dfd2ea96aec13ee7071a189e3c35bb0efd Mon Sep 17 00:00:00 2001 From: Joel Sherrill Date: Fri, 21 Aug 1998 18:14:27 +0000 Subject: Added i386 specific version of in_cksum.c and restructured the main file to switch out to CPU specific implementations. --- c/src/exec/libnetworking/netinet/in_cksum.c | 157 +----------- c/src/exec/libnetworking/netinet/in_cksum_i386.c | 198 +++++++++++++++ c/src/exec/libnetworking/netinet/in_cksum_m68k.c | 311 +++++++++++++++++++++++ 3 files changed, 515 insertions(+), 151 deletions(-) create mode 100644 c/src/exec/libnetworking/netinet/in_cksum_i386.c create mode 100644 c/src/exec/libnetworking/netinet/in_cksum_m68k.c (limited to 'c/src/exec/libnetworking') diff --git a/c/src/exec/libnetworking/netinet/in_cksum.c b/c/src/exec/libnetworking/netinet/in_cksum.c index a2889f878a..9cffad3221 100644 --- a/c/src/exec/libnetworking/netinet/in_cksum.c +++ b/c/src/exec/libnetworking/netinet/in_cksum.c @@ -37,165 +37,20 @@ #include #include -#if (defined(__GNUC__) && (defined(__mc68000__) || defined(__m68k__))) - -#define REDUCE { sum = (sum & 0xFFFF) + (sum >> 16); if (sum > 0xFFFF) sum -= 0xFFFF; } - /* - * Motorola 68k version of Internet Protocol Checksum routine - * W. Eric Norum - * Saskatchewan Accelerator Laboratory - * August, 1998 + * Try to use a CPU specific version, then punt to the portable C one. */ -int -in_cksum(m, len) - struct mbuf *m; - int len; -{ - unsigned short *w; - unsigned long sum = 0; - int mlen = 0; - int byte_swapped = 0; - union { - char c[2]; - u_short s; - } s_util; - for ( ; m && len ; m = m->m_next) { - if (m->m_len == 0) - continue; - w = mtod(m, u_short *); - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else - mlen = m->m_len; - if (len < mlen) - mlen = len; - len -= mlen; - - /* - * Force to longword boundary. - */ - if (3 & (int)w) { - REDUCE; - if ((1 & (int) w) && (mlen > 0)) { - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)((char *)w + 1); - mlen--; - byte_swapped = 1; - } - if ((2 & (int) w) && (mlen >= 2)) { - sum += *w++; - mlen -= 2; - } - } +#if (defined(__GNUC__) && (defined(__mc68000__) || defined(__m68k__))) - /* - * Sum all the longwords in the buffer. - * See RFC 1071 -- Computing the Internet Checksum. - * It should work for all 68k family members. - */ - { - unsigned long tcnt = mlen, t1; - __asm__ volatile ( - "movel %2,%3\n\t" - "lsrl #6,%2 | count/64 = # loop traversals\n\t" - "andl #0x3c,%3 | Then find fractions of a chunk\n\t" - "negl %3\n\t | Each long uses 4 instruction bytes\n\t" - "andi #0xf,%%cc | Clear X (extended carry flag)\n\t" - "jmp %%pc@(lcsum2_lbl-.-2:b,%3) | Jump into loop\n" - "lcsum1_lbl: | Begin inner loop...\n\t" - "movel %1@+,%3 | 0: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 1: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 2: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 3: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 4: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 5: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 6: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 7: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 8: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | 9: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | A: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | B: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | C: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | D: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | E: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n\t" - "movel %1@+,%3 | F: Fetch 32-bit word\n\t" - "addxl %3,%0 | Add word + previous carry\n" - "lcsum2_lbl:\n\tdbf %2,lcsum1_lbl | (NB- dbra doesn't affect X)\n\t" - "movel %0,%3 | Fold 32 bit sum to 16 bits\n\t" - "swap %3 | (NB- swap doesn't affect X)\n\t" - "addxw %3,%0 |\n\t" - "moveq #0,%3 | Add in last carry\n\t" - "addxw %3,%0 |\n\t" - "andl #0xffff,%0 | Mask to 16-bit sum\n" : - "=d" (sum), "=a" (w), "=d" (tcnt) , "=d" (t1) : - "0" (sum), "1" (w), "2" (tcnt) : - "cc", "memory"); - } - mlen &= 3; +#include "in_cksum_m68k.c" - /* - * Soak up the last 1, 2 or 3 bytes - */ - while ((mlen -= 2) >= 0) - sum += *w++; - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - sum = 0xDEAD; - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; - } - REDUCE; - return (~sum & 0xffff); -} +#elif (defined(__GNUC__) && defined(__i386__)) +#include "in_cksum_i386.c" #else + /* * Checksum routine for Internet Protocol family headers (Portable Version). * diff --git a/c/src/exec/libnetworking/netinet/in_cksum_i386.c b/c/src/exec/libnetworking/netinet/in_cksum_i386.c new file mode 100644 index 0000000000..2057ab3fd6 --- /dev/null +++ b/c/src/exec/libnetworking/netinet/in_cksum_i386.c @@ -0,0 +1,198 @@ +/* + * Checksum routine for Internet Protocol family headers. + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + * + * This implementation is 386 version. + * + * $Id$ + */ + +#undef ADDCARRY +#define ADDCARRY(x) if ((x) > 0xffff) (x) -= 0xffff +#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);} + +/* + * Thanks to gcc we don't have to guess + * which registers contain sum & w. + */ +#define ADD(n) asm("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) +#define ADDC(n) asm("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w)) +#define LOAD(n) asm volatile("movb " #n "(%1), %0" : "=r" (junk) : "r" (w)) +#define MOP asm("adcl $0, %0" : "=r" (sum) : "0" (sum)) + +int +in_cksum(m, len) + register struct mbuf *m; + register int len; +{ + register u_short *w; + register unsigned sum = 0; + register int mlen = 0; + int byte_swapped = 0; + union { char c[2]; u_short s; } su; + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_short *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + */ + + /* su.c[0] is already saved when scanning previous + * mbuf. sum was REDUCEd when we found mlen == -1 + */ + su.c[1] = *(u_char *)w; + sum += su.s; + w = (u_short *)((char *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to long boundary so we do longword aligned + * memory operations + */ + if (3 & (int) w) { + REDUCE; + if ((1 & (int) w) && (mlen > 0)) { + sum <<= 8; + su.c[0] = *(char *)w; + w = (u_short *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + if ((2 & (int) w) && (mlen >= 2)) { + sum += *w++; + mlen -= 2; + } + } + /* + * Advance to a 486 cache line boundary. + */ + if (4 & (int) w && mlen >= 4) { + ADD(0); + MOP; + w += 2; + mlen -= 4; + } + if (8 & (int) w && mlen >= 8) { + ADD(0); + ADDC(4); + MOP; + w += 4; + mlen -= 8; + } + /* + * Do as much of the checksum as possible 32 bits at at time. + * In fact, this loop is unrolled to make overhead from + * branches &c small. + */ + mlen -= 1; + while ((mlen -= 32) >= 0) { + u_char junk; + /* + * Add with carry 16 words and fold in the last + * carry by adding a 0 with carry. + * + * The early ADD(16) and the LOAD(32) are to load + * the next 2 cache lines in advance on 486's. The + * 486 has a penalty of 2 clock cycles for loading + * a cache line, plus whatever time the external + * memory takes to load the first word(s) addressed. + * These penalties are unavoidable. Subsequent + * accesses to a cache line being loaded (and to + * other external memory?) are delayed until the + * whole load finishes. These penalties are mostly + * avoided by not accessing external memory for + * 8 cycles after the ADD(16) and 12 cycles after + * the LOAD(32). The loop terminates when mlen + * is initially 33 (not 32) to guaranteed that + * the LOAD(32) is within bounds. + */ + ADD(16); + ADDC(0); + ADDC(4); + ADDC(8); + ADDC(12); + LOAD(32); + ADDC(20); + ADDC(24); + ADDC(28); + MOP; + w += 16; + } + mlen += 32 + 1; + if (mlen >= 32) { + ADD(16); + ADDC(0); + ADDC(4); + ADDC(8); + ADDC(12); + ADDC(20); + ADDC(24); + ADDC(28); + MOP; + w += 16; + mlen -= 32; + } + if (mlen >= 16) { + ADD(0); + ADDC(4); + ADDC(8); + ADDC(12); + MOP; + w += 8; + mlen -= 16; + } + if (mlen >= 8) { + ADD(0); + ADDC(4); + MOP; + w += 4; + mlen -= 8; + } + if (mlen == 0 && byte_swapped == 0) + continue; /* worth 1% maybe ?? */ + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + su.c[1] = *(char *)w; + sum += su.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + /* + * This mbuf has odd number of bytes. + * There could be a word split betwen + * this mbuf and the next mbuf. + * Save the last byte (to prepend to next mbuf). + */ + su.c[0] = *(char *)w; + } + + if (len) + printf("cksum: out of data\n"); + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte is shifted left by 8 bits) */ + su.c[1] = 0; + sum += su.s; + } + REDUCE; + return (~sum & 0xffff); +} diff --git a/c/src/exec/libnetworking/netinet/in_cksum_m68k.c b/c/src/exec/libnetworking/netinet/in_cksum_m68k.c new file mode 100644 index 0000000000..a2889f878a --- /dev/null +++ b/c/src/exec/libnetworking/netinet/in_cksum_m68k.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + * $Id$ + */ + +#include +#include + +#if (defined(__GNUC__) && (defined(__mc68000__) || defined(__m68k__))) + +#define REDUCE { sum = (sum & 0xFFFF) + (sum >> 16); if (sum > 0xFFFF) sum -= 0xFFFF; } + +/* + * Motorola 68k version of Internet Protocol Checksum routine + * W. Eric Norum + * Saskatchewan Accelerator Laboratory + * August, 1998 + */ +int +in_cksum(m, len) + struct mbuf *m; + int len; +{ + unsigned short *w; + unsigned long sum = 0; + int mlen = 0; + int byte_swapped = 0; + union { + char c[2]; + u_short s; + } s_util; + + for ( ; m && len ; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_short *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + * + * s_util.c[0] is already saved when scanning previous + * mbuf. + */ + s_util.c[1] = *(char *)w; + sum += s_util.s; + w = (u_short *)((char *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; + if (len < mlen) + mlen = len; + len -= mlen; + + /* + * Force to longword boundary. + */ + if (3 & (int)w) { + REDUCE; + if ((1 & (int) w) && (mlen > 0)) { + sum <<= 8; + s_util.c[0] = *(u_char *)w; + w = (u_short *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + if ((2 & (int) w) && (mlen >= 2)) { + sum += *w++; + mlen -= 2; + } + } + + /* + * Sum all the longwords in the buffer. + * See RFC 1071 -- Computing the Internet Checksum. + * It should work for all 68k family members. + */ + { + unsigned long tcnt = mlen, t1; + __asm__ volatile ( + "movel %2,%3\n\t" + "lsrl #6,%2 | count/64 = # loop traversals\n\t" + "andl #0x3c,%3 | Then find fractions of a chunk\n\t" + "negl %3\n\t | Each long uses 4 instruction bytes\n\t" + "andi #0xf,%%cc | Clear X (extended carry flag)\n\t" + "jmp %%pc@(lcsum2_lbl-.-2:b,%3) | Jump into loop\n" + "lcsum1_lbl: | Begin inner loop...\n\t" + "movel %1@+,%3 | 0: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 1: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 2: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 3: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 4: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 5: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 6: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 7: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 8: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | 9: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | A: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | B: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | C: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | D: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | E: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n\t" + "movel %1@+,%3 | F: Fetch 32-bit word\n\t" + "addxl %3,%0 | Add word + previous carry\n" + "lcsum2_lbl:\n\tdbf %2,lcsum1_lbl | (NB- dbra doesn't affect X)\n\t" + "movel %0,%3 | Fold 32 bit sum to 16 bits\n\t" + "swap %3 | (NB- swap doesn't affect X)\n\t" + "addxw %3,%0 |\n\t" + "moveq #0,%3 | Add in last carry\n\t" + "addxw %3,%0 |\n\t" + "andl #0xffff,%0 | Mask to 16-bit sum\n" : + "=d" (sum), "=a" (w), "=d" (tcnt) , "=d" (t1) : + "0" (sum), "1" (w), "2" (tcnt) : + "cc", "memory"); + } + mlen &= 3; + + /* + * Soak up the last 1, 2 or 3 bytes + */ + while ((mlen -= 2) >= 0) + sum += *w++; + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(char *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(char *)w; + } + if (len) + sum = 0xDEAD; + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} + + +#else +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE \ + {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + +int +in_cksum(m, len) + register struct mbuf *m; + register int len; +{ + register u_short *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union { + char c[2]; + u_short s; + } s_util; + union { + u_short s[2]; + long l; + } l_util; + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_short *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + * + * s_util.c[0] is already saved when scanning previous + * mbuf. + */ + s_util.c[1] = *(char *)w; + sum += s_util.s; + w = (u_short *)((char *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (int) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_char *)w; + w = (u_short *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + continue; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(char *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(char *)w; + } + if (len) + printf("cksum: out of data\n"); + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} +#endif -- cgit v1.2.3