#include /*- * Copyright (c) 2007, Myricom Inc. * Copyright (c) 2008, Intel Corporation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uint16_t do_csum_data(uint16_t *raw, int len) { uint32_t csum; csum = 0; while (len > 0) { csum += *raw; raw++; csum += *raw; raw++; len -= 4; } csum = (csum >> 16) + (csum & 0xffff); csum = (csum >> 16) + (csum & 0xffff); return (uint16_t)csum; } /* * Allocate and init the LRO data structures */ int tcp_lro_init(struct lro_ctrl *cntl) { struct lro_entry *lro; int i, error = 0; SLIST_INIT(&cntl->lro_free); SLIST_INIT(&cntl->lro_active); cntl->lro_bad_csum = 0; cntl->lro_queued = 0; cntl->lro_flushed = 0; for (i = 0; i < LRO_ENTRIES; i++) { lro = (struct lro_entry *) malloc(sizeof (struct lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO); if (lro == NULL) { if (i == 0) error = ENOMEM; break; } cntl->lro_cnt = i; SLIST_INSERT_HEAD(&cntl->lro_free, lro, next); } return (error); } void tcp_lro_free(struct lro_ctrl *cntl) { struct lro_entry *entry; while (!SLIST_EMPTY(&cntl->lro_free)) { entry = SLIST_FIRST(&cntl->lro_free); SLIST_REMOVE_HEAD(&cntl->lro_free, next); free(entry, M_DEVBUF); } } void tcp_lro_flush(struct lro_ctrl *cntl, struct lro_entry *lro) { struct ifnet *ifp; struct ip *ip; struct tcphdr *tcp; uint32_t *ts_ptr; uint32_t tcplen, tcp_csum; if (lro->append_cnt) { /* incorporate the new len into the ip header and * re-calculate the checksum */ ip = lro->ip; ip->ip_len = htons(lro->len - ETHER_HDR_LEN); ip->ip_sum = 0; ip->ip_sum = 0xffff ^ do_csum_data((uint16_t*)ip, sizeof (*ip)); lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; lro->m_head->m_pkthdr.csum_data = 0xffff; lro->m_head->m_pkthdr.len = lro->len; /* incorporate the latest ack into the tcp header */ tcp = (struct tcphdr *) (ip + 1); tcp->th_ack = lro->ack_seq; tcp->th_win = lro->window; /* incorporate latest timestamp into the tcp header */ if (lro->timestamp) { ts_ptr = (uint32_t *)(tcp + 1); ts_ptr[1] = htonl(lro->tsval); ts_ptr[2] = lro->tsecr; } /* * update checksum in tcp header by re-calculating the * tcp pseudoheader checksum, and adding it to the checksum * of the tcp payload data */ tcp->th_sum = 0; tcplen = lro->len - sizeof(*ip) - ETHER_HDR_LEN; tcp_csum = lro->data_csum; tcp_csum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(tcplen + IPPROTO_TCP)); tcp_csum += do_csum_data((uint16_t*)tcp, tcp->th_off << 2); tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); tcp->th_sum = 0xffff ^ tcp_csum; } ifp = cntl->ifp; (*ifp->if_input)(cntl->ifp, lro->m_head); cntl->lro_queued += lro->append_cnt + 1; cntl->lro_flushed++; lro->m_head = NULL; lro->timestamp = 0; lro->append_cnt = 0; SLIST_INSERT_HEAD(&cntl->lro_free, lro, next); } int tcp_lro_rx(struct lro_ctrl *cntl, struct mbuf *m_head, uint32_t csum) { struct ether_header *eh; struct ip *ip; struct tcphdr *tcp; uint32_t *ts_ptr; struct mbuf *m_nxt, *m_tail; struct lro_entry *lro; int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len; int opt_bytes, trim, csum_flags; uint32_t seq, tmp_csum, device_mtu; eh = mtod(m_head, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return 1; ip = (struct ip *) (eh + 1); if (ip->ip_p != IPPROTO_TCP) return 1; /* ensure there are no options */ if ((ip->ip_hl << 2) != sizeof (*ip)) return -1; /* .. and the packet is not fragmented */ if (ip->ip_off & htons(IP_MF|IP_OFFMASK)) return -1; /* verify that the IP header checksum is correct */ csum_flags = m_head->m_pkthdr.csum_flags; if (csum_flags & CSUM_IP_CHECKED) { if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { cntl->lro_bad_csum++; return -1; } } else { tmp_csum = do_csum_data((uint16_t *)ip, sizeof (*ip)); if (__predict_false((tmp_csum ^ 0xffff) != 0)) { cntl->lro_bad_csum++; return -1; } } /* find the TCP header */ tcp = (struct tcphdr *) (ip + 1); /* Get the TCP checksum if we dont have it */ if (!csum) csum = tcp->th_sum; /* ensure no bits set besides ack or psh */ if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) return -1; /* check for timestamps. Since the only option we handle are timestamps, we only have to handle the simple case of aligned timestamps */ opt_bytes = (tcp->th_off << 2) - sizeof (*tcp); tcp_hdr_len = sizeof (*tcp) + opt_bytes; ts_ptr = (uint32_t *)(tcp + 1); if (opt_bytes != 0) { if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) || (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))) return -1; } ip_len = ntohs(ip->ip_len); tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip); /* * If frame is padded beyond the end of the IP packet, * then we must trim the extra bytes off the end. */ tot_len = m_head->m_pkthdr.len; trim = tot_len - (ip_len + ETHER_HDR_LEN); if (trim != 0) { if (trim < 0) { /* truncated packet */ return -1; } m_adj(m_head, -trim); tot_len = m_head->m_pkthdr.len; } m_nxt = m_head; m_tail = NULL; /* -Wuninitialized */ while (m_nxt != NULL) { m_tail = m_nxt; m_nxt = m_tail->m_next; } hlen = ip_len + ETHER_HDR_LEN - tcp_data_len; seq = ntohl(tcp->th_seq); SLIST_FOREACH(lro, &cntl->lro_active, next) { if (lro->source_port == tcp->th_sport && lro->dest_port == tcp->th_dport && lro->source_ip == ip->ip_src.s_addr && lro->dest_ip == ip->ip_dst.s_addr) { /* Try to append it */ if (__predict_false(seq != lro->next_seq)) { /* out of order packet */ SLIST_REMOVE(&cntl->lro_active, lro, lro_entry, next); tcp_lro_flush(cntl, lro); return -1; } if (opt_bytes) { uint32_t tsval = ntohl(*(ts_ptr + 1)); /* make sure timestamp values are increasing */ if (__predict_false(lro->tsval > tsval || *(ts_ptr + 2) == 0)) { return -1; } lro->tsval = tsval; lro->tsecr = *(ts_ptr + 2); } lro->next_seq += tcp_data_len; lro->ack_seq = tcp->th_ack; lro->window = tcp->th_win; lro->append_cnt++; if (tcp_data_len == 0) { m_freem(m_head); return 0; } /* subtract off the checksum of the tcp header * from the hardware checksum, and add it to the * stored tcp data checksum. Byteswap the checksum * if the total length so far is odd */ tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len); csum = csum + (tmp_csum ^ 0xffff); csum = (csum & 0xffff) + (csum >> 16); csum = (csum & 0xffff) + (csum >> 16); if (lro->len & 0x1) { /* Odd number of bytes so far, flip bytes */ csum = ((csum << 8) | (csum >> 8)) & 0xffff; } csum = csum + lro->data_csum; csum = (csum & 0xffff) + (csum >> 16); csum = (csum & 0xffff) + (csum >> 16); lro->data_csum = csum; lro->len += tcp_data_len; /* adjust mbuf so that m->m_data points to the first byte of the payload */ m_adj(m_head, hlen); /* append mbuf chain */ lro->m_tail->m_next = m_head; /* advance the last pointer */ lro->m_tail = m_tail; /* flush packet if required */ device_mtu = cntl->ifp->if_mtu; if (lro->len > (65535 - device_mtu)) { SLIST_REMOVE(&cntl->lro_active, lro, lro_entry, next); tcp_lro_flush(cntl, lro); } return 0; } } if (SLIST_EMPTY(&cntl->lro_free)) return -1; /* start a new chain */ lro = SLIST_FIRST(&cntl->lro_free); SLIST_REMOVE_HEAD(&cntl->lro_free, next); SLIST_INSERT_HEAD(&cntl->lro_active, lro, next); lro->source_port = tcp->th_sport; lro->dest_port = tcp->th_dport; lro->source_ip = ip->ip_src.s_addr; lro->dest_ip = ip->ip_dst.s_addr; lro->next_seq = seq + tcp_data_len; lro->mss = tcp_data_len; lro->ack_seq = tcp->th_ack; lro->window = tcp->th_win; /* save the checksum of just the TCP payload by * subtracting off the checksum of the TCP header from * the entire hardware checksum * Since IP header checksum is correct, checksum over * the IP header is -0. Substracting -0 is unnecessary. */ tmp_csum = do_csum_data((uint16_t*)tcp, tcp_hdr_len); csum = csum + (tmp_csum ^ 0xffff); csum = (csum & 0xffff) + (csum >> 16); csum = (csum & 0xffff) + (csum >> 16); lro->data_csum = csum; lro->ip = ip; /* record timestamp if it is present */ if (opt_bytes) { lro->timestamp = 1; lro->tsval = ntohl(*(ts_ptr + 1)); lro->tsecr = *(ts_ptr + 2); } lro->len = tot_len; lro->m_head = m_head; lro->m_tail = m_tail; return 0; }