diff options
Diffstat (limited to 'freebsd/sys/netinet/if_ether.c')
-rw-r--r-- | freebsd/sys/netinet/if_ether.c | 1122 |
1 files changed, 784 insertions, 338 deletions
diff --git a/freebsd/sys/netinet/if_ether.c b/freebsd/sys/netinet/if_ether.c index eec06dd8..0a8b101e 100644 --- a/freebsd/sys/netinet/if_ether.c +++ b/freebsd/sys/netinet/if_ether.c @@ -44,39 +44,50 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> #include <sys/queue.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/malloc.h> #include <sys/proc.h> +#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/syslog.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/netisr.h> -#include <net/if_llc.h> #include <net/ethernet.h> #include <net/route.h> #include <net/vnet.h> #include <netinet/in.h> +#include <netinet/in_fib.h> #include <netinet/in_var.h> #include <net/if_llatbl.h> #include <netinet/if_ether.h> -#if defined(INET) +#ifdef INET #include <netinet/ip_carp.h> #endif -#include <net/if_arc.h> -#include <net/iso88025.h> - #include <security/mac/mac_framework.h> -#define SIN(s) ((struct sockaddr_in *)s) -#define SDL(s) ((struct sockaddr_dl *)s) +#define SIN(s) ((const struct sockaddr_in *)(s)) + +static struct timeval arp_lastlog; +static int arp_curpps; +static int arp_maxpps = 1; + +/* Simple ARP state machine */ +enum arp_llinfo_state { + ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */ + ARP_LLINFO_REACHABLE, /* LLE is valid */ + ARP_LLINFO_VERIFY, /* LLE is valid, need refresh */ + ARP_LLINFO_DELETED, /* LLE is deleted */ +}; SYSCTL_DECL(_net_link_ether); static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); @@ -86,53 +97,67 @@ static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 * minutes */ static VNET_DEFINE(int, arp_maxtries) = 5; -VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for - * local traffic */ static VNET_DEFINE(int, arp_proxyall) = 0; static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for * 20 seconds */ -VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +static VNET_DEFINE(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/ +VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ +VNET_PCPUSTAT_SYSINIT(arpstat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(arpstat); +#endif /* VIMAGE */ static VNET_DEFINE(int, arp_maxhold) = 1; #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) +#define V_arpt_rexmit VNET(arpt_rexmit) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) -#define V_arpstat VNET(arpstat) #define V_arp_maxhold VNET(arp_maxhold) -SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, "ARP entry lifetime in seconds"); -SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(arp_maxtries), 0, "ARP resolution attempts before returning error"); -SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, - &VNET_NAME(useloopback), 0, - "Use the loopback interface for local traffic"); -SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(arp_proxyall), 0, "Enable proxy ARP for all suitable requests"); -SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW, +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(arpt_down), 0, "Incomplete ARP entry lifetime in seconds"); -SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW, - &VNET_NAME(arpstat), arpstat, - "ARP statistics (struct arpstat, net/if_arp.h)"); -SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW, +SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat, + arpstat, "ARP statistics (struct arpstat, net/if_arp.h)"); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(arp_maxhold), 0, "Number of packets to hold per ARP entry"); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second, + CTLFLAG_RW, &arp_maxpps, 0, + "Maximum number of remotely triggered ARP messages that can be " + "logged per second"); + +#define ARP_LOG(pri, ...) do { \ + if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \ + log((pri), "arp: " __VA_ARGS__); \ +} while (0) + -static void arp_init(void); -void arprequest(struct ifnet *, - struct in_addr *, struct in_addr *, u_char *); static void arpintr(struct mbuf *); static void arptimer(void *); #ifdef INET static void in_arpinput(struct mbuf *); #endif +static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, + struct ifnet *ifp, int bridged, struct llentry *la); +static void arp_mark_lle_reachable(struct llentry *la); +static void arp_iflladdr(void *arg __unused, struct ifnet *ifp); + +static eventhandler_tag iflladdr_tag; + static const struct netisr_handler arp_nh = { .nh_name = "arp", .nh_handler = arpintr, @@ -140,29 +165,6 @@ static const struct netisr_handler arp_nh = { .nh_policy = NETISR_POLICY_SOURCE, }; -#ifdef AF_INET -void arp_ifscrub(struct ifnet *ifp, uint32_t addr); - -/* - * called by in_ifscrub to remove entry from the table when - * the interface goes away - */ -void -arp_ifscrub(struct ifnet *ifp, uint32_t addr) -{ - struct sockaddr_in addr4; - - bzero((void *)&addr4, sizeof(addr4)); - addr4.sin_len = sizeof(addr4); - addr4.sin_family = AF_INET; - addr4.sin_addr.s_addr = addr; - IF_AFDATA_WLOCK(ifp); - lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR), - (struct sockaddr *)&addr4); - IF_AFDATA_WUNLOCK(ifp); -} -#endif - /* * Timeout routine. Age arp_tab entries periodically. */ @@ -171,15 +173,83 @@ arptimer(void *arg) { struct llentry *lle = (struct llentry *)arg; struct ifnet *ifp; + int r_skip_req; if (lle->la_flags & LLE_STATIC) { - LLE_WUNLOCK(lle); return; } - + LLE_WLOCK(lle); + if (callout_pending(&lle->lle_timer)) { + /* + * Here we are a bit odd here in the treatment of + * active/pending. If the pending bit is set, it got + * rescheduled before I ran. The active + * bit we ignore, since if it was stopped + * in ll_tablefree() and was currently running + * it would have return 0 so the code would + * not have deleted it since the callout could + * not be stopped so we want to go through + * with the delete here now. If the callout + * was restarted, the pending bit will be back on and + * we just want to bail since the callout_reset would + * return 1 and our reference would have been removed + * by arpresolve() below. + */ + LLE_WUNLOCK(lle); + return; + } ifp = lle->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); + switch (lle->ln_state) { + case ARP_LLINFO_REACHABLE: + + /* + * Expiration time is approaching. + * Let's try to refresh entry if it is still + * in use. + * + * Set r_skip_req to get feedback from + * fast path. Change state and re-schedule + * ourselves. + */ + LLE_REQ_LOCK(lle); + lle->r_skip_req = 1; + LLE_REQ_UNLOCK(lle); + lle->ln_state = ARP_LLINFO_VERIFY; + callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); + LLE_WUNLOCK(lle); + CURVNET_RESTORE(); + return; + case ARP_LLINFO_VERIFY: + LLE_REQ_LOCK(lle); + r_skip_req = lle->r_skip_req; + LLE_REQ_UNLOCK(lle); + + if (r_skip_req == 0 && lle->la_preempt > 0) { + /* Entry was used, issue refresh request */ + struct in_addr dst; + dst = lle->r_l3addr.addr4; + lle->la_preempt--; + callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); + LLE_WUNLOCK(lle); + arprequest(ifp, NULL, &dst, NULL); + CURVNET_RESTORE(); + return; + } + /* Nothing happened. Reschedule if not too late */ + if (lle->la_expire > time_uptime) { + callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit); + LLE_WUNLOCK(lle); + CURVNET_RESTORE(); + return; + } + break; + case ARP_LLINFO_INCOMPLETE: + case ARP_LLINFO_DELETED: + break; + } + if ((lle->la_flags & LLE_DELETED) == 0) { int evt; @@ -190,7 +260,7 @@ arptimer(void *arg) EVENTHANDLER_INVOKE(lle_event, lle, evt); } - callout_stop(&lle->la_timer); + callout_stop(&lle->lle_timer); /* XXX: LOR avoidance. We still have ref on lle. */ LLE_WUNLOCK(lle); @@ -199,64 +269,109 @@ arptimer(void *arg) /* Guard against race with other llentry_free(). */ if (lle->la_flags & LLE_LINKED) { - size_t pkts_dropped; - LLE_REMREF(lle); - pkts_dropped = llentry_free(lle); - ARPSTAT_ADD(dropped, pkts_dropped); - } else - LLE_FREE_LOCKED(lle); - + lltable_unlink_entry(lle->lle_tbl, lle); + } IF_AFDATA_UNLOCK(ifp); + size_t pkts_dropped = llentry_free(lle); + + ARPSTAT_ADD(dropped, pkts_dropped); ARPSTAT_INC(timeouts); CURVNET_RESTORE(); } /* + * Stores link-layer header for @ifp in format suitable for if_output() + * into buffer @buf. Resulting header length is stored in @bufsize. + * + * Returns 0 on success. + */ +static int +arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf, + size_t *bufsize) +{ + struct if_encap_req ereq; + int error; + + bzero(buf, *bufsize); + bzero(&ereq, sizeof(ereq)); + ereq.buf = buf; + ereq.bufsize = *bufsize; + ereq.rtype = IFENCAP_LL; + ereq.family = AF_ARP; + ereq.lladdr = ar_tha(ah); + ereq.hdata = (u_char *)ah; + if (bcast) + ereq.flags = IFENCAP_FLAG_BROADCAST; + error = ifp->if_requestencap(ifp, &ereq); + if (error == 0) + *bufsize = ereq.bufsize; + + return (error); +} + + +/* * Broadcast an ARP request. Caller specifies: * - arp header source ip address * - arp header target ip address * - arp header source ethernet address */ void -arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, - u_char *enaddr) +arprequest(struct ifnet *ifp, const struct in_addr *sip, + const struct in_addr *tip, u_char *enaddr) { struct mbuf *m; struct arphdr *ah; struct sockaddr sa; + u_char *carpaddr = NULL; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + struct route ro; + int error; if (sip == NULL) { - /* XXX don't believe this can happen (or explain why) */ /* * The caller did not supply a source address, try to find * a compatible one among those assigned to this interface. */ struct ifaddr *ifa; + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (!ifa->ifa_addr || - ifa->ifa_addr->sa_family != AF_INET) + if (ifa->ifa_addr->sa_family != AF_INET) continue; - sip = &SIN(ifa->ifa_addr)->sin_addr; + + if (ifa->ifa_carp) { + if ((*carp_iamatch_p)(ifa, &carpaddr) == 0) + continue; + sip = &IA_SIN(ifa)->sin_addr; + } else { + carpaddr = NULL; + sip = &IA_SIN(ifa)->sin_addr; + } + if (0 == ((sip->s_addr ^ tip->s_addr) & - SIN(ifa->ifa_netmask)->sin_addr.s_addr) ) + IA_MASKSIN(ifa)->sin_addr.s_addr)) break; /* found it. */ } + IF_ADDR_RUNLOCK(ifp); if (sip == NULL) { printf("%s: cannot find matching address\n", __func__); return; } } + if (enaddr == NULL) + enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp); - if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) + if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) return; - m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) + - 2*ifp->if_data.ifi_addrlen; + m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) + + 2 * ifp->if_addrlen; m->m_pkthdr.len = m->m_len; - MH_ALIGN(m, m->m_len); + M_ALIGN(m, m->m_len); ah = mtod(m, struct arphdr *); bzero((caddr_t)ah, m->m_len); #ifdef MAC @@ -266,109 +381,121 @@ arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, ah->ar_hln = ifp->if_addrlen; /* hardware address length */ ah->ar_pln = sizeof(struct in_addr); /* protocol address length */ ah->ar_op = htons(ARPOP_REQUEST); - bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln); - bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln); - bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln); + bcopy(enaddr, ar_sha(ah), ah->ar_hln); + bcopy(sip, ar_spa(ah), ah->ar_pln); + bcopy(tip, ar_tpa(ah), ah->ar_pln); sa.sa_family = AF_ARP; sa.sa_len = 2; + + /* Calculate link header for sending frame */ + bzero(&ro, sizeof(ro)); + linkhdrsize = sizeof(linkhdr); + error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize); + if (error != 0 && error != EAFNOSUPPORT) { + ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n", + if_name(ifp), error); + return; + } + + ro.ro_prepend = linkhdr; + ro.ro_plen = linkhdrsize; + ro.ro_flags = 0; + m->m_flags |= M_BCAST; - (*ifp->if_output)(ifp, m, &sa, NULL); + m_clrprotoflags(m); /* Avoid confusing lower layers. */ + (*ifp->if_output)(ifp, m, &sa, &ro); ARPSTAT_INC(txrequests); } + /* - * Resolve an IP address into an ethernet address. - * On input: - * ifp is the interface we use - * rt0 is the route to the final destination (possibly useless) - * m is the mbuf. May be NULL if we don't have a packet. - * dst is the next hop, - * desten is where we want the address. + * Resolve an IP address into an ethernet address - heavy version. + * Used internally by arpresolve(). + * We have already checked than we can't use existing lle without + * modification so we have to acquire LLE_EXCLUSIVE lle lock. * - * On success, desten is filled in and the function returns 0; + * On success, desten and flags are filled in and the function returns 0; * If the packet must be held pending resolution, we return EWOULDBLOCK * On other errors, we return the corresponding error code. * Note that m_freem() handles NULL. */ -int -arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, - struct sockaddr *dst, u_char *desten, struct llentry **lle) +static int +arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m, + const struct sockaddr *dst, u_char *desten, uint32_t *pflags, + struct llentry **plle) { - struct llentry *la = 0; - u_int flags = 0; + struct llentry *la = NULL, *la_tmp; struct mbuf *curr = NULL; struct mbuf *next = NULL; int error, renew; + char *lladdr; + int ll_len; - *lle = NULL; - if (m != NULL) { - if (m->m_flags & M_BCAST) { - /* broadcast */ - (void)memcpy(desten, - ifp->if_broadcastaddr, ifp->if_addrlen); - return (0); - } - if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) { - /* multicast */ - ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); - return (0); - } + if (pflags != NULL) + *pflags = 0; + if (plle != NULL) + *plle = NULL; + + if ((flags & LLE_CREATE) == 0) { + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); + IF_AFDATA_RUNLOCK(ifp); } -retry: - IF_AFDATA_RLOCK(ifp); - la = lla_lookup(LLTABLE(ifp), flags, dst); - IF_AFDATA_RUNLOCK(ifp); - if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0) - && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) { - flags |= (LLE_CREATE | LLE_EXCLUSIVE); + if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) { + la = lltable_alloc_entry(LLTABLE(ifp), 0, dst); + if (la == NULL) { + log(LOG_DEBUG, + "arpresolve: can't allocate llinfo for %s on %s\n", + inet_ntoa(SIN(dst)->sin_addr), if_name(ifp)); + m_freem(m); + return (EINVAL); + } + IF_AFDATA_WLOCK(ifp); - la = lla_lookup(LLTABLE(ifp), flags, dst); + LLE_WLOCK(la); + la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); + /* Prefer ANY existing lle over newly-created one */ + if (la_tmp == NULL) + lltable_link_entry(LLTABLE(ifp), la); IF_AFDATA_WUNLOCK(ifp); + if (la_tmp != NULL) { + lltable_free_entry(LLTABLE(ifp), la); + la = la_tmp; + } } if (la == NULL) { - if (flags & LLE_CREATE) - log(LOG_DEBUG, - "arpresolve: can't allocate llinfo for %s on %s\n", - inet_ntoa(SIN(dst)->sin_addr), ifp->if_xname); m_freem(m); return (EINVAL); } if ((la->la_flags & LLE_VALID) && ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { - bcopy(&la->ll_addr, desten, ifp->if_addrlen); - /* - * If entry has an expiry time and it is approaching, - * see if we need to send an ARP request within this - * arpt_down interval. - */ - if (!(la->la_flags & LLE_STATIC) && - time_uptime + la->la_preempt > la->la_expire) { - arprequest(ifp, NULL, - &SIN(dst)->sin_addr, IF_LLADDR(ifp)); - - la->la_preempt--; + if (flags & LLE_ADDRONLY) { + lladdr = la->ll_addr; + ll_len = ifp->if_addrlen; + } else { + lladdr = la->r_linkdata; + ll_len = la->r_hdrlen; } + bcopy(lladdr, desten, ll_len); - *lle = la; - error = 0; - goto done; - } - - if (la->la_flags & LLE_STATIC) { /* should not happen! */ - log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n", - inet_ntoa(SIN(dst)->sin_addr)); - m_freem(m); - error = EINVAL; - goto done; + /* Check if we have feedback request from arptimer() */ + if (la->r_skip_req != 0) { + LLE_REQ_LOCK(la); + la->r_skip_req = 0; /* Notify that entry was used */ + LLE_REQ_UNLOCK(la); + } + if (pflags != NULL) + *pflags = la->la_flags & (LLE_VALID|LLE_IFADDR); + if (plle) { + LLE_ADDREF(la); + *plle = la; + } + LLE_WUNLOCK(la); + return (0); } renew = (la->la_asked == 0 || la->la_expire != time_uptime); - if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) { - flags |= LLE_EXCLUSIVE; - LLE_RUNLOCK(la); - goto retry; - } /* * There is an arptab entry, but no ethernet address * response yet. Add the mbuf to the list, dropping @@ -393,11 +520,6 @@ retry: } else la->la_hold = m; la->la_numheld++; - if (renew == 0 && (flags & LLE_EXCLUSIVE)) { - flags &= ~LLE_EXCLUSIVE; - LLE_DOWNGRADE(la); - } - } /* * Return EWOULDBLOCK if we have tried less than arp_maxtries. It @@ -408,32 +530,113 @@ retry: if (la->la_asked < V_arp_maxtries) error = EWOULDBLOCK; /* First request. */ else - error = rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) ? - EHOSTUNREACH : EHOSTDOWN; + error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN; if (renew) { int canceled; LLE_ADDREF(la); la->la_expire = time_uptime; - canceled = callout_reset(&la->la_timer, hz * V_arpt_down, + canceled = callout_reset(&la->lle_timer, hz * V_arpt_down, arptimer, la); if (canceled) LLE_REMREF(la); la->la_asked++; LLE_WUNLOCK(la); - arprequest(ifp, NULL, &SIN(dst)->sin_addr, - IF_LLADDR(ifp)); + arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL); return (error); } -done: - if (flags & LLE_EXCLUSIVE) - LLE_WUNLOCK(la); - else - LLE_RUNLOCK(la); + + LLE_WUNLOCK(la); + return (error); +} + +/* + * Resolve an IP address into an ethernet address. + */ +int +arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags, struct llentry **plle) +{ + int error; + + flags |= LLE_ADDRONLY; + error = arpresolve_full(ifp, 0, flags, NULL, dst, desten, pflags, plle); return (error); } + +/* + * Lookups link header based on an IP address. + * On input: + * ifp is the interface we use + * is_gw != 0 if @dst represents gateway to some destination + * m is the mbuf. May be NULL if we don't have a packet. + * dst is the next hop, + * desten is the storage to put LL header. + * flags returns subset of lle flags: LLE_VALID | LLE_IFADDR + * + * On success, full/partial link header and flags are filled in and + * the function returns 0. + * If the packet must be held pending resolution, we return EWOULDBLOCK + * On other errors, we return the corresponding error code. + * Note that m_freem() handles NULL. + */ +int +arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m, + const struct sockaddr *dst, u_char *desten, uint32_t *pflags, + struct llentry **plle) +{ + struct llentry *la = NULL; + + if (pflags != NULL) + *pflags = 0; + if (plle != NULL) + *plle = NULL; + + if (m != NULL) { + if (m->m_flags & M_BCAST) { + /* broadcast */ + (void)memcpy(desten, + ifp->if_broadcastaddr, ifp->if_addrlen); + return (0); + } + if (m->m_flags & M_MCAST) { + /* multicast */ + ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); + return (0); + } + } + + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, dst); + if (la != NULL && (la->r_flags & RLLE_VALID) != 0) { + /* Entry found, let's copy lle info */ + bcopy(la->r_linkdata, desten, la->r_hdrlen); + if (pflags != NULL) + *pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR); + /* Check if we have feedback request from arptimer() */ + if (la->r_skip_req != 0) { + LLE_REQ_LOCK(la); + la->r_skip_req = 0; /* Notify that entry was used */ + LLE_REQ_UNLOCK(la); + } + if (plle) { + LLE_ADDREF(la); + *plle = la; + LLE_WUNLOCK(la); + } + IF_AFDATA_RUNLOCK(ifp); + return (0); + } + if (plle && la) + LLE_WUNLOCK(la); + IF_AFDATA_RUNLOCK(ifp); + + return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst, + desten, pflags, plle)); +} + /* * Common length and type checks are done here, * then the protocol-specific routine is called. @@ -442,34 +645,76 @@ static void arpintr(struct mbuf *m) { struct arphdr *ar; + struct ifnet *ifp; + char *layer; + int hlen; + + ifp = m->m_pkthdr.rcvif; if (m->m_len < sizeof(struct arphdr) && ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) { - log(LOG_NOTICE, "arp: runt packet -- m_pullup failed\n"); + ARP_LOG(LOG_NOTICE, "packet with short header received on %s\n", + if_name(ifp)); return; } ar = mtod(m, struct arphdr *); - if (ntohs(ar->ar_hrd) != ARPHRD_ETHER && - ntohs(ar->ar_hrd) != ARPHRD_IEEE802 && - ntohs(ar->ar_hrd) != ARPHRD_ARCNET && - ntohs(ar->ar_hrd) != ARPHRD_IEEE1394 && - ntohs(ar->ar_hrd) != ARPHRD_INFINIBAND) { - log(LOG_NOTICE, "arp: unknown hardware address format (0x%2D)" - " (from %*D to %*D)\n", (unsigned char *)&ar->ar_hrd, "", - ETHER_ADDR_LEN, (u_char *)ar_sha(ar), ":", - ETHER_ADDR_LEN, (u_char *)ar_tha(ar), ":"); + /* Check if length is sufficient */ + if (m->m_len < arphdr_len(ar)) { + m = m_pullup(m, arphdr_len(ar)); + if (m == NULL) { + ARP_LOG(LOG_NOTICE, "short packet received on %s\n", + if_name(ifp)); + return; + } + ar = mtod(m, struct arphdr *); + } + + hlen = 0; + layer = ""; + switch (ntohs(ar->ar_hrd)) { + case ARPHRD_ETHER: + hlen = ETHER_ADDR_LEN; /* RFC 826 */ + layer = "ethernet"; + break; + case ARPHRD_IEEE802: + hlen = 6; /* RFC 1390, FDDI_ADDR_LEN */ + layer = "fddi"; + break; + case ARPHRD_ARCNET: + hlen = 1; /* RFC 1201, ARC_ADDR_LEN */ + layer = "arcnet"; + break; + case ARPHRD_INFINIBAND: + hlen = 20; /* RFC 4391, INFINIBAND_ALEN */ + layer = "infiniband"; + break; + case ARPHRD_IEEE1394: + hlen = 0; /* SHALL be 16 */ /* RFC 2734 */ + layer = "firewire"; + + /* + * Restrict too long hardware addresses. + * Currently we are capable of handling 20-byte + * addresses ( sizeof(lle->ll_addr) ) + */ + if (ar->ar_hln >= 20) + hlen = 16; + break; + default: + ARP_LOG(LOG_NOTICE, + "packet with unknown hardware format 0x%02d received on " + "%s\n", ntohs(ar->ar_hrd), if_name(ifp)); m_freem(m); return; } - if (m->m_len < arphdr_len(ar)) { - if ((m = m_pullup(m, arphdr_len(ar))) == NULL) { - log(LOG_NOTICE, "arp: runt packet\n"); - m_freem(m); - return; - } - ar = mtod(m, struct arphdr *); + if (hlen != 0 && hlen != ar->ar_hln) { + ARP_LOG(LOG_NOTICE, + "packet with invalid %s address length %d received on %s\n", + layer, ar->ar_hln, if_name(ifp)); + m_freem(m); + return; } ARPSTAT_INC(received); @@ -518,20 +763,27 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW, static void in_arpinput(struct mbuf *m) { + struct rm_priotracker in_ifa_tracker; struct arphdr *ah; struct ifnet *ifp = m->m_pkthdr.rcvif; - struct llentry *la = NULL; - struct rtentry *rt; + struct llentry *la = NULL, *la_tmp; struct ifaddr *ifa; struct in_ifaddr *ia; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; u_int8_t *enaddr = NULL; - int op, flags; - int req_len; + int op; int bridged = 0, is_bridge = 0; - int carp_match = 0; + int carped; struct sockaddr_in sin; + struct sockaddr *dst; + struct nhop4_basic nh4; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + struct route ro; + size_t linkhdrsize; + int lladdr_off; + int error; + sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr.s_addr = 0; @@ -541,25 +793,24 @@ in_arpinput(struct mbuf *m) if (ifp->if_type == IFT_BRIDGE) is_bridge = 1; - req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); - if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) { - log(LOG_NOTICE, "in_arp: runt packet -- m_pullup failed\n"); - return; - } - + /* + * We already have checked that mbuf contains enough contiguous data + * to hold entire arp message according to the arp header. + */ ah = mtod(m, struct arphdr *); + /* * ARP is only for IPv4 so we can reject packets with * a protocol length not equal to an IPv4 address. */ if (ah->ar_pln != sizeof(struct in_addr)) { - log(LOG_NOTICE, "in_arp: requested protocol length != %zu\n", + ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n", sizeof(struct in_addr)); goto drop; } if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) { - log(LOG_NOTICE, "arp: %*D is multicast\n", + ARP_LOG(LOG_NOTICE, "%*D is multicast\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":"); goto drop; } @@ -575,26 +826,16 @@ in_arpinput(struct mbuf *m) * For a bridge, we want to check the address irrespective * of the receive interface. (This will change slightly * when we have clusters of interfaces). - * If the interface does not match, but the recieving interface - * is part of carp, we call carp_iamatch to see if this is a - * request for the virtual host ip. - * XXX: This is really ugly! */ - IN_IFADDR_RLOCK(); + IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || ia->ia_ifp == ifp) && - itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { + itaddr.s_addr == ia->ia_addr.sin_addr.s_addr && + (ia->ia_ifa.ifa_carp == NULL || + (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) { ifa_ref(&ia->ia_ifa); - IN_IFADDR_RUNLOCK(); - goto match; - } - if (ifp->if_carp != NULL && - (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) && - itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { - carp_match = 1; - ifa_ref(&ia->ia_ifa); - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto match; } } @@ -603,7 +844,7 @@ in_arpinput(struct mbuf *m) ia->ia_ifp == ifp) && isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { ifa_ref(&ia->ia_ifa); - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto match; } @@ -622,13 +863,13 @@ in_arpinput(struct mbuf *m) if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) { ifa_ref(&ia->ia_ifa); ifp = ia->ia_ifp; - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto match; } } } #undef BDG_MEMBER_MATCHES_ARP - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * No match, use the first inet address on the receive interface @@ -636,7 +877,9 @@ in_arpinput(struct mbuf *m) */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) - if (ifa->ifa_addr->sa_family == AF_INET) { + if (ifa->ifa_addr->sa_family == AF_INET && + (ifa->ifa_carp == NULL || + (*carp_iamatch_p)(ifa, &enaddr))) { ia = ifatoia(ifa); ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); @@ -647,35 +890,44 @@ in_arpinput(struct mbuf *m) /* * If bridging, fall back to using any inet address. */ - IN_IFADDR_RLOCK(); + IN_IFADDR_RLOCK(&in_ifa_tracker); if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL) { - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto drop; } ifa_ref(&ia->ia_ifa); - IN_IFADDR_RUNLOCK(); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); match: if (!enaddr) enaddr = (u_int8_t *)IF_LLADDR(ifp); + carped = (ia->ia_ifa.ifa_carp != NULL); myaddr = ia->ia_addr.sin_addr; ifa_free(&ia->ia_ifa); if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) goto drop; /* it's from me, ignore it. */ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { - log(LOG_NOTICE, - "arp: link address is broadcast for IP address %s!\n", - inet_ntoa(isaddr)); + ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address " + "%s!\n", inet_ntoa(isaddr)); + goto drop; + } + + if (ifp->if_addrlen != ah->ar_hln) { + ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, " + "i/f %d (ignored)\n", ifp->if_addrlen, + (u_char *) ar_sha(ah), ":", ah->ar_hln, + ifp->if_addrlen); goto drop; } + /* * Warn if another host is using the same IP address, but only if the * IP address isn't 0.0.0.0, which is used for DHCP only, in which * case we suppress the warning to avoid false positive complaints of * potential misconfiguration. */ - if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) { - log(LOG_ERR, - "arp: %*D is using my IP address %s on %s!\n", + if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr && + myaddr.s_addr != 0) { + ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr), ifp->if_xname); itaddr = myaddr; @@ -689,95 +941,73 @@ match: sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = isaddr; - flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0; - flags |= LLE_EXCLUSIVE; - IF_AFDATA_LOCK(ifp); - la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin); - IF_AFDATA_UNLOCK(ifp); - if (la != NULL) { - /* the following is not an error when doing bridging */ - if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) { - if (log_arp_wrong_iface) - log(LOG_WARNING, "arp: %s is on %s " - "but got reply from %*D on %s\n", - inet_ntoa(isaddr), - la->lle_tbl->llt_ifp->if_xname, - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - ifp->if_xname); - LLE_WUNLOCK(la); + dst = (struct sockaddr *)&sin; + IF_AFDATA_RLOCK(ifp); + la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); + IF_AFDATA_RUNLOCK(ifp); + if (la != NULL) + arp_check_update_lle(ah, isaddr, ifp, bridged, la); + else if (itaddr.s_addr == myaddr.s_addr) { + /* + * Request/reply to our address, but no lle exists yet. + * Calculate full link prepend to use in lle. + */ + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr, + &linkhdrsize, &lladdr_off) != 0) goto reply; - } - if ((la->la_flags & LLE_VALID) && - bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) { - if (la->la_flags & LLE_STATIC) { - LLE_WUNLOCK(la); - if (log_arp_permanent_modify) - log(LOG_ERR, - "arp: %*D attempts to modify " - "permanent entry for %s on %s\n", - ifp->if_addrlen, - (u_char *)ar_sha(ah), ":", - inet_ntoa(isaddr), ifp->if_xname); - goto reply; - } - if (log_arp_movements) { - log(LOG_INFO, "arp: %s moved from %*D " - "to %*D on %s\n", - inet_ntoa(isaddr), - ifp->if_addrlen, - (u_char *)&la->ll_addr, ":", - ifp->if_addrlen, (u_char *)ar_sha(ah), ":", - ifp->if_xname); - } - } - if (ifp->if_addrlen != ah->ar_hln) { - LLE_WUNLOCK(la); - log(LOG_WARNING, "arp from %*D: addr len: new %d, " - "i/f %d (ignored)\n", ifp->if_addrlen, - (u_char *) ar_sha(ah), ":", ah->ar_hln, - ifp->if_addrlen); - goto drop; - } - (void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen); - la->la_flags |= LLE_VALID; + /* Allocate new entry */ + la = lltable_alloc_entry(LLTABLE(ifp), 0, dst); + if (la == NULL) { - EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED); + /* + * lle creation may fail if source address belongs + * to non-directly connected subnet. However, we + * will try to answer the request instead of dropping + * frame. + */ + goto reply; + } + lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize, + lladdr_off); - if (!(la->la_flags & LLE_STATIC)) { - int canceled; + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(la); + la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); - LLE_ADDREF(la); - la->la_expire = time_uptime + V_arpt_keep; - canceled = callout_reset(&la->la_timer, - hz * V_arpt_keep, arptimer, la); - if (canceled) - LLE_REMREF(la); - } - la->la_asked = 0; - la->la_preempt = V_arp_maxtries; /* - * The packets are all freed within the call to the output - * routine. + * Check if lle still does not exists. + * If it does, that means that we either + * 1) have configured it explicitly, via + * 1a) 'arp -s' static entry or + * 1b) interface address static record + * or + * 2) it was the result of sending first packet to-host + * or + * 3) it was another arp reply packet we handled in + * different thread. * - * NB: The lock MUST be released before the call to the - * output routine. + * In all cases except 3) we definitely need to prefer + * existing lle. For the sake of simplicity, prefer any + * existing lle over newly-create one. */ - if (la->la_hold != NULL) { - struct mbuf *m_hold, *m_hold_next; + if (la_tmp == NULL) + lltable_link_entry(LLTABLE(ifp), la); + IF_AFDATA_WUNLOCK(ifp); - m_hold = la->la_hold; - la->la_hold = NULL; - la->la_numheld = 0; - memcpy(&sa, L3_ADDR(la), sizeof(sa)); - LLE_WUNLOCK(la); - for (; m_hold != NULL; m_hold = m_hold_next) { - m_hold_next = m_hold->m_nextpkt; - m_hold->m_nextpkt = NULL; - (*ifp->if_output)(ifp, m_hold, &sa, NULL); - } - } else + if (la_tmp == NULL) { + arp_mark_lle_reachable(la); LLE_WUNLOCK(la); + } else { + /* Free newly-create entry and handle packet */ + lltable_free_entry(LLTABLE(ifp), la); + la = la_tmp; + la_tmp = NULL; + arp_check_update_lle(ah, isaddr, ifp, bridged, la); + /* arp_check_update_lle() returns @la unlocked */ + } + la = NULL; } reply: if (op != ARPOP_REQUEST) @@ -798,7 +1028,7 @@ reply: if ((lle != NULL) && (lle->la_flags & LLE_PUB)) { (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); - (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln); + (void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln); LLE_RUNLOCK(lle); } else { @@ -808,10 +1038,8 @@ reply: if (!V_arp_proxyall) goto drop; - sin.sin_addr = itaddr; /* XXX MRT use table 0 for arp reply */ - rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); - if (!rt) + if (fib4_lookup_nh_basic(0, itaddr, 0, 0, &nh4) != 0) goto drop; /* @@ -819,11 +1047,8 @@ reply: * as this one came out of, or we'll get into a fight * over who claims what Ether address. */ - if (!rt->rt_ifp || rt->rt_ifp == ifp) { - RTFREE_LOCKED(rt); + if (nh4.nh_ifp == ifp) goto drop; - } - RTFREE_LOCKED(rt); (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); @@ -834,21 +1059,16 @@ reply: * avoids ARP chaos if an interface is connected to the * wrong network. */ - sin.sin_addr = isaddr; /* XXX MRT use table 0 for arp checks */ - rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); - if (!rt) + if (fib4_lookup_nh_basic(0, isaddr, 0, 0, &nh4) != 0) goto drop; - if (rt->rt_ifp != ifp) { - log(LOG_INFO, "arp_proxy: ignoring request" - " from %s via %s, expecting %s\n", - inet_ntoa(isaddr), ifp->if_xname, - rt->rt_ifp->if_xname); - RTFREE_LOCKED(rt); + if (nh4.nh_ifp != ifp) { + ARP_LOG(LOG_INFO, "proxy: ignoring request" + " from %s via %s\n", + inet_ntoa(isaddr), ifp->if_xname); goto drop; } - RTFREE_LOCKED(rt); #ifdef DEBUG_PROXY printf("arp: proxying for %s\n", inet_ntoa(itaddr)); @@ -878,7 +1098,29 @@ reply: m->m_pkthdr.rcvif = NULL; sa.sa_family = AF_ARP; sa.sa_len = 2; - (*ifp->if_output)(ifp, m, &sa, NULL); + + /* Calculate link header for sending frame */ + bzero(&ro, sizeof(ro)); + linkhdrsize = sizeof(linkhdr); + error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize); + + /* + * arp_fillheader() may fail due to lack of support inside encap request + * routing. This is not necessary an error, AF_ARP can/should be handled + * by if_output(). + */ + if (error != 0 && error != EAFNOSUPPORT) { + ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n", + if_name(ifp), error); + return; + } + + ro.ro_prepend = linkhdr; + ro.ro_plen = linkhdrsize; + ro.ro_flags = 0; + + m_clrprotoflags(m); /* Avoid confusing lower layers. */ + (*ifp->if_output)(ifp, m, &sa, &ro); ARPSTAT_INC(txreplies); return; @@ -887,45 +1129,249 @@ drop: } #endif +/* + * Checks received arp data against existing @la. + * Updates lle state/performs notification if necessary. + */ +static void +arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp, + int bridged, struct llentry *la) +{ + struct sockaddr sa; + struct mbuf *m_hold, *m_hold_next; + uint8_t linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; + + LLE_WLOCK_ASSERT(la); + + /* the following is not an error when doing bridging */ + if (!bridged && la->lle_tbl->llt_ifp != ifp) { + if (log_arp_wrong_iface) + ARP_LOG(LOG_WARNING, "%s is on %s " + "but got reply from %*D on %s\n", + inet_ntoa(isaddr), + la->lle_tbl->llt_ifp->if_xname, + ifp->if_addrlen, (u_char *)ar_sha(ah), ":", + ifp->if_xname); + LLE_WUNLOCK(la); + return; + } + if ((la->la_flags & LLE_VALID) && + bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) { + if (la->la_flags & LLE_STATIC) { + LLE_WUNLOCK(la); + if (log_arp_permanent_modify) + ARP_LOG(LOG_ERR, + "%*D attempts to modify " + "permanent entry for %s on %s\n", + ifp->if_addrlen, + (u_char *)ar_sha(ah), ":", + inet_ntoa(isaddr), ifp->if_xname); + return; + } + if (log_arp_movements) { + ARP_LOG(LOG_INFO, "%s moved from %*D " + "to %*D on %s\n", + inet_ntoa(isaddr), + ifp->if_addrlen, + (u_char *)&la->ll_addr, ":", + ifp->if_addrlen, (u_char *)ar_sha(ah), ":", + ifp->if_xname); + } + } + + /* Calculate full link prepend to use in lle */ + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr, + &linkhdrsize, &lladdr_off) != 0) + return; + + /* Check if something has changed */ + if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 || + (la->la_flags & LLE_VALID) == 0) { + /* Try to perform LLE update */ + if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize, + lladdr_off) == 0) + return; + + /* Clear fast path feedback request if set */ + la->r_skip_req = 0; + } + + arp_mark_lle_reachable(la); + + /* + * The packets are all freed within the call to the output + * routine. + * + * NB: The lock MUST be released before the call to the + * output routine. + */ + if (la->la_hold != NULL) { + m_hold = la->la_hold; + la->la_hold = NULL; + la->la_numheld = 0; + lltable_fill_sa_entry(la, &sa); + LLE_WUNLOCK(la); + for (; m_hold != NULL; m_hold = m_hold_next) { + m_hold_next = m_hold->m_nextpkt; + m_hold->m_nextpkt = NULL; + /* Avoid confusing lower layers. */ + m_clrprotoflags(m_hold); + (*ifp->if_output)(ifp, m_hold, &sa, NULL); + } + } else + LLE_WUNLOCK(la); +} + +static void +arp_mark_lle_reachable(struct llentry *la) +{ + int canceled, wtime; + + LLE_WLOCK_ASSERT(la); + + la->ln_state = ARP_LLINFO_REACHABLE; + EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED); + + if (!(la->la_flags & LLE_STATIC)) { + LLE_ADDREF(la); + la->la_expire = time_uptime + V_arpt_keep; + wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit; + if (wtime < 0) + wtime = V_arpt_keep; + canceled = callout_reset(&la->lle_timer, + hz * wtime, arptimer, la); + if (canceled) + LLE_REMREF(la); + } + la->la_asked = 0; + la->la_preempt = V_arp_maxtries; +} + +/* + * Add pernament link-layer record for given interface address. + */ +static __noinline void +arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst) +{ + struct llentry *lle, *lle_tmp; + + /* + * Interface address LLE record is considered static + * because kernel code relies on LLE_STATIC flag to check + * if these entries can be rewriten by arp updates. + */ + lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst); + if (lle == NULL) { + log(LOG_INFO, "arp_ifinit: cannot create arp " + "entry for interface address\n"); + return; + } + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(lle); + /* Unlink any entry if exists */ + lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst); + if (lle_tmp != NULL) + lltable_unlink_entry(LLTABLE(ifp), lle_tmp); + + lltable_link_entry(LLTABLE(ifp), lle); + IF_AFDATA_WUNLOCK(ifp); + + if (lle_tmp != NULL) + EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED); + + EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED); + LLE_WUNLOCK(lle); + if (lle_tmp != NULL) + lltable_free_entry(LLTABLE(ifp), lle_tmp); +} + void arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) { - struct llentry *lle; + const struct sockaddr_in *dst_in; + const struct sockaddr *dst; - if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) { - arprequest(ifp, &IA_SIN(ifa)->sin_addr, - &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp)); - /* - * interface address is considered static entry - * because the output of the arp utility shows - * that L2 entry as permanent - */ - IF_AFDATA_LOCK(ifp); - lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC), - (struct sockaddr *)IA_SIN(ifa)); - IF_AFDATA_UNLOCK(ifp); - if (lle == NULL) - log(LOG_INFO, "arp_ifinit: cannot create arp " - "entry for interface address\n"); - else - LLE_RUNLOCK(lle); - } - ifa->ifa_rtrequest = NULL; + if (ifa->ifa_carp != NULL) + return; + + dst = ifa->ifa_addr; + dst_in = (const struct sockaddr_in *)dst; + + if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY) + return; + arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp)); + + arp_add_ifa_lle(ifp, dst); } void -arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr) +arp_announce_ifaddr(struct ifnet *ifp, struct in_addr addr, u_char *enaddr) +{ + + if (ntohl(addr.s_addr) != INADDR_ANY) + arprequest(ifp, &addr, &addr, enaddr); +} + +/* + * Sends gratuitous ARPs for each ifaddr to notify other + * nodes about the address change. + */ +static __noinline void +arp_handle_ifllchange(struct ifnet *ifp) +{ + struct ifaddr *ifa; + + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family == AF_INET) + arp_ifinit(ifp, ifa); + } +} + +/* + * A handler for interface link layer address change event. + */ +static void +arp_iflladdr(void *arg __unused, struct ifnet *ifp) +{ + + lltable_update_ifaddr(LLTABLE(ifp)); + + if ((ifp->if_flags & IFF_UP) != 0) + arp_handle_ifllchange(ifp); +} + +static void +vnet_arp_init(void) { - if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) - arprequest(ifp, &IA_SIN(ifa)->sin_addr, - &IA_SIN(ifa)->sin_addr, enaddr); - ifa->ifa_rtrequest = NULL; + + if (IS_DEFAULT_VNET(curvnet)) { + netisr_register(&arp_nh); + iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, + arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + } +#ifdef VIMAGE + else + netisr_register_vnet(&arp_nh); +#endif } +VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, + vnet_arp_init, 0); +#ifdef VIMAGE +/* + * We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH + * lookups after destroying the hash. Ideally this would go on SI_ORDER_3.5. + */ static void -arp_init(void) +vnet_arp_destroy(__unused void *arg) { - netisr_register(&arp_nh); + netisr_unregister_vnet(&arp_nh); } -SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); +VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, + vnet_arp_destroy, NULL); +#endif |