[BACK]Return to if_gre.c CVS log [TXT][DIR] Up to [local] / src / sys / net

File: [local] / src / sys / net / if_gre.c (download)

Revision 1.178, Sat Dec 23 10:52:54 2023 UTC (5 months, 2 weeks ago) by bluhm
Branch: MAIN
CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, HEAD
Changes since 1.177: +6 -1 lines

Backout always allocate per-CPU statistics counters for network
interface descriptor.  It panics during attach of em(4) device at
boot.

/*	$OpenBSD: if_gre.c,v 1.178 2023/12/23 10:52:54 bluhm Exp $ */
/*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */

/*
 * Copyright (c) 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Heiko W.Rupp <hwr@pilhuhn.de>
 *
 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
 * See gre(4) for more details.
 * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
 */

#include "bpfilter.h"
#include "pf.h"

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/timeout.h>
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/pool.h>
#include <sys/rwlock.h>

#include <crypto/siphash.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ecn.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#endif

#ifdef PIPEX
#include <net/pipex.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif /* MPLS */

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif

#include <net/if_gre.h>

#include <netinet/ip_gre.h>
#include <sys/sysctl.h>

/* for the nvgre learning bridge (etherbridge) support */
#include <net/if_bridge.h>
#include <net/if_etherbridge.h>

/*
 * packet formats
 */
/*
 * Fixed leading part of a GRE header: a flags/version word followed by
 * the protocol type of the payload.  The input path compares both fields
 * against htons() constants, i.e. they are kept in network byte order.
 */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000
#define GRE_VERS_1			0x0001

	uint16_t		gre_proto;
} __packed __aligned(4);

/*
 * Layout of the GRE checksum field.
 * NOTE(review): presumably present after the base header when GRE_CP is
 * set; no user of this struct is visible in this part of the file, and
 * gre_input_key() declines packets with any flag other than GRE_KP.
 */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);

/*
 * GRE key field.  gre_input_key() reads it from directly behind the base
 * header when GRE_KP is set and stores the value, unconverted, in
 * gre_tunnel.t_key.
 */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);

#define GRE_EOIP		0x6400

/*
 * EoIP flavour of the 32-bit word behind the base GRE header.
 * NOTE(review): presumably used for packets whose gre_proto is GRE_EOIP
 * (gre_input_1() hands those to eoip_input()); the code that reads this
 * struct is not visible here.  Note the two fields use different byte
 * orders.
 */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);

#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/*
 * Layout of the GRE sequence number field.
 * NOTE(review): presumably present when GRE_SP is set; no user of this
 * struct is visible in this part of the file.
 */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/*
 * Four one-byte WCCP fields.
 * NOTE(review): presumably the four-octet WCCPv2 redirect header that
 * gre_input_key() skips with "hlen += 4"; that code does not reference
 * this struct directly, so confirm.
 */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

#define GRE_WCCP 0x883e

#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))

/*
 * GRE tunnel metadata
 */

#define GRE_KA_NONE		0
#define GRE_KA_DOWN		1
#define GRE_KA_HOLD		2
#define GRE_KA_UP		3

/*
 * A tunnel endpoint address, either IPv4 or IPv6.  The union does not
 * record which member is valid; struct gre_tunnel tracks that in t_af.
 */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Tunnel parameters shared by every GRE interface flavour in this file;
 * each softc embeds one as its first member.  The input path also builds
 * one on the stack from a received packet and uses it as a lookup key,
 * which is why the softc lookups cast a gre_tunnel pointer to a softc
 * pointer.
 */
struct gre_tunnel {
	/* which part of t_key identifies the tunnel; one of the values below */
	uint32_t		t_key_mask;
#define GRE_KEY_NONE			htonl(0x00000000U)
#define GRE_KEY_ENTROPY			htonl(0xffffff00U)
#define GRE_KEY_MASK			htonl(0xffffffffU)
	/* GRE key, stored as it appears on the wire (network byte order) */
	uint32_t		t_key;

	/* routing table id; on input taken from the packet header */
	u_int			t_rtableid;
	/* local address; on input this is the outer destination address */
	union gre_addr		t_src;
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	/* peer address; on input this is the outer source address */
	union gre_addr		t_dst;
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;
	/* IF_HDRPRIO_* selector or a fixed priority value */
	int			t_txhprio;
	int			t_rxhprio;
	/* ECN mode handed to ip_ecn_egress() on input */
	int			t_ecn;
	/* htons(0) or htons(IP_DF) in the initialisers in this file */
	uint16_t		t_df;
	/* address family selecting the valid member of t_src/t_dst */
	sa_family_t		t_af;
};

static int
		gre_cmp_src(const struct gre_tunnel *,
		    const struct gre_tunnel *);
static int
		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);

static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
static int	gre_del_tunnel(struct gre_tunnel *);

static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
static int	gre_del_vnetid(struct gre_tunnel *);

static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);

static struct mbuf *
		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
		    struct mbuf *, uint16_t, uint8_t, uint8_t);
#define gre_encap(_t, _m, _p, _ttl, _tos) \
		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))

static struct mbuf *
		gre_encap_dst_ip(const struct gre_tunnel *,
		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
#define gre_encap_ip(_t, _m, _ttl, _tos) \
		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))

static int
		gre_ip_output(const struct gre_tunnel *, struct mbuf *);

static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
		    u_long, void *);

static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
static uint8_t	gre_l3_tos(const struct gre_tunnel *,
		    const struct mbuf *, uint8_t);

/*
 * layer 3 GRE tunnels
 */

/*
 * Per-interface state of a gre(4) point-to-point layer 3 tunnel.
 * Instances are linked on gre_list via sc_entry.  The sc_ka_* members
 * hold keepalive state: sc_ka_state takes the GRE_KA_* values, and the
 * two timeouts are set up in gre_clone_create() to run
 * gre_keepalive_send() and gre_keepalive_hold().
 */
struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;

	struct ifnet		sc_if;

	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/*
 * Keepalive payload layout: two 32-bit values followed by a digest sized
 * for SipHash output.
 * NOTE(review): the code that fills and verifies this struct is not
 * visible in this part of the file; confirm field semantics there.
 */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);

static int	gre_clone_create(struct if_clone *, int);
static int	gre_clone_destroy(struct ifnet *);

struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* protected by NET_LOCK */
struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);

static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	gre_start(struct ifnet *);
static int	gre_ioctl(struct ifnet *, u_long, caddr_t);

static int	gre_up(struct gre_softc *);
static int	gre_down(struct gre_softc *);
static void	gre_link_state(struct ifnet *, unsigned int);

static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
		    struct gre_tunnel *);

static struct mbuf *
		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#ifdef INET6
static struct mbuf *
		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
#ifdef MPLS
static struct mbuf *
		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
		    uint8_t *, uint8_t);
#endif
static void	gre_keepalive_send(void *);
static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
static void	gre_keepalive_hold(void *);

static struct mbuf *
		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
		    struct mbuf *m, sa_family_t);

#define gre_l3_encap(_t, _m, _af) \
		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))

/*
 * Per-interface state of an mgre interface.  Instances live in
 * mgre_tree; mgre_find() looks them up by casting a gre_tunnel key to a
 * softc pointer, so sc_tunnel has to stay the first member.
 */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;

	struct ifnet		sc_if;
};

RBT_HEAD(mgre_tree, mgre_softc);

static inline int
		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);

RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

static int	mgre_clone_create(struct if_clone *, int);
static int	mgre_clone_destroy(struct ifnet *);

struct if_clone mgre_cloner =
    IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);

static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	mgre_start(struct ifnet *);
static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
static int	mgre_up(struct mgre_softc *);
static int	mgre_down(struct mgre_softc *);

/* protected by NET_LOCK */
struct mgre_tree mgre_tree = RBT_INITIALIZER();

/*
 * Ethernet GRE tunnels
 */

static struct mbuf *
		gre_ether_align(struct mbuf *, int);

/*
 * Per-interface state of an egre (Ethernet over GRE) interface.
 * Instances live in egre_tree; egre_input() looks them up by casting a
 * gre_tunnel key to a softc pointer, so sc_tunnel has to stay the first
 * member.  The Ethernet interface itself is sc_ac.ac_if.
 */
struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;
};

RBT_HEAD(egre_tree, egre_softc);

static inline int
		egre_cmp(const struct egre_softc *, const struct egre_softc *);

RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);

static int	egre_clone_create(struct if_clone *, int);
static int	egre_clone_destroy(struct ifnet *);

static void	egre_start(struct ifnet *);
static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
static int	egre_media_change(struct ifnet *);
static void	egre_media_status(struct ifnet *, struct ifmediareq *);

static int	egre_up(struct egre_softc *);
static int	egre_down(struct egre_softc *);

static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
struct if_clone egre_cloner =
    IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);

/* protected by NET_LOCK */
struct egre_tree egre_tree = RBT_INITIALIZER();

/*
 * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
 */

/*
 * Per-interface state of an nvgre interface.  A softc can be linked into
 * two trees at once: nvgre_ucast_tree via sc_uentry and nvgre_mcast_tree
 * via sc_mentry.  sc_tunnel has to stay first because nvgre_ucast_find()
 * casts a gre_tunnel key to a softc pointer.
 */
struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	/* compared with the receiving interface index in the mcast lookup */
	unsigned int		 sc_ifp0;
	RBT_ENTRY(nvgre_softc)	 sc_uentry;
	RBT_ENTRY(nvgre_softc)	 sc_mentry;

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	/* packet queue plus the task (nvgre_send) set up alongside it */
	struct mbuf_queue	 sc_send_list;
	struct task		 sc_send_task;

	void			*sc_inm;
	/* tasks running nvgre_link_change() and nvgre_detach() */
	struct task		 sc_ltask;
	struct task		 sc_dtask;

	/* Ethernet address to tunnel endpoint map, fed by nvgre_input() */
	struct etherbridge	 sc_eb;
};

RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
RBT_HEAD(nvgre_mcast_tree, nvgre_softc);

static inline int
		nvgre_cmp_ucast(const struct nvgre_softc *,
		    const struct nvgre_softc *);
static int
		nvgre_cmp_mcast(const struct gre_tunnel *,
		    const union gre_addr *, unsigned int,
		    const struct gre_tunnel *, const union gre_addr *,
		    unsigned int);
static inline int
		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
		    const struct nvgre_softc *);

RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

static int	nvgre_clone_create(struct if_clone *, int);
static int	nvgre_clone_destroy(struct ifnet *);

static void	nvgre_start(struct ifnet *);
static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);

static int	nvgre_up(struct nvgre_softc *);
static int	nvgre_down(struct nvgre_softc *);
static int	nvgre_set_parent(struct nvgre_softc *, const char *);
static void	nvgre_link_change(void *);
static void	nvgre_detach(void *);

static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
		    uint8_t);
static void	nvgre_send(void *);

static int	nvgre_add_addr(struct nvgre_softc *, const struct ifbareq *);
static int	nvgre_del_addr(struct nvgre_softc *, const struct ifbareq *);

static int	 nvgre_eb_port_eq(void *, void *, void *);
static void	*nvgre_eb_port_take(void *, void *);
static void	 nvgre_eb_port_rele(void *, void *);
static size_t	 nvgre_eb_port_ifname(void *, char *, size_t, void *);
static void	 nvgre_eb_port_sa(void *, struct sockaddr_storage *, void *);

/*
 * Callbacks handed to etherbridge_init() by nvgre_clone_create().
 * The initialiser is positional, so the order here has to match the
 * member order of struct etherbridge_ops (declared outside this file).
 */
static const struct etherbridge_ops nvgre_etherbridge_ops = {
	nvgre_eb_port_eq,
	nvgre_eb_port_take,
	nvgre_eb_port_rele,
	nvgre_eb_port_ifname,
	nvgre_eb_port_sa,
};

struct if_clone nvgre_cloner =
    IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);

struct pool nvgre_endpoint_pool;

/* protected by NET_LOCK */
struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();

/*
 * MikroTik Ethernet over IP protocol (eoip)
 */

/*
 * Per-interface state of an eoip (MikroTik Ethernet over IP) interface.
 * Instances live in eoip_tree via sc_entry.  The sc_ka_* members hold
 * keepalive state; eoip_clone_create() starts them at GRE_KA_DOWN with
 * sc_ka_timeo and sc_ka_count both set to 10, and wires the two timeouts
 * to eoip_keepalive_send() and eoip_keepalive_hold().
 */
struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;
	RBT_ENTRY(eoip_softc)	sc_entry;

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};

RBT_HEAD(eoip_tree, eoip_softc);

static inline int
		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);

RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);

static int	eoip_clone_create(struct if_clone *, int);
static int	eoip_clone_destroy(struct ifnet *);

static void	eoip_start(struct ifnet *);
static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);

static void	eoip_keepalive_send(void *);
static void	eoip_keepalive_recv(struct eoip_softc *);
static void	eoip_keepalive_hold(void *);

static int	eoip_up(struct eoip_softc *);
static int	eoip_down(struct eoip_softc *);

static struct mbuf *
		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);

static struct mbuf *
		eoip_input(struct gre_tunnel *, struct mbuf *,
		    const struct gre_header *, uint8_t, int);
struct if_clone eoip_cloner =
    IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);

/* protected by NET_LOCK */
struct eoip_tree eoip_tree = RBT_INITIALIZER();

/*
 * It is not easy to calculate the right value for a GRE MTU.
 * We leave this task to the admin and use the same default that
 * other vendors use.
 */
#define GREMTU 1476

/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the corresponding sysctl values (net.inet.gre.allow for GRE).
 * Zero means drop them, all else is acceptance.  We can also
 * control acceptance of WCCPv1-style GRE packets through the
 * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
 * allowed as well.
 */
int gre_allow = 0;
int gre_wccp = 0;

void
greattach(int n)
{
	if_clone_attach(&gre_cloner);
	if_clone_attach(&mgre_cloner);
	if_clone_attach(&egre_cloner);
	if_clone_attach(&nvgre_cloner);
	if_clone_attach(&eoip_cloner);
}

static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_bpf_mtap = p2p_bpf_mtap;
	ifp->if_input = p2p_input;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}

/*
 * Destroy a gre(4) interface: bring it down if it is running, unlink it
 * from gre_list, detach it from the stack and release the softc.
 * Always returns 0.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *gsc = ifp->if_softc;

	/* stop and unlink under the net lock that protects gre_list */
	NET_LOCK();
	if (ifp->if_flags & IFF_RUNNING)
		gre_down(gsc);
	TAILQ_REMOVE(&gre_list, gsc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);
	free(gsc, M_DEVBUF, sizeof(*gsc));

	return (0);
}

static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_bpf_mtap = p2p_bpf_mtap;
	ifp->if_input = p2p_input;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}

/*
 * Destroy an mgre interface: bring it down if it is running, detach it
 * from the stack and release the softc.  Always returns 0.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *msc = ifp->if_softc;

	NET_LOCK();
	if (ifp->if_flags & IFF_RUNNING)
		mgre_down(msc);
	NET_UNLOCK();

	if_detach(ifp);
	free(msc, M_DEVBUF, sizeof(*msc));

	return (0);
}

static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * Destroy an egre interface: bring it down if it is running, drop its
 * media entries, detach it from the Ethernet and interface layers and
 * release the softc.  Always returns 0.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *esc = ifp->if_softc;

	NET_LOCK();
	if (ifp->if_flags & IFF_RUNNING)
		egre_down(esc);
	NET_UNLOCK();

	ifmedia_delete_instance(&esc->sc_media, IFM_INST_ANY);

	ether_ifdetach(ifp);
	if_detach(ifp);

	free(esc, M_DEVBUF, sizeof(*esc));

	return (0);
}

static int
nvgre_clone_create(struct if_clone *ifc, int unit)
{
	struct nvgre_softc *sc;
	struct ifnet *ifp;
	struct gre_tunnel *tunnel;
	int error;

	if (nvgre_endpoint_pool.pr_size == 0) {
		pool_init(&nvgre_endpoint_pool, sizeof(union gre_addr),
		    0, IPL_SOFTNET, 0, "nvgreep", NULL);
	}

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
	    &nvgre_etherbridge_ops, sc);
	if (error != 0) {
		free(sc, M_DEVBUF, sizeof(*sc));
		return (error);
	}

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = nvgre_ioctl;
	ifp->if_start = nvgre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	tunnel = &sc->sc_tunnel;
	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
	tunnel->t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	tunnel->t_df = htons(IP_DF);
	tunnel->t_key_mask = GRE_KEY_ENTROPY;
	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
	    GRE_KEY_ENTROPY_SHIFT);

	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
	task_set(&sc->sc_send_task, nvgre_send, sc);
	task_set(&sc->sc_ltask, nvgre_link_change, sc);
	task_set(&sc->sc_dtask, nvgre_detach, sc);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * Destroy an nvgre interface: bring it down if it is running, tear down
 * its etherbridge table and media entries, detach it from the Ethernet
 * and interface layers and release the softc.  Always returns 0.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *nsc = ifp->if_softc;

	NET_LOCK();
	if (ifp->if_flags & IFF_RUNNING)
		nvgre_down(nsc);
	NET_UNLOCK();

	etherbridge_destroy(&nsc->sc_eb);
	ifmedia_delete_instance(&nsc->sc_media, IFM_INST_ANY);

	ether_ifdetach(ifp);
	if_detach(ifp);

	free(nsc, M_DEVBUF, sizeof(*nsc));

	return (0);
}

static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * Destroy an eoip interface: bring it down if it is running, drop its
 * media entries, detach it from the Ethernet and interface layers and
 * release the softc.  Always returns 0.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *esc = ifp->if_softc;

	NET_LOCK();
	if (ifp->if_flags & IFF_RUNNING)
		eoip_down(esc);
	NET_UNLOCK();

	ifmedia_delete_instance(&esc->sc_media, IFM_INST_ANY);

	ether_ifdetach(ifp);
	if_detach(ifp);

	free(esc, M_DEVBUF, sizeof(*esc));

	return (0);
}

/*
 * IPv4 protocol input for GRE.  Builds a lookup key from the outer IPv4
 * header (addresses swapped so the key is expressed from the local
 * tunnel's point of view) and offers the packet to gre_input_key().
 * If that declines (-1), the packet is passed on to rip_input().
 */
int
gre_input(struct mbuf **mp, int *offp, int type, int af)
{
	struct gre_tunnel key;
	struct ip *iph = mtod(*mp, struct ip *);
	int rv;

	/* XXX check if ip_src is sane for nvgre? */

	key.t_af = AF_INET;
	key.t_dst4 = iph->ip_src;	/* our peer is the packet's sender */
	key.t_src4 = iph->ip_dst;

	rv = gre_input_key(mp, offp, type, af, iph->ip_tos, &key);
	if (rv != -1)
		return (IPPROTO_DONE);

	return (rip_input(mp, offp, type, af));
}

#ifdef INET6
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip6_hdr *ip6;
	uint32_t flow;

	ip6 = mtod(m, struct ip6_hdr *);

	/* XXX check if ip6_src is sane for nvgre? */

	key.t_af = AF_INET6;
	key.t_src6 = ip6->ip6_dst;
	key.t_dst6 = ip6->ip6_src;

	flow = bemtoh32(&ip6->ip6_flow);

	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
		return (rip6_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
#endif /* INET6 */

/*
 * Walk gre_list for the first running gre(4) interface whose tunnel
 * compares equal to the key.  Returns its ifnet, or NULL if none does.
 */
static inline struct ifnet *
gre_find(const struct gre_tunnel *key)
{
	struct gre_softc *cand;

	TAILQ_FOREACH(cand, &gre_list, sc_entry) {
		/* the tunnel must match and the interface must be running */
		if (gre_cmp(key, &cand->sc_tunnel) == 0 &&
		    ISSET(cand->sc_if.if_flags, IFF_RUNNING))
			return (&cand->sc_if);
	}

	return (NULL);
}

/*
 * Look the key up in mgre_tree.  The key is passed off as a softc, which
 * works because the tunnel is the first member of struct mgre_softc.
 * Returns the matching interface, or NULL.  Asserts the net lock.
 */
static inline struct ifnet *
mgre_find(const struct gre_tunnel *key)
{
	const struct mgre_softc *needle = (const struct mgre_softc *)key;
	struct mgre_softc *found;

	NET_ASSERT_LOCKED();

	found = RBT_FIND(mgre_tree, &mgre_tree, needle);

	return (found == NULL ? NULL : &found->sc_if);
}

/*
 * Handle GRE version 1 packets.  EoIP packets are handed to eoip_input()
 * and its result is returned.  With PIPEX compiled in and enabled, PPP
 * packets belonging to a known PPTP session go to pipex_pptp_input().
 *
 * Returns NULL when the packet was consumed, otherwise the mbuf the
 * caller still has to deal with.
 */
static struct mbuf *
gre_input_1(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	const uint16_t proto = gh->gre_proto;

	if (proto == htons(GRE_EOIP))
		return (eoip_input(key, m, gh, otos, iphlen));

#ifdef PIPEX
	if (proto == htons(ETHERTYPE_PPP) && pipex_enable) {
		struct pipex_session *session;

		session = pipex_pptp_lookup_session(m);
		if (session != NULL) {
			struct mbuf *rest;

			rest = pipex_pptp_input(m, session);
			pipex_rele_session(session);

			/* pipex took the packet */
			if (rest == NULL)
				return (NULL);
		}
	}
#endif

	return (m);
}

/*
 * Common GRE input path for IPv4 and IPv6 outer headers.
 *
 * mp/offp/type/af are the usual protocol input arguments; *offp is the
 * length of the outer IP header.  otos is the outer TOS/traffic class.
 * key has been pre-filled by the caller with the address family and the
 * (swapped) outer addresses; this function adds the routing table id and
 * the GRE key information before using it for interface lookups.
 *
 * Returns IPPROTO_DONE when the packet has been consumed (delivered to
 * an interface, or lost to a failed m_pullup()), or -1 when the packet
 * is declined.  On decline *mp is updated to the current mbuf so the
 * caller can pass the packet on.
 */
static int
gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
    struct gre_tunnel *key)
{
	struct mbuf *m = *mp;
	int iphlen = *offp, hlen, rxprio;
	struct ifnet *ifp;
	const struct gre_tunnel *tunnel;
	caddr_t buf;
	struct gre_header *gh;
	struct gre_h_key *gkh;
	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
	    uint8_t *, uint8_t);
	int mcast = 0;
	uint8_t itos;

	/* GRE processing is administratively disabled */
	if (!gre_allow)
		goto decline;

	key->t_rtableid = m->m_pkthdr.ph_rtableid;

	/* make the base GRE header contiguous */
	hlen = iphlen + sizeof(*gh);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (IPPROTO_DONE);

	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);

	/* check the version */
	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
	case htons(GRE_VERS_0):
		break;

	case htons(GRE_VERS_1):
		/* version 1 is either consumed by gre_input_1 or declined */
		m = gre_input_1(key, m, gh, otos, iphlen);
		if (m == NULL)
			return (IPPROTO_DONE);
		/* FALLTHROUGH */
	default:
		goto decline;
	}

	/* the only optional bit in the header is K flag */
	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
		goto decline;

	if (gh->gre_flags & htons(GRE_KP)) {
		hlen += sizeof(*gkh);
		if (m->m_pkthdr.len < hlen)
			goto decline;

		m = m_pullup(m, hlen);
		if (m == NULL)
			return (IPPROTO_DONE);

		/* m_pullup may have moved the data; recompute the pointers */
		buf = mtod(m, caddr_t);
		gh = (struct gre_header *)(buf + iphlen);
		gkh = (struct gre_h_key *)(gh + 1);

		key->t_key_mask = GRE_KEY_MASK;
		key->t_key = gkh->gre_key;
	} else
		key->t_key_mask = GRE_KEY_NONE;

	/* Ethernet payload: offer it to egre first, then to nvgre */
	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
		if (egre_input(key, m, hlen, otos) == -1 &&
		    nvgre_input(key, m, hlen, otos) == -1)
			goto decline;

		return (IPPROTO_DONE);
	}

	/* layer 3 payload: point-to-point gre first, then mgre */
	ifp = gre_find(key);
	if (ifp == NULL) {
		ifp = mgre_find(key);
		if (ifp == NULL)
			goto decline;
	}

	switch (gh->gre_proto) {
	case htons(GRE_WCCP): {
		struct mbuf *n;
		int off;

		/* WCCP/GRE:
		 *   So far as I can see (and test) it seems that Cisco's WCCP
		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
		 *   only in its protocol number.  At least, it works for me.
		 *
		 *   The Internet Drafts can be found if you look for
		 *   the following:
		 *     draft-forster-wrec-wccp-v1-00.txt
		 *     draft-wilson-wrec-wccp-v2-01.txt
		 */

		/* WCCP needs the global knob or link0 on the interface */
		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
			goto decline;

		/*
		 * If the first nibble of the payload does not look like
		 * IPv4, assume it is WCCP v2.
		 */
		n = m_getptr(m, hlen, &off);
		if (n == NULL)
			goto decline;
		if (n->m_data[off] >> 4 != IPVERSION)
			hlen += 4;  /* four-octet Redirect header */

		/* FALLTHROUGH */
	}
	case htons(ETHERTYPE_IP):
		m->m_pkthdr.ph_family = AF_INET;
		patch = gre_ipv4_patch;
		break;
#ifdef INET6
	case htons(ETHERTYPE_IPV6):
		m->m_pkthdr.ph_family = AF_INET6;
		patch = gre_ipv6_patch;
		break;
#endif
#ifdef MPLS
	case htons(ETHERTYPE_MPLS_MCAST):
		mcast = M_MCAST|M_BCAST;
		/* fallthrough */
	case htons(ETHERTYPE_MPLS):
		m->m_pkthdr.ph_family = AF_MPLS;
		patch = gre_mpls_patch;
		break;
#endif
	case htons(0):
		/* protocol 0 carries keepalives */
		if (ifp->if_type != IFT_TUNNEL) {
			/* keepalives don't make sense for mgre */
			goto decline;
		}

		m_adj(m, hlen);
		gre_keepalive_recv(ifp, m);
		return (IPPROTO_DONE);

	default:
		goto decline;
	}

	/* it's ours now */

	m_adj(m, hlen);

	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */

	/* apply ECN handling to the inner header; may consume the packet */
	m = (*patch)(tunnel, m, &itos, otos);
	if (m == NULL)
		return (IPPROTO_DONE);

	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		/*
		 * t_key and GRE_KEY_ENTROPY are both in network byte
		 * order, so strip the tunnel id bits first and convert
		 * afterwards.  Applying the network-order mask to a
		 * host-order key picks the wrong byte on little-endian
		 * machines.
		 */
		m->m_pkthdr.ph_flowid =
		    ntohl(key->t_key & ~GRE_KEY_ENTROPY);
	}

	/* pick the packet priority according to the rxprio setting */
	rxprio = tunnel->t_rxhprio;
	switch (rxprio) {
	case IF_HDRPRIO_PACKET:
		/* nop */
		break;
	case IF_HDRPRIO_OUTER:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
		break;
	case IF_HDRPRIO_PAYLOAD:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
		break;
	default:
		m->m_pkthdr.pf.prio = rxprio;
		break;
	}

	/* outer multicast/broadcast state does not apply to the payload */
	m->m_flags &= ~(M_MCAST|M_BCAST);
	m->m_flags |= mcast;

	if_vinput(ifp, m);
	return (IPPROTO_DONE);
decline:
	*mp = m;
	return (-1);
}

/*
 * Apply ECN egress processing to an inner IPv4 packet.
 *
 * Makes the IPv4 header contiguous, runs ip_ecn_egress() with the
 * tunnel's ECN mode and the outer TOS, and rewrites the inner TOS if it
 * changed.  The resulting inner TOS is stored in *itosp.
 *
 * Returns the mbuf, or NULL if m_pullup() failed or ip_ecn_egress()
 * returned 0 (in which case the packet is freed here).
 */
static struct mbuf *
gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip *iph;
	uint8_t tos;

	if ((m = m_pullup(m, sizeof(*iph))) == NULL)
		return (NULL);

	iph = mtod(m, struct ip *);
	tos = iph->ip_tos;

	if (!ip_ecn_egress(tunnel->t_ecn, &otos, &tos)) {
		m_freem(m);
		return (NULL);
	}

	/* only touch the header when the TOS really changed */
	if (iph->ip_tos != tos)
		ip_tos_patch(iph, tos);

	*itosp = tos;

	return (m);
}

#ifdef INET6
/*
 * Apply ECN egress processing to an inner IPv6 packet.
 *
 * Makes the IPv6 header contiguous, extracts the traffic class from the
 * flow word, runs ip_ecn_egress() with the tunnel's ECN mode and the
 * outer TOS, and writes the resulting traffic class back.  The result is
 * also stored in *itosp.
 *
 * Returns the mbuf, or NULL if m_pullup() failed or ip_ecn_egress()
 * returned 0 (in which case the packet is freed here).
 */
static struct mbuf *
gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip6_hdr *hdr6;
	uint32_t flow;
	uint8_t tclass;

	if ((m = m_pullup(m, sizeof(*hdr6))) == NULL)
		return (NULL);

	hdr6 = mtod(m, struct ip6_hdr *);

	/* the traffic class sits in bits 20-27 of the host-order word */
	flow = bemtoh32(&hdr6->ip6_flow);
	tclass = flow >> 20;

	if (!ip_ecn_egress(tunnel->t_ecn, &otos, &tclass)) {
		m_freem(m);
		return (NULL);
	}

	/* replace the traffic class and store the word back */
	flow &= ~(0xff << 20);
	flow |= (tclass << 20);
	htobem32(&hdr6->ip6_flow, flow);

	*itosp = tclass;

	return (m);
}
#endif

#ifdef MPLS
/*
 * ECN egress check for an inner MPLS packet.
 *
 * Makes the first 32-bit shim contiguous, derives a TOS-like value from
 * its EXP bits (shifted up by 5) and runs ip_ecn_egress() on it.  The
 * shim itself is not rewritten; the derived value is stored in *itosp.
 *
 * Returns the mbuf, or NULL if m_pullup() failed or ip_ecn_egress()
 * returned 0 (in which case the packet is freed here).
 */
static struct mbuf *
gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	uint32_t shim, tc;
	uint8_t tos;

	if ((m = m_pullup(m, sizeof(shim))) == NULL)
		return (NULL);

	shim = *mtod(m, uint32_t *);

	/* mask in network order, then convert and shift the bits down */
	tc = ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
	tos = tc << 5;

	if (!ip_ecn_egress(tunnel->t_ecn, &otos, &tos)) {
		m_freem(m);
		return (NULL);
	}

	*itosp = tos;

	return (m);
}
#endif

/*
 * Set the priority of a received layer 2 packet according to the
 * tunnel's rxprio setting: IF_HDRPRIO_PACKET leaves the mbuf alone,
 * IF_HDRPRIO_OUTER derives the priority from the outer TOS, and any
 * other value is used as the priority itself.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	if (rxprio == IF_HDRPRIO_OUTER)					\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
	else if (rxprio != IF_HDRPRIO_PACKET)				\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
} while (0)

/*
 * Deliver a GRE packet with an Ethernet payload to a matching egre
 * interface.
 *
 * key is the lookup key built by the GRE input path, hlen the number of
 * bytes (outer IP header plus GRE header) in front of the Ethernet
 * frame, and otos the outer TOS.
 *
 * Returns -1 if no egre interface matches the key; the mbuf is untouched
 * in that case.  Returns 0 once the packet has been taken, including
 * when it is dropped because gre_ether_align() returned NULL.
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	/* the key doubles as a softc because the tunnel is its first member */
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		/*
		 * t_key and GRE_KEY_ENTROPY are both in network byte
		 * order, so strip the tunnel id bits first and convert
		 * afterwards.  Applying the network-order mask to a
		 * host-order key picks the wrong byte on little-endian
		 * machines.
		 */
		m->m_pkthdr.ph_flowid =
		    ntohl(key->t_key & ~GRE_KEY_ENTROPY);
	}

	/* outer multicast/broadcast state does not apply to the payload */
	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}

/*
 * Look up the nvgre interface for a multicast packet by walking the
 * multicast tree by hand.
 *
 * Going through RBT_FIND would mean building a throwaway nvgre_softc and
 * swapping the source and destination addresses in the key, which is
 * expensive; instead the key's source address is compared against each
 * node's destination address directly.  Returns NULL if nothing matches.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *node;
	int cmp;

	NET_ASSERT_LOCKED();

	for (node = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	    node != NULL;
	    node = (cmp < 0) ? RBT_LEFT(nvgre_mcast_tree, node) :
	    RBT_RIGHT(nvgre_mcast_tree, node)) {
		cmp = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &node->sc_tunnel, &node->sc_tunnel.t_dst, node->sc_ifp0);
		if (cmp == 0)
			return (node);
	}

	return (NULL);
}

/*
 * Look up the nvgre interface for a unicast packet.  The tunnel key is
 * passed to RBT_FIND in place of a softc.
 */
static inline struct nvgre_softc *
nvgre_ucast_find(const struct gre_tunnel *key)
{
	struct nvgre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
	    (struct nvgre_softc *)key);

	return (sc);
}

/*
 * Deliver a decapsulated NVGRE packet to the matching nvgre interface.
 *
 * The multicast or unicast tree is consulted depending on the mbuf's
 * M_MCAST/M_BCAST flags.  Returns -1 when no interface matches (the mbuf
 * is left for the caller) and 0 once the packet has been claimed.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;
	struct ether_header *eh;

	sc = ISSET(m->m_flags, M_MCAST|M_BCAST) ?
	    nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx) :
	    nvgre_ucast_find(key);
	if (sc == NULL)
		return (-1);

	/* from here on the packet belongs to this interface */
	if ((m = gre_ether_align(m, hlen)) == NULL)
		return (0);

	/* record the key's t_dst against the inner source MAC */
	eh = mtod(m, struct ether_header *);
	etherbridge_map_ea(&sc->sc_eb, (void *)&key->t_dst,
	    (struct ether_addr *)eh->ether_shost);

	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	SET(m->m_pkthdr.csum_flags, M_FLOWID);

	CLR(m->m_flags, M_MCAST|M_BCAST);
	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);
	return (0);
}

/*
 * Strip hlen bytes of encapsulation from m and make sure the payload
 * that follows the Ethernet header is 32-bit aligned.
 *
 * Packets shorter than an Ethernet header are dropped.  If the payload
 * is misaligned the packet is duplicated with an ETHER_ALIGN offset and
 * the original is freed.  Returns the mbuf to use, or NULL if the packet
 * was dropped or an allocation failed.
 */
static struct mbuf *
gre_ether_align(struct mbuf *m, int hlen)
{
	struct mbuf *payload, *copy;
	int off;

	m_adj(m, hlen);

	if (m->m_pkthdr.len < sizeof(struct ether_header))
		goto drop;

	if ((m = m_pullup(m, sizeof(struct ether_header))) == NULL)
		return (NULL);

	/* locate the first byte after the Ethernet header */
	payload = m_getptr(m, sizeof(struct ether_header), &off);
	if (payload == NULL)
		goto drop;

	if (ALIGNED_POINTER(mtod(payload, caddr_t) + off, uint32_t))
		return (m);

	/* misaligned: hand back a realigned copy (or NULL) instead */
	copy = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
	m_freem(m);
	return (copy);

drop:
	m_freem(m);
	return (NULL);
}

/*
 * Handle a keepalive payload received on a gre interface.
 *
 * The packet is ignored unless keepalives are enabled and the tunnel's
 * routing table matches the interface's rdomain.  The embedded uptime
 * (after removing sc_ka_bias) must not lie in the future and must be at
 * most hz * 10 ticks old, and the SipHash digest over gk_uptime and
 * gk_random, keyed with sc_ka_key, must match gk_digest.  Packets that
 * arrive less than hz / 10 ticks after the last accepted one are dropped
 * before the digest is computed.
 *
 * An accepted keepalive advances the DOWN -> HOLD -> UP state machine and
 * re-arms the hold timeout.  The mbuf is consumed on every path.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;	/* one snapshot used for all comparisons */

	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;

	gk = mtod(m, struct gre_keepalive *);
	/* reject timestamps from the future or older than the window */
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	/* recompute the digest over the fields the packet carries */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/*
		 * First good keepalive: start holding.  holdmax is doubled
		 * for next time, capped at 16 * sc_ka_count.
		 */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		/* stay in hold until the counter runs out */
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* decay holdmax back towards sc_ka_count while up */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	/* push the hold timeout out by timeo * count seconds */
	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	m_freem(m);
}

/*
 * Output routine for gre interfaces: validate the packet, tag it with
 * this interface's index and queue it for gre_start().
 *
 * Returns EACCES when gre is not allowed, ENETDOWN when the interface is
 * not running and EAFNOSUPPORT for unsupported address families; these
 * drops are not counted.  A packet that already carries a tag for this
 * interface is dropped with EIO, a failed tag allocation yields ENOBUFS,
 * and those errors as well as a failed enqueue are counted in
 * if_oerrors.  The mbuf is consumed on every path.
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *tag;
	int error;

	if (!gre_allow) {
		m_freem(m);
		return (EACCES);
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		m_freem(m);
		return (ENETDOWN);
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	/* Try to limit infinite recursion through misconfiguration. */
	tag = m_tag_find(m, PACKET_TAG_GRE, NULL);
	while (tag != NULL) {
		if (memcmp((caddr_t)(tag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			/* this packet has been through here before */
			m_freem(m);
			ifp->if_oerrors++;
			return (EIO);
		}
		tag = m_tag_find(m, PACKET_TAG_GRE, tag);
	}

	tag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (tag == NULL) {
		m_freem(m);
		ifp->if_oerrors++;
		return (ENOBUFS);
	}
	memcpy((caddr_t)(tag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, tag);

	/* gre_start() reads the family back from the packet header */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);
}

/*
 * Drain the send queue of a gre interface: tap each packet to bpf,
 * encapsulate it with gre_l3_encap() and pass it to gre_ip_output().
 * A failure in either step is counted in if_oerrors.
 */
void
gre_start(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;
	struct mbuf *m;
	int af;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	for (;;) {
		m = ifq_dequeue(&ifp->if_snd);
		if (m == NULL)
			break;

		/* the address family was stashed by the output routine */
		af = m->m_pkthdr.ph_family;

#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
#endif

		m = gre_l3_encap(&sc->sc_tunnel, m, af);
		if (m != NULL && gre_ip_output(&sc->sc_tunnel, m) == 0)
			continue;

		ifp->if_oerrors++;
	}
}

/*
 * Route request hook for mgre interfaces.
 *
 * Only RTM_ADD of an RTF_LOCAL route is acted upon: if the route's key
 * matches one of the interface's addresses and the rdomain's loopback
 * interface has an address of the same family, RTF_LLINFO is cleared on
 * the route.  Every other request is ignored.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	if (req != RTM_ADD || !ISSET(rt->rt_flags, RTF_LOCAL))
		return;

	/* find the interface address this local route was added for */
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (memcmp(rt_key(rt), ifa->ifa_addr,
		    rt_key(rt)->sa_len) == 0)
			break;
	}
	if (ifa == NULL)
		return;

	KASSERT(ifa == rt->rt_ifa);

	/* look for a loopback address of the same family */
	lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
	KASSERT(lo0ifp != NULL);
	TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
		if (lo0ifa->ifa_addr->sa_family ==
		    ifa->ifa_addr->sa_family)
			break;
	}
	if_put(lo0ifp);

	if (lo0ifa == NULL)
		return;

	CLR(rt->rt_flags, RTF_LLINFO);
}

/*
 * Output routine for mgre (point-to-multipoint) interfaces.
 *
 * The tunnel destination is taken from the gateway address of the
 * link-layer route for the packet, so the packet is encapsulated here
 * rather than in the start routine, and then queued on the interface.
 *
 * Errors returned before encapsulation (EACCES, ENETDOWN, EAFNOSUPPORT,
 * ENETUNREACH, EHOSTDOWN, EHOSTUNREACH, EINVAL, EAGAIN, EIO, ENOBUFS)
 * free the mbuf without touching if_oerrors; a failed encapsulation or
 * enqueue is counted in if_oerrors.
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* there is no single peer to send multicast or broadcast to */
	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	/* the gateway must be in the tunnel's own address family */
	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	/* remember that this packet has been through this interface */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* pick the outer destination address out of the gateway sockaddr */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
 #endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	/* a NULL return means the mbuf is already gone */
	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL) {
		ifp->if_oerrors++;
		return (ENOBUFS);
	}

	/* mgre_start() hands the payload family to bpf */
	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}

/*
 * Drain the send queue of an mgre interface.
 *
 * Queued packets are already encapsulated, so bpf is shown a temporary
 * mbuf header that skips the first if_hdrlen bytes before the packet is
 * handed to gre_ip_output().  Output failures are counted in if_oerrors.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	for (;;) {
		m = ifq_dequeue(&ifp->if_snd);
		if (m == NULL)
			break;

#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr view;
			struct mbuf *inner;
			int skip;

			inner = m_getptr(m, ifp->if_hdrlen, &skip);
			KASSERT(inner != NULL);

			/* fake header pointing just past the encapsulation */
			view.mh_flags = 0;
			view.mh_next = inner->m_next;
			view.mh_len = inner->m_len - skip;
			view.mh_data = inner->m_data + skip;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&view, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0)
			ifp->if_oerrors++;
	}
}

/*
 * Drain the send queue of an egre interface.
 *
 * If gre is not allowed the queue is purged.  Otherwise each Ethernet
 * frame is tapped to bpf, given a fresh leading mbuf to hold the
 * encapsulation headers, wrapped with gre_encap() as transparent
 * Ethernet and passed to gre_ip_output().
 *
 * Every packet that is dropped on the way out, including one dropped
 * because the leading mbuf could not be allocated, is counted in
 * if_oerrors.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			/* account for the drop like the other error paths */
			m_freem(m0);
			ifp->if_oerrors++;
			continue;
		}

		/* the new mbuf becomes the packet header, with no data yet */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

/*
 * Encapsulate a layer 3 payload for transmission to dst.
 *
 * The payload's address family selects the GRE protocol number and
 * determines where the inner TOS and TTL are read from.  The outer TTL
 * is the tunnel's t_ttl, or a copy of the payload's TTL/hop limit when
 * t_ttl is -1.  The outer TOS is derived through gre_l3_tos() and
 * ip_ecn_ingress().
 *
 * Returns the encapsulated packet, or NULL if the payload header could
 * not be pulled up or encapsulation failed.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		ip6 = mtod(m, struct ip6_hdr *);
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
#endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/*
		 * Mask the shim as it is stored in the packet and only
		 * then convert it, exactly as gre_mpls_patch() does on
		 * input.  Converting first would apply MPLS_EXP_MASK to
		 * a host-order value.
		 * NOTE(review): assumes the kernel's MPLS_*_MASK constants
		 * are in network byte order -- confirm in netmpls/mpls.h.
		 */
		shim = *mtod(m, uint32_t *);
		itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;

		/* the TTL is the last byte of the shim */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	if (tttl == -1) {
		/* copy the TTL from the payload */
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}

/*
 * Prepend a version 0 GRE header (plus a key field when the tunnel uses
 * one) to m and continue with the outer IP header.
 *
 * In entropy mode, and if the packet carries a flow id, the flow id is
 * folded into the key bits not covered by GRE_KEY_ENTROPY.  Returns the
 * encapsulated packet, or NULL if prepending a header failed.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	const int keyed = (tunnel->t_key_mask != GRE_KEY_NONE);
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (keyed)
		hlen += sizeof(*gkh);

	if ((m = m_prepend(m, hlen, M_DONTWAIT)) == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_proto = proto;
	gh->gre_flags = GRE_VERS_0;

	if (keyed) {
		/* the key field directly follows the base header */
		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;
		gh->gre_flags |= htons(GRE_KP);

		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    m->m_pkthdr.ph_flowid);
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}

/*
 * Prepend the outer IPv4 or IPv6 header for the tunnel's address family.
 *
 * The source address comes from the tunnel, the destination from dst,
 * and ttl/tos are stored as given.  If the tunnel has no address family
 * configured yet the packet is freed.  Returns the packet with the outer
 * header in place, or NULL if it was freed or m_prepend() failed.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		/* t_df is stored ready for use as the offset field */
		ip->ip_off = tunnel->t_df;
		ip->ip_tos = tos;
		/* the packet length now includes the new header */
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		/* payload length is taken before the header is prepended */
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip6 = mtod(m, struct ip6_hdr *);
		/*
		 * ip6_flow is assigned first; the version and traffic class
		 * are OR-ed in afterwards.
		 * NOTE(review): presumably ip6_vfc overlays the first byte
		 * of ip6_flow, which is why the order matters -- confirm.
		 */
		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
		    htonl(m->m_pkthdr.ph_flowid) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		/* IPv6 has no DF bit; the request is passed on as a flag */
		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}

/*
 * Hand an encapsulated packet to the IP stack of the tunnel's address
 * family, in the tunnel's routing table.  Always returns 0.
 */
static int
gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
{
	/* the outer packet is unicast in the tunnel's routing table */
	CLR(m->m_flags, M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if (tunnel->t_af == AF_INET)
		ip_send(m);
#ifdef INET6
	else if (tunnel->t_af == AF_INET6)
		ip6_send(m);
#endif
	else
		unhandled_af(tunnel->t_af);

	return (0);
}

/*
 * Handle the ioctls that operate on the generic tunnel state and are
 * shared between the gre interface flavours: MTU, virtual network id and
 * flow id, tunnel endpoint addresses, tunnel routing table and the
 * don't-fragment setting.
 *
 * Returns 0 on success, an errno on failure, or ENOTTY for commands that
 * are not handled here so that callers can try another handler.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* only a lower bound is enforced here */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted, nothing to do */
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		/* the routing table must be in range and must exist */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		/* stored in network byte order */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Compute the outer TOS for a layer 2 payload from the tunnel's txhprio
 * setting: IF_HDRPRIO_PACKET uses the mbuf's pf priority, any other
 * value is used as the priority itself.
 */
static uint8_t
gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
{
	uint8_t prio;

	if (t->t_txhprio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	else
		prio = t->t_txhprio;

	return (IFQ_PRIO2TOS(prio));
}

/*
 * Compute the outer TOS for a layer 3 payload from the tunnel's txhprio
 * setting.  IF_HDRPRIO_PAYLOAD passes the payload's tos through
 * unchanged; otherwise the priority comes from the mbuf
 * (IF_HDRPRIO_PACKET) or from the setting itself, and only the ECN bits
 * of the payload's tos are kept.
 */
static uint8_t
gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
{
	uint8_t prio;

	if (t->t_txhprio == IF_HDRPRIO_PAYLOAD)
		return (tos);

	if (t->t_txhprio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	else
		prio = t->t_txhprio;

	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
}

/*
 * ioctl handler for gre interfaces.
 *
 * Handles interface up/down, keepalive configuration, tunnel TTL, ECN
 * and header priority settings itself and passes everything else on to
 * gre_tunnel_ioctl().  Returns 0 or an errno.
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		/* configuring an address brings the interface up */
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		/* bring IFF_RUNNING in line with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/*
		 * timeout and count must each be in range, and either both
		 * zero (disable) or both non-zero (enable)
		 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh digest key and uptime bias for this session */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			/* start sending and arm the hold timeout */
			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 is accepted in addition to the range 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		/* everything else is generic tunnel configuration */
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}

/*
 * ioctl handler for mgre interfaces.
 *
 * Handles interface up/down, tunnel TTL, ECN, the local tunnel address
 * and header priority settings itself and passes everything else on to
 * gre_tunnel_ioctl().  Changes to the local address, vnetid and tunnel
 * routing table are refused with EBUSY while the interface is running.
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring IFF_RUNNING in line with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 is accepted in addition to the range 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	/* these are only allowed while the interface is not running */
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}

/*
 * Configure the local address of an mgre tunnel.
 *
 * The request must carry an AF_UNSPEC destination and a non-null,
 * non-multicast IPv4 or IPv6 source address.  On success the tunnel's
 * source address and address family are updated and its destination is
 * cleared.  On failure the tunnel is left unmodified.
 *
 * Returns 0, EINVAL for a malformed request, EAFNOSUPPORT for any other
 * address family, or the error from in6_embedscope().
 */
static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	struct in6_addr src6;
	int error;
#endif

	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate, then commit the addresses for the family */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		addr4 = (struct sockaddr_in *)addr;
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		/* commit */
		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		/*
		 * Embed the scope into a scratch address so that a failure
		 * cannot leave a half-written source address behind in the
		 * live tunnel configuration.
		 * NOTE(review): in6_embedscope() presumably stores into its
		 * output before it can fail -- confirm in in6_src.c.
		 */
		error = in6_embedscope(&src6, addr6, NULL, NULL);
		if (error != 0)
			return (error);

		/* commit */
		tunnel->t_src6 = src6;
		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}

/*
 * Report the local address of an mgre tunnel.
 *
 * req->addr is filled in with the tunnel's source address and
 * req->dstaddr is returned as an AF_UNSPEC sockaddr.  Returns
 * EADDRNOTAVAIL, without touching req, if the tunnel has no address
 * family configured.
 */
static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)&req->addr;

		memset(sin, 0, sizeof(*sin));
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = tunnel->t_src4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&req->addr;

		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
	}
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	/* only the length and family of the destination are set */
	dstaddr->sa_family = AF_UNSPEC;
	dstaddr->sa_len = 2;

	return (0);
}

/*
 * ioctl handler for egre interfaces.
 *
 * Handles interface up/down, tunnel TTL and header priority settings
 * itself.  Everything else goes to gre_tunnel_ioctl() and, if that
 * answers ENOTTY, to ether_ioctl().  Changes to the tunnel identity are
 * refused with EBUSY while the interface is running.  ENETRESET is
 * turned into success.
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring IFF_RUNNING in line with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* only 1..255 is accepted here */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	/* these are only allowed while the interface is not running */
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		/* try the generic tunnel handler, then the Ethernet one */
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

/*
 * ioctl handler for nvgre interfaces.
 *
 * Handles interface up/down, tunnel endpoints, parent interface, vnetid,
 * tunnel routing table, DF, TTL, header priorities and the learned
 * address (etherbridge) table.  Anything else goes to ether_ioctl().
 *
 * Every setting that identifies the tunnel (endpoint addresses, parent
 * interface, vnetid, routing table) can only be changed while the
 * interface is not running; otherwise EBUSY is returned.  A successful
 * change of any of them flushes the learned address table.  ENETRESET is
 * turned into success.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring IFF_RUNNING in line with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGIFPARENT:
		/* the parent is stored as an interface index */
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		/*
		 * Like the other settings that identify the tunnel, the
		 * routing table may only change while the interface is
		 * down.
		 */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		/* only 1..255 is accepted here */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	/* learned address table configuration */
	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = nvgre_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = nvgre_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted, nothing to do */
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

/*
 * ioctl handler for eoip interfaces.
 *
 * Handles interface up/down, keepalive configuration, the tunnel id,
 * tunnel endpoints, tunnel routing table, TTL, DF and header priority
 * settings.  Anything else goes to ether_ioctl().  Keepalive, tunnel id,
 * endpoint and routing table changes are refused with EBUSY while the
 * interface is running.  ENETRESET is turned into success.
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring IFF_RUNNING in line with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		/* a zero timeout or count disables keepalives */
		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* the tunnel id is a 16 bit value */
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		/* both copies are kept in little endian byte order */
		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* the routing table must be in range and must exist */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		/* only 1..255 is accepted here */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted, nothing to do */
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

static int
gre_up(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE)
		gre_keepalive_send(sc);

	return (0);
}

static int
gre_down(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
	}

	return (0);
}

/*
 * Translate a keepalive state into an interface link state and publish it
 * if it differs from the current one.
 *
 * A non-running interface, or one without keepalives (GRE_KA_NONE), is
 * reported as LINK_STATE_UNKNOWN.  GRE_KA_UP maps to LINK_STATE_UP and
 * every other state to LINK_STATE_KALIVE_DOWN.
 */
static void
gre_link_state(struct ifnet *ifp, unsigned int state)
{
	int link_state;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) || state == GRE_KA_NONE) {
		/* maybe up? or down? it's unknown, really */
		link_state = LINK_STATE_UNKNOWN;
	} else if (state == GRE_KA_UP)
		link_state = LINK_STATE_UP;
	else
		link_state = LINK_STATE_KALIVE_DOWN;

	/* only notify on an actual change */
	if (ifp->if_link_state == link_state)
		return;

	ifp->if_link_state = link_state;
	if_link_state_change(ifp);
}

/*
 * Build and transmit one gre(4) keepalive; arg is the struct gre_softc.
 *
 * The payload is a struct gre_keepalive carrying an uptime value, a random
 * value and a SipHash digest over both.  It is encapsulated twice: first
 * with a temporary tunnel description whose source and destination are
 * swapped and a zero GRE protocol, then with the real tunnel parameters.
 * NOTE(review): the swapped inner header presumably makes the peer send the
 * packet straight back to us -- confirm against the receive path.
 *
 * Called directly from gre_up() and rescheduled through sc_ka_send.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuration
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/*
	 * Nothing to send when the interface is down, keepalives are off,
	 * no tunnel addresses are configured, or the tunnel lives in a
	 * different routing table than the interface itself.
	 */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	/* fall back to a cluster when the header mbuf is too small */
	if (len > MHLEN) {
		MCLGETL(m, M_DONTWAIT, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/*
	 * Claim the whole length, then trim linkhdr bytes off the front so
	 * only the keepalive payload remains in the mbuf.
	 */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* digest covers the uptime and random fields, keyed by sc_ka_key */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	/* a tunnel ttl of -1 selects ip_defttl */
	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* inner header: same tunnel, but source and destination swapped */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	/* finish the inner IP header and pick the outer GRE protocol */
	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		in_hdr_cksum_out(m, NULL);

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}

/*
 * Declare the gre(4) keepalive state down and update the link state; arg
 * is the struct gre_softc.  Does nothing when the interface is not running
 * or keepalives are disabled.
 * NOTE(review): presumably the sc_ka_hold timeout handler -- the timeout
 * setup is outside this part of the file.
 */
static void
gre_keepalive_hold(void *arg)
{
	struct gre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;
	if (sc->sc_ka_state == GRE_KA_NONE)
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, GRE_KA_DOWN);
	NET_UNLOCK();
}

/*
 * Validate and install the tunnel endpoint addresses from an
 * if_laddrreq.
 *
 * tunnel: tunnel description to update.
 * req:    request holding the source (addr) and destination (dstaddr).
 * ucast:  non-zero when the destination must be a unicast address, zero
 *         when it must be a multicast address.
 *
 * Returns 0 on success, EINVAL for mismatched, unspecified or wrongly
 * typed addresses, EAFNOSUPPORT for an unsupported family, or the error
 * from in6_embedscope().  The tunnel is only modified on success.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	struct in6_addr tsrc6, tdst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		/* nothing can fail past this point */
		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		/*
		 * Embed into temporaries first: either call may fail, and
		 * a failure must not leave the tunnel with a new address
		 * but the old address family.
		 */
		error = in6_embedscope(&tsrc6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tdst6, dst6, NULL, NULL);
		if (error != 0)
			return (error);

		tunnel->t_src6 = tsrc6;
		tunnel->t_dst6 = tdst6;

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}

/*
 * Report the configured tunnel endpoints in an if_laddrreq.
 *
 * The source goes to req->addr and the destination to req->dstaddr, both
 * as freshly zeroed sockaddrs of the tunnel's address family.  Returns
 * EADDRNOTAVAIL when no tunnel is configured (AF_UNSPEC), EAFNOSUPPORT for
 * an unknown family, and 0 otherwise.
 */
static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *in4;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *in6;
#endif

	if (tunnel->t_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);

	if (tunnel->t_af == AF_INET) {
		in4 = (struct sockaddr_in *)src;
		memset(in4, 0, sizeof(*in4));
		in4->sin_len = sizeof(*in4);
		in4->sin_family = AF_INET;
		in4->sin_addr = tunnel->t_src4;

		in4 = (struct sockaddr_in *)dst;
		memset(in4, 0, sizeof(*in4));
		in4->sin_len = sizeof(*in4);
		in4->sin_family = AF_INET;
		in4->sin_addr = tunnel->t_dst4;

		return (0);
	}

#ifdef INET6
	if (tunnel->t_af == AF_INET6) {
		in6 = (struct sockaddr_in6 *)src;
		memset(in6, 0, sizeof(*in6));
		in6->sin6_len = sizeof(*in6);
		in6->sin6_family = AF_INET6;
		in6_recoverscope(in6, &tunnel->t_src6);

		in6 = (struct sockaddr_in6 *)dst;
		memset(in6, 0, sizeof(*in6));
		in6->sin6_len = sizeof(*in6);
		in6->sin6_family = AF_INET6;
		in6_recoverscope(in6, &tunnel->t_dst6);

		return (0);
	}
#endif

	return (EAFNOSUPPORT);
}

/*
 * Forget the tunnel endpoints by resetting the address family to
 * AF_UNSPEC.  The stored addresses themselves are left untouched.
 * Always returns 0.
 */
static int
gre_del_tunnel(struct gre_tunnel *tunnel)
{
	tunnel->t_af = AF_UNSPEC;	/* commit */
	return 0;
}

/*
 * Set the GRE key from ifr->ifr_vnetid.
 *
 * If the tunnel currently uses the entropy key layout, the entropy limits,
 * shift and mask are used; otherwise the full-key ones.  The key is stored
 * in network byte order.  Returns EINVAL when the id is outside the
 * permitted range, 0 on success.
 */
static int
gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t lo, hi, newmask;
	unsigned int bits;

	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
		lo = GRE_KEY_ENTROPY_MIN;
		hi = GRE_KEY_ENTROPY_MAX;
		bits = GRE_KEY_ENTROPY_SHIFT;
		newmask = GRE_KEY_ENTROPY;
	} else {
		lo = GRE_KEY_MIN;
		hi = GRE_KEY_MAX;
		bits = GRE_KEY_SHIFT;
		newmask = GRE_KEY_MASK;
	}

	if (ifr->ifr_vnetid < lo || ifr->ifr_vnetid > hi)
		return (EINVAL);

	/* commit */
	tunnel->t_key = htonl(ifr->ifr_vnetid << bits);
	tunnel->t_key_mask = newmask;

	return (0);
}

static int
gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	int shift;

	switch (tunnel->t_key_mask) {
	case GRE_KEY_NONE:
		return (EADDRNOTAVAIL);
	case GRE_KEY_ENTROPY:
		shift = GRE_KEY_ENTROPY_SHIFT;
		break;
	case GRE_KEY_MASK:
		shift = GRE_KEY_SHIFT;
		break;
	}

	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;

	return (0);
}

/*
 * Remove the GRE key by resetting the key mask to GRE_KEY_NONE.  The
 * stored key value is not cleared.  Always returns 0.
 */
static int
gre_del_vnetid(struct gre_tunnel *tunnel)
{
	tunnel->t_key_mask = GRE_KEY_NONE;
	return 0;
}

/*
 * Switch the key layout between the full key (GRE_KEY_MASK) and the
 * entropy layout (GRE_KEY_ENTROPY); a non-zero ifr->ifr_vnetid selects
 * the latter.
 *
 * The stored key is shifted by GRE_KEY_ENTROPY_SHIFT to match the new
 * layout.  Returns EADDRNOTAVAIL when no key is configured, ERANGE when
 * the current key is larger than GRE_KEY_ENTROPY_MAX and so does not fit
 * the entropy layout, and 0 otherwise (including when nothing changes).
 */
static int
gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t want, hkey;

	if (tunnel->t_key_mask == GRE_KEY_NONE)
		return (EADDRNOTAVAIL);

	want = (ifr->ifr_vnetid != 0) ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
	if (want == tunnel->t_key_mask)
		return (0);	/* nop */

	/* work on the key in host byte order */
	hkey = ntohl(tunnel->t_key);
	if (want == GRE_KEY_ENTROPY) {
		if (hkey > GRE_KEY_ENTROPY_MAX)
			return (ERANGE);

		hkey <<= GRE_KEY_ENTROPY_SHIFT;
	} else
		hkey >>= GRE_KEY_ENTROPY_SHIFT;

	/* commit */
	tunnel->t_key = htonl(hkey);
	tunnel->t_key_mask = want;

	return (0);
}

/*
 * Report in ifr->ifr_vnetid whether the entropy key layout is in use
 * (1) or not (0).  Returns EADDRNOTAVAIL when no key is configured.
 */
static int
gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	if (tunnel->t_key_mask == GRE_KEY_NONE)
		return (EADDRNOTAVAIL);

	if (tunnel->t_key_mask == GRE_KEY_ENTROPY)
		ifr->ifr_vnetid = 1;
	else
		ifr->ifr_vnetid = 0;

	return (0);
}

static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}

static int
mgre_down(struct mgre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_if.if_flags, IFF_RUNNING);
	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */

	RBT_REMOVE(mgre_tree, &mgre_tree, sc);

	/* barrier? */

	return (0);
}

static int
egre_up(struct egre_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);
}

static int
egre_down(struct egre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	RBT_REMOVE(egre_tree, &egre_tree, sc);

	/* barrier? */

	return (0);
}

/*
 * Media change callback: the media cannot be changed, so every request
 * is refused with ENOTTY.
 */
static int
egre_media_change(struct ifnet *ifp)
{
	return ENOTTY;
}

/*
 * Media status callback: always reports a valid, active status and an
 * auto-selected Ethernet medium.
 */
static void
egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
}

/*
 * Bring an nvgre(4) interface up.
 *
 * The softc is inserted into both the multicast and unicast lookup trees,
 * the tunnel destination is joined as a multicast group on the parent
 * interface, and link-state and detach hooks are registered on the parent.
 *
 * Returns 0 on success, or:
 *   EDESTADDRREQ  no tunnel addresses are configured
 *   ENXIO         the parent interface (sc_ifp0) does not exist
 *   ENODEV        the parent does not have IFF_MULTICAST set
 *   EADDRINUSE    an equal entry already exists in one of the trees
 *   ECONNABORTED  in_addmulti() failed
 *   (other)       the error reported by in6_addmulti()
 * On failure every step already taken is undone.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	/* take a reference to the parent; released on every path below */
	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the tunnel destination group on the parent interface */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	/* remember the membership so nvgre_down() can drop it */
	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);

	/* unwind in reverse order of setup */
remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}

/*
 * Take an nvgre(4) interface down.
 *
 * Clears IFF_RUNNING, waits for the send queue and the send task to
 * finish, discards anything still queued for transmission, removes the
 * hooks from the parent interface (if it still exists), leaves the
 * multicast group and removes the softc from both lookup trees.
 *
 * Called with the net lock held; the lock is dropped temporarily around
 * the barriers.  Always returns 0.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/*
	 * Release the net lock while waiting for the barriers.
	 * NOTE(review): presumably because nvgre_send4()/nvgre_send6(),
	 * run from the send task, take the net lock themselves -- confirm.
	 */
	NET_UNLOCK();
	ifq_barrier(&ifp->if_snd);
	/* if the task could not be removed, wait for the taskq */
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	/* the parent may be gone already; the hooks go with it then */
	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	if_put(ifp0);

	/* drop the multicast membership taken in nvgre_up() */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}

/*
 * Link-state hook for the parent interface.  Intentionally does nothing;
 * arg is unused.
 */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}

/*
 * Detach hook for the parent interface; arg is the struct nvgre_softc.
 * A running nvgre(4) interface is shut down and marked down first, then
 * the reference to the parent (sc_ifp0) is cleared.
 */
static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		sc->sc_ifp0 = 0;
		return;
	}

	nvgre_down(sc);
	if_down(ifp);

	sc->sc_ifp0 = 0;
}

static int
nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
{
	struct ifnet *ifp0;

	ifp0 = if_unit(parent);
	if (ifp0 == NULL)
		return (EINVAL);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		if_put(ifp0);
		return (EPROTONOSUPPORT);
	}

	ifsetlro(ifp0, 0);

	/* commit */
	sc->sc_ifp0 = ifp0->if_index;
	if_put(ifp0);

	return (0);
}

static int
nvgre_add_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union gre_addr endpoint;
	unsigned int type;

	/* ignore ifba_ifsname */

	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_tunnel.t_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		in6_recoverscope(&src6, &sc->sc_tunnel.t_src6);

		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}

static int
nvgre_del_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}

/*
 * Transmit start routine for nvgre(4).
 *
 * Each Ethernet frame taken off the send queue is encapsulated towards
 * a tunnel endpoint: the tunnel destination for broadcast frames and for
 * destinations with no bridge table entry, otherwise the endpoint found
 * in the bridge table.  Encapsulated packets are collected and handed to
 * the send task via sc_send_list.  With gre_allow unset the queue is
 * purged instead.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			const union gre_addr *endpoint;

			/*
			 * The looked-up endpoint is copied before leaving
			 * the SMR read section.
			 */
			smr_read_enter();
			endpoint = etherbridge_resolve_ea(&sc->sc_eb,
			    (struct ether_addr *)eh->ether_dhost);
			if (endpoint == NULL) {
				/* "flood" to unknown hosts */
				endpoint = &tunnel->t_dst;
			}
			gateway = *endpoint;
			smr_read_leave();
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		/* the new, empty mbuf becomes the head of the chain */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		/* the outer packet goes out in the tunnel's routing table */
		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	/* hand the batch to the send task, scheduling it if enlist succeeded */
	if (!ml_empty(&ml)) {
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}

static uint64_t
nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_ifp0;
	imo.imo_ttl = sc->sc_tunnel.t_ttl;
	imo.imo_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}

#ifdef INET6
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */

/*
 * Send task for nvgre(4); arg is the struct nvgre_softc.
 *
 * Takes everything queued on sc_send_list and passes it to the output
 * routine matching the tunnel's address family, adding the number of
 * failed packets to if_oerrors.  Does nothing when the interface is not
 * running or the list is empty.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	/* XXX should be ifq_oerrors */
	switch (af) {
	case AF_INET:
		ifp->if_oerrors += nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		ifp->if_oerrors += nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}
}

static int
eoip_up(struct eoip_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		eoip_keepalive_send(sc);
	}

	return (0);
}

static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}

/*
 * Transmit start routine for eoip(4).
 *
 * Every Ethernet frame taken off the send queue gets an empty mbuf
 * prepended, is wrapped by eoip_encap() and passed to gre_ip_output().
 * A failure of either step counts as an output error.  With gre_allow
 * unset the queue is purged instead.
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	for (;;) {
		m0 = ifq_dequeue(&ifp->if_snd);
		if (m0 == NULL)
			break;

#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		/* the new, empty mbuf becomes the head of the chain */
		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL) {
			ifp->if_oerrors++;
			continue;
		}

		if (gre_ip_output(&sc->sc_tunnel, m) != 0)
			ifp->if_oerrors++;
	}
}

/*
 * Prepend the EoIP headers to m and hand it to gre_encap_ip().
 *
 * The GRE header carries version 1 with the key-present flag and the
 * GRE_EOIP protocol; the EoIP key header carries the payload length (the
 * packet length before prepending, big-endian) and the tunnel id.
 * Returns the encapsulated mbuf, or NULL when prepending fails or
 * gre_encap_ip() returns NULL.
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	const int paylen = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	/* the EoIP key header directly follows the GRE header */
	gh = mtod(m, struct gre_header *);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	htobem16(&eoiph->eoip_len, paylen);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(tunnel, m, tunnel->t_ttl, tos));
}

/*
 * Transmit one EoIP keepalive, which is an EoIP header with no payload
 * behind it; arg is the struct eoip_softc.  Called directly from
 * eoip_up() and re-armed through the sc_ka_send timeout.
 *
 * The next run is scheduled before any allocation is attempted, so that
 * a transient mbuf shortage or encapsulation failure does not stop
 * keepalives for good; gre_keepalive_send() takes the same approach.
 * Nothing is sent or scheduled when the interface is not running.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* re-schedule first, so temporary errors below are retried */
	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	/* fall back to a cluster when the header mbuf is too small */
	if (linkhdr > MHLEN) {
		MCLGETL(m, M_DONTWAIT, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/*
	 * Claim linkhdr bytes and trim them off the front again, which
	 * leaves an empty packet for the headers to be prepended to.
	 */
	m->m_pkthdr.pf.prio = ifp->if_llprio;
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}

/*
 * Declare the eoip(4) keepalive state down and update the link state;
 * arg is the struct eoip_softc.  Does nothing when the interface is not
 * running.
 * NOTE(review): presumably the sc_ka_hold timeout handler, armed by
 * eoip_keepalive_recv() -- the timeout setup is outside this part of the
 * file.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	const int running = ISSET(ifp->if_flags, IFF_RUNNING) != 0;

	if (!running)
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, GRE_KA_DOWN);
	NET_UNLOCK();
}

/*
 * Advance the keepalive state machine after a keepalive was received and
 * re-arm the hold timeout for sc_ka_timeo * sc_ka_count seconds.
 *
 *   NONE: keepalives are disabled, nothing happens.
 *   DOWN: move to HOLD, start counting down from the current hold
 *         maximum and double that maximum (capped at 16 * count).
 *   HOLD: count down; once the counter is used up the state becomes UP
 *         and the link state is updated.
 *   UP:   lower the hold maximum by one, but not below count.
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	if (sc->sc_ka_state == GRE_KA_NONE)
		return;

	if (sc->sc_ka_state == GRE_KA_DOWN) {
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
	} else if (sc->sc_ka_state == GRE_KA_HOLD) {
		if (--sc->sc_ka_holdcnt <= 0) {
			sc->sc_ka_state = GRE_KA_UP;
			gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		}
	} else if (sc->sc_ka_state == GRE_KA_UP) {
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}

/*
 * Input handler for EoIP packets.
 *
 * key:    lookup key for eoip_tree; its t_key is filled in here with the
 *         tunnel id taken from the packet.
 * m:      the received packet, starting at the outer IP header.
 * gh:     the GRE header; re-derived from m after the pullup.
 * otos:   outer TOS value, passed on to gre_l2_prio().
 * iphlen: length of the outer IP header in front of the GRE header.
 *
 * Returns m unchanged when the packet is declined (unexpected GRE flags,
 * too short, or no matching interface), and NULL when the packet was
 * consumed, dropped, or lost to an mbuf failure.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP always has exactly the key-present flag and version 1 */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	/* make all headers contiguous; m may change, so re-derive gh */
	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* a zero-length payload is a keepalive */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	/* drop truncated packets; trim anything beyond the stated length */
	if (m->m_pkthdr.len < len)
		goto drop;
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}

/*
 * Integer sysctl variables served by gre_sysctl(): each entry names the
 * MIB id and the variable behind it, followed by two bounds (0 and 1).
 */
const struct sysctl_bounded_args gre_vars[] = {
	{ GRECTL_ALLOW, &gre_allow, 0, 1 },
	{ GRECTL_WCCP, &gre_wccp, 0, 1 },
};

/*
 * sysctl handler for the gre variables listed in gre_vars.  The request
 * is passed to sysctl_bounded_arr() while holding the net lock, and its
 * result is returned.
 */
int
gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int rv;

	NET_LOCK();
	rv = sysctl_bounded_arr(gre_vars, nitems(gre_vars), name, namelen,
	    oldp, oldlenp, newp, newlen);
	NET_UNLOCK();

	return (rv);
}

/*
 * Compare two tunnel addresses of address family af byte-wise with
 * memcmp(); returns its result.  Any family other than AF_INET (or
 * AF_INET6 when built with INET6) ends up in unhandled_af().
 */
static inline int
gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
{
#ifdef INET6
	if (af == AF_INET6)
		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
#endif /* INET6 */
	if (af == AF_INET)
		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));

	unhandled_af(af);

	return (0);
}

/*
 * Order two tunnels without looking at their destination address.
 *
 * The sort keys are, in order: whether a key is configured, the key bits
 * covered by both key masks (only when a key is set), the routing table,
 * the address family and finally the source address.  Returns a negative
 * value, zero or a positive value.
 */
static int
gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;

	/* sort by whether K is set */
	ka = a->t_key_mask & GRE_KEY_ENTROPY;
	kb = b->t_key_mask & GRE_KEY_ENTROPY;
	if (ka != kb)
		return (ka > kb ? 1 : -1);

	/* K is set on both or neither; compare the common prefix */
	if (ka != GRE_KEY_NONE) {
		const uint32_t common = a->t_key_mask & b->t_key_mask;

		ka = a->t_key & common;
		kb = b->t_key & common;
		if (ka != kb)
			return (ka > kb ? 1 : -1);
	}

	/* sort by routing table */
	if (a->t_rtableid != b->t_rtableid)
		return (a->t_rtableid > b->t_rtableid ? 1 : -1);

	/* sort by address */
	if (a->t_af != b->t_af)
		return (a->t_af > b->t_af ? 1 : -1);

	return (gre_ip_cmp(a->t_af, &a->t_src, &b->t_src));
}

/*
 * Order two tunnels: everything gre_cmp_src() looks at first, then the
 * destination address as the final tie-breaker.
 */
static int
gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	const int rv = gre_cmp_src(a, b);

	if (rv == 0)
		return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));

	return (rv);
}

/*
 * Ordering for mgre_tree: compares the two tunnels with gre_cmp_src(),
 * which does not look at the destination address.
 */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	const struct gre_tunnel *ta = &a->sc_tunnel;
	const struct gre_tunnel *tb = &b->sc_tunnel;

	return (gre_cmp_src(ta, tb));
}

/* mgre_tree: struct mgre_softc linked via sc_entry, ordered by mgre_cmp() */
RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

/*
 * Ordering for egre_tree: compares the two tunnels with gre_cmp(), so
 * the destination address takes part in the comparison.
 */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	const struct gre_tunnel *ta = &a->sc_tunnel;
	const struct gre_tunnel *tb = &b->sc_tunnel;

	return (gre_cmp(ta, tb));
}

/* egre_tree: struct egre_softc linked via sc_entry, ordered by egre_cmp() */
RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);

/*
 * Address-independent part of the nvgre ordering.  The sort keys are, in
 * order: the key bits selected by GRE_KEY_ENTROPY, the routing table and
 * the address family.  Returns -1, 0 or 1.
 */
static int
nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	const uint32_t ka = a->t_key & GRE_KEY_ENTROPY;
	const uint32_t kb = b->t_key & GRE_KEY_ENTROPY;

	/* sort by common prefix */
	if (ka != kb)
		return (ka > kb ? 1 : -1);

	/* sort by routing table */
	if (a->t_rtableid != b->t_rtableid)
		return (a->t_rtableid > b->t_rtableid ? 1 : -1);

	/* sort by address */
	if (a->t_af != b->t_af)
		return (a->t_af > b->t_af ? 1 : -1);

	return (0);
}

/*
 * Ordering for nvgre_ucast_tree: nvgre_cmp_tunnel() first, then the
 * tunnel source address.
 */
static inline int
nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;
	const int rv = nvgre_cmp_tunnel(a, b);

	if (rv != 0)
		return (rv);

	return (gre_ip_cmp(a->t_af, &a->t_src, &b->t_src));
}

/*
 * Multicast ordering for nvgre: nvgre_cmp_tunnel() on the two tunnels,
 * then the given addresses (compared in a's address family), then the
 * given parent interface indexes.  Returns a negative value, zero or a
 * positive value.
 */
static int
nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
    unsigned int if0idxa, const struct gre_tunnel *b,
    const union gre_addr *ab,unsigned int if0idxb)
{
	int rv;

	if ((rv = nvgre_cmp_tunnel(a, b)) != 0)
		return (rv);

	if ((rv = gre_ip_cmp(a->t_af, aa, ab)) != 0)
		return (rv);

	if (if0idxa != if0idxb)
		return (if0idxa > if0idxb ? 1 : -1);

	return (0);
}

/*
 * Ordering for nvgre_mcast_tree: nvgre_cmp_mcast() applied to each
 * softc's tunnel, tunnel destination address and parent interface index.
 */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *ta = &na->sc_tunnel;
	const struct gre_tunnel *tb = &nb->sc_tunnel;
	const union gre_addr *da = &ta->t_dst;
	const union gre_addr *db = &tb->t_dst;

	return (nvgre_cmp_mcast(ta, da, na->sc_ifp0, tb, db, nb->sc_ifp0));
}

/*
 * Every struct nvgre_softc can sit in two trees at once: nvgre_ucast_tree
 * via sc_uentry, ordered by nvgre_cmp_ucast(), and nvgre_mcast_tree via
 * sc_mentry, ordered by nvgre_cmp_mcast_sc().
 */
RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

/*
 * Ordering for eoip_tree.  The sort keys are, in order: the tunnel key,
 * the routing table, the address family, the source address and the
 * destination address.  Returns a negative value, zero or a positive
 * value.
 */
static inline int
eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
{
	const struct gre_tunnel *a = &ea->sc_tunnel;
	const struct gre_tunnel *b = &eb->sc_tunnel;
	int rv;

	if (a->t_key != b->t_key)
		return (a->t_key > b->t_key ? 1 : -1);

	/* sort by routing table */
	if (a->t_rtableid != b->t_rtableid)
		return (a->t_rtableid > b->t_rtableid ? 1 : -1);

	/* sort by address */
	if (a->t_af != b->t_af)
		return (a->t_af > b->t_af ? 1 : -1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv == 0)
		rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);

	return (rv);
}

/* eoip_tree: struct eoip_softc linked via sc_entry, ordered by eoip_cmp() */
RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);

/*
 * Etherbridge callback: report whether two ports (tunnel endpoint
 * addresses) are equal.  arg is the struct nvgre_softc, whose tunnel
 * address family decides how a and b are compared.  Returns non-zero
 * when they are equal.
 */
static int
nvgre_eb_port_eq(void *arg, void *a, void *b)
{
	struct nvgre_softc *sc = arg;
	int diff;

	diff = gre_ip_cmp(sc->sc_tunnel.t_af, a, b);

	return (diff == 0);
}

static void *
nvgre_eb_port_take(void *arg, void *port)
{
	union gre_addr *ea = port;
	union gre_addr *endpoint;

	endpoint = pool_get(&nvgre_endpoint_pool, PR_NOWAIT);
	if (endpoint == NULL)
		return (NULL);

	*endpoint = *ea;

	return (endpoint);
}

/*
 * Etherbridge callback: give a port (tunnel endpoint address) back to
 * nvgre_endpoint_pool.
 */
static void
nvgre_eb_port_rele(void *arg, void *port)
{
	pool_put(&nvgre_endpoint_pool, port);
}

/*
 * Etherbridge callback: copy the interface name for a port into dst.
 * Every port reports the name of the nvgre(4) interface itself; the
 * result of strlcpy() is returned.
 */
static size_t
nvgre_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	return (strlcpy(dst, ifp->if_xname, len));
}

/*
 * Etherbridge callback: express a port (tunnel endpoint address) as a
 * socket address in ss, using the tunnel's address family.  Only the
 * length, family and address fields are written; IPv6 addresses go
 * through in6_recoverscope().
 */
static void
nvgre_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct nvgre_softc *sc = arg;
	union gre_addr *endpoint = port;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif /* INET6 */

	switch (sc->sc_tunnel.t_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)ss;
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = endpoint->in4;
		return;
#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)ss;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &endpoint->in6);
		return;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}
}