[BACK]Return to ip_mroute.c CVS log [TXT][DIR] Up to [local] / src / sys / netinet

File: [local] / src / sys / netinet / ip_mroute.c (download)

Revision 1.142, Sat Apr 6 14:23:27 2024 UTC (8 weeks, 1 day ago) by bluhm
Branch: MAIN
CVS Tags: HEAD
Changes since 1.141: +10 -11 lines

IP multicast sysctl mrtmfc must not write outside of allocation.

Reading sysctl mrt_sysctl_mfc() allocates memory to be copied back
to user.  Chunks of struct mfcinfo are copied from routing table
to linear heap memory.  If the allocated memory was not a multiple of
the struct size, a struct mfcinfo could be copied to a partially
unallocated destination.  Check that the end of the struct is within
the allocation.

From Alfredo Ortega;  OK claudio@

/*	$OpenBSD: ip_mroute.c,v 1.142 2024/04/06 14:23:27 bluhm Exp $	*/
/*	$NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $	*/

/*
 * Copyright (c) 1989 Stephen Deering
 * Copyright (c) 1992, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
 */

/*
 * IP multicast forwarding procedures
 *
 * Written by David Waitzman, BBN Labs, August 1988.
 * Modified by Steve Deering, Stanford, February 1989.
 * Modified by Mark J. Steiglitz, Stanford, May, 1991
 * Modified by Van Jacobson, LBL, January 1993
 * Modified by Ajit Thyagarajan, PARC, August 1993
 * Modified by Bill Fenner, PARC, April 1994
 * Modified by Charles M. Hannum, NetBSD, May 1995.
 * Modified by Ahmed Helmy, SGI, June 1996
 * Modified by George Edmond Eddy (Rusty), ISI, February 1998
 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
 * Modified by Hitoshi Asaeda, WIDE, August 2000
 * Modified by Pavlin Radoslavov, ICSI, October 2002
 *
 * MROUTING Revision: 1.2
 * advanced API support, bandwidth metering and signaling
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/ioctl.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/igmp.h>
#include <netinet/ip_mroute.h>

/* #define MCAST_DEBUG */

/*
 * Debug tracing.  With MCAST_DEBUG defined, DPRINTF() prints the calling
 * function name and line number, the formatted message and a trailing
 * newline whenever mcast_debug is non-zero.  Without MCAST_DEBUG it
 * expands to an empty statement, so callers must not append their own
 * newline and must not rely on side effects in the arguments.
 */
#ifdef MCAST_DEBUG
int mcast_debug = 1;
#define DPRINTF(fmt, args...)						\
	do {								\
		if (mcast_debug)					\
			printf("%s:%d " fmt "\n",			\
			    __func__, __LINE__, ## args);		\
	} while (0)
#else
#define DPRINTF(fmt, args...)			\
	do { } while (0)
#endif

/*
 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
/* Multicast routing socket registered by MRT_INIT, indexed by rtable id. */
struct socket	*ip_mrouter[RT_TABLEID_MAX + 1];
/* Timer queue on which multicast routes are placed via rt_timer_add(). */
struct rttimer_queue ip_mrouterq;
/* Multicast routes installed per rtable id (see rt_mcast_add/mrt_mcast_del). */
uint64_t	 mrt_count[RT_TABLEID_MAX + 1];
int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */

/* Multicast forwarding statistics counters updated by the forwarding path. */
struct mrtstat	mrtstat;

/*
 * Prototypes for the functions defined in this file that are not
 * declared elsewhere.
 */
struct rtentry	*mfc_find(struct ifnet *, struct in_addr *,
    struct in_addr *, unsigned int);
int get_sg_cnt(unsigned int, struct sioc_sg_req *);
int get_vif_cnt(unsigned int, struct sioc_vif_req *);
int mrt_rtwalk_mfcsysctl(struct rtentry *, void *, unsigned int);
int ip_mrouter_init(struct socket *, struct mbuf *);
int mrouter_rtwalk_delete(struct rtentry *, void *, unsigned int);
int get_version(struct mbuf *);
int add_vif(struct socket *, struct mbuf *);
int del_vif(struct socket *, struct mbuf *);
void update_mfc_params(struct mfcctl2 *, int, unsigned int);
int mfc_add(struct mfcctl2 *, struct in_addr *, struct in_addr *,
    int, unsigned int, int);
int add_mfc(struct socket *, struct mbuf *);
int del_mfc(struct socket *, struct mbuf *);
int set_api_config(struct socket *, struct mbuf *); /* choose API capabilities */
int get_api_support(struct mbuf *);
int get_api_config(struct mbuf *);
int socket_send(struct socket *, struct mbuf *,
			    struct sockaddr_in *);
int ip_mdq(struct mbuf *, struct ifnet *, struct rtentry *);
struct ifnet *if_lookupbyvif(vifi_t, unsigned int);
struct rtentry *rt_mcast_add(struct ifnet *, struct sockaddr *,
    struct sockaddr *);
void mrt_mcast_del(struct rtentry *, unsigned int);

/*
 * Kernel multicast routing API capabilities and setup.
 * If more API capabilities are added to the kernel, they should be
 * recorded in `mrt_api_support'.
 *
 * `mrt_api_config' holds the subset of `mrt_api_support' selected through
 * MRT_API_CONFIG; it is a single value shared by all routing tables and
 * is reset to 0 by ip_mrouter_done().
 */
static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
					  MRT_MFC_RP);
static u_int32_t mrt_api_config = 0;

/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 * Statistics are updated by the caller if needed
 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
 */
struct rtentry *
mfc_find(struct ifnet *ifp, struct in_addr *origin, struct in_addr *group,
    unsigned int rtableid)
{
	struct rtentry		*rt;
	struct sockaddr_in	 msin;

	memset(&msin, 0, sizeof(msin));
	msin.sin_len = sizeof(msin);
	msin.sin_family = AF_INET;
	msin.sin_addr = *group;

	rt = rtalloc(sintosa(&msin), 0, rtableid);
	do {
		if (!rtisvalid(rt)) {
			rtfree(rt);
			return NULL;
		}
		/* Don't consider non multicast routes. */
		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
		    (RTF_HOST | RTF_MULTICAST))
			continue;
		/* Return first occurrence if interface is not specified. */
		if (ifp == NULL)
			return (rt);
		if (rt->rt_ifidx == ifp->if_index)
			return (rt);
	} while ((rt = rtable_iterate(rt)) != NULL);

	return (NULL);
}

/*
 * Dispatch an MRT setsockopt command that modifies the multicast
 * routing tables.
 *
 * Every command other than MRT_INIT must come from the socket currently
 * registered as multicast router for the socket's routing table;
 * otherwise, and for unknown options, ENOPROTOOPT is returned.  On a
 * recognised command the handler's result is returned.
 */
int
ip_mrouter_set(struct socket *so, int optname, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);

	if (optname != MRT_INIT && so != ip_mrouter[inp->inp_rtableid])
		return (ENOPROTOOPT);

	switch (optname) {
	case MRT_INIT:
		return (ip_mrouter_init(so, m));
	case MRT_DONE:
		return (ip_mrouter_done(so));
	case MRT_ADD_VIF:
		return (add_vif(so, m));
	case MRT_DEL_VIF:
		return (del_vif(so, m));
	case MRT_ADD_MFC:
		return (add_mfc(so, m));
	case MRT_DEL_MFC:
		return (del_mfc(so, m));
	case MRT_API_CONFIG:
		return (set_api_config(so, m));
	default:
		return (ENOPROTOOPT);
	}
}

/*
 * Dispatch an MRT getsockopt command.
 *
 * Only the socket registered as multicast router for its routing table
 * may query; any other socket, or an unknown option, gets ENOPROTOOPT.
 */
int
ip_mrouter_get(struct socket *so, int optname, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);

	if (so != ip_mrouter[inp->inp_rtableid])
		return (ENOPROTOOPT);

	switch (optname) {
	case MRT_VERSION:
		return (get_version(m));
	case MRT_API_SUPPORT:
		return (get_api_support(m));
	case MRT_API_CONFIG:
		return (get_api_config(m));
	default:
		return (ENOPROTOOPT);
	}
}

/*
 * Handle the ioctl commands that read counters from the forwarding
 * cache (SIOCGETVIFCNT and SIOCGETSGCNT).
 *
 * Returns ENOTCONN when the socket has no pcb, EINVAL when the socket is
 * not the registered multicast router for its routing table, ENOTTY for
 * any other command, and otherwise the result of the counter lookup.
 * The kernel lock is held for the whole check; the shared net lock is
 * taken around the lookup itself.
 */
int
mrt_ioctl(struct socket *so, u_long cmd, caddr_t data)
{
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL)
		return (ENOTCONN);

	KERNEL_LOCK();

	if (so != ip_mrouter[inp->inp_rtableid]) {
		error = EINVAL;
	} else if (cmd == SIOCGETVIFCNT) {
		NET_LOCK_SHARED();
		error = get_vif_cnt(inp->inp_rtableid,
		    (struct sioc_vif_req *)data);
		NET_UNLOCK_SHARED();
	} else if (cmd == SIOCGETSGCNT) {
		NET_LOCK_SHARED();
		error = get_sg_cnt(inp->inp_rtableid,
		    (struct sioc_sg_req *)data);
		NET_UNLOCK_SHARED();
	} else {
		error = ENOTTY;
	}

	KERNEL_UNLOCK();
	return (error);
}

/*
 * Fill `req' with the packet, byte and wrong-interface counts summed
 * over every multicast route found for the requested source/group in
 * routing table `rtableid'.
 *
 * If no route exists all three counters are set to 0xffffffff and
 * EADDRNOTAVAIL is returned; otherwise 0 is returned.
 */
int
get_sg_cnt(unsigned int rtableid, struct sioc_sg_req *req)
{
	struct rtentry *rt;
	struct mfc *entry;

	if ((rt = mfc_find(NULL, &req->src, &req->grp, rtableid)) == NULL) {
		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
		return (EADDRNOTAVAIL);
	}

	req->pktcnt = req->bytecnt = req->wrong_if = 0;
	do {
		/*
		 * Only multicast host routes that still carry their
		 * forwarding state contribute to the totals.
		 */
		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) ==
		    (RTF_HOST | RTF_MULTICAST) &&
		    (entry = (struct mfc *)rt->rt_llinfo) != NULL) {
			req->pktcnt += entry->mfc_pkt_cnt;
			req->bytecnt += entry->mfc_byte_cnt;
			req->wrong_if += entry->mfc_wrong_if;
		}
	} while ((rt = rtable_iterate(rt)) != NULL);

	return (0);
}

/*
 * Copy the input/output packet and byte counters of the vif named by
 * req->vifi (within routing table `rtableid') into `req'.
 * Returns EINVAL if no interface carries that vif, 0 otherwise.
 */
int
get_vif_cnt(unsigned int rtableid, struct sioc_vif_req *req)
{
	struct ifnet	*ifp;
	struct vif	*vp;

	ifp = if_lookupbyvif(req->vifi, rtableid);
	if (ifp == NULL)
		return (EINVAL);

	vp = (struct vif *)ifp->if_mcast;
	req->icount = vp->v_pkt_in;
	req->ocount = vp->v_pkt_out;
	req->ibytes = vp->v_bytes_in;
	req->obytes = vp->v_bytes_out;

	return (0);
}

/*
 * sysctl backend exporting one struct vifinfo per configured vif.
 *
 * oldp:    user buffer, or NULL to only query the required size.
 * oldlenp: in: size of the user buffer; out: bytes required (padded by
 *          10% when only the size was queried).
 *
 * Records are copied out while they still fit into the buffer.  Returns
 * a copyout() error, ENOMEM if the buffer was too small for all vifs,
 * or 0.
 */
int
mrt_sysctl_vif(void *oldp, size_t *oldlenp)
{
	const size_t avail = *oldlenp;
	size_t total = 0;
	caddr_t out = oldp;
	struct vifinfo rec;
	struct ifnet *ifp;
	struct vif *vp;
	int error;

	memset(&rec, 0, sizeof(rec));
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		vp = (struct vif *)ifp->if_mcast;
		if (vp == NULL)
			continue;

		rec.v_vifi = vp->v_id;
		rec.v_flags = vp->v_flags;
		rec.v_threshold = vp->v_threshold;
		rec.v_lcl_addr = vp->v_lcl_addr;
		rec.v_rmt_addr = vp->v_rmt_addr;
		rec.v_pkt_in = vp->v_pkt_in;
		rec.v_pkt_out = vp->v_pkt_out;
		rec.v_bytes_in = vp->v_bytes_in;
		rec.v_bytes_out = vp->v_bytes_out;

		/* Always account for the record, copy only if it fits. */
		total += sizeof(rec);
		if (out == NULL || total > avail)
			continue;

		error = copyout(&rec, out, sizeof(rec));
		if (error)
			return (error);
		out += sizeof(rec);
	}

	/* Size query: report the estimate with some headroom. */
	if (out == NULL) {
		*oldlenp = (11 * total) / 10;
		return (0);
	}

	*oldlenp = total;
	return (avail < total ? ENOMEM : 0);
}

/*
 * State shared between mrt_sysctl_mfc() and its routing table walker
 * mrt_rtwalk_mfcsysctl().
 */
struct mfcsysctlarg {
	struct mfcinfo	*msa_minfos;	/* output array, NULL for size query */
	size_t		 msa_len;	/* size of msa_minfos in bytes */
	size_t		 msa_needed;	/* bytes used (or required) so far */
};

/*
 * rtable_walk() callback used by mrt_sysctl_mfc().
 *
 * For every multicast host route that still carries forwarding state,
 * either account for one struct mfcinfo (size query, msa_minfos == NULL)
 * or merge the route into the output array.  Routes sharing the same
 * origin and group are folded into a single mfcinfo slot: counters are
 * added up and the per-vif TTL is stored at the index of the route's vif.
 * In size-query mode each route is counted separately, so the reported
 * size may exceed what is later used.
 *
 * Always returns 0 so the walk continues; a route that does not fit into
 * the buffer is silently skipped.
 */
int
mrt_rtwalk_mfcsysctl(struct rtentry *rt, void *arg, unsigned int rtableid)
{
	struct mfc		*mfc = (struct mfc *)rt->rt_llinfo;
	struct mfcsysctlarg	*msa = (struct mfcsysctlarg *)arg;
	struct ifnet		*ifp;
	struct vif		*v;
	struct mfcinfo		*minfo;
	int			 new = 0;

	/* Skip entries being removed. */
	if (mfc == NULL)
		return (0);

	/* Skip non-multicast routes. */
	if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
	    (RTF_HOST | RTF_MULTICAST))
		return (0);

	/* User just asked for the output size. */
	if (msa->msa_minfos == NULL) {
		msa->msa_needed += sizeof(*minfo);
		return (0);
	}

	/* Skip route with invalid interfaces. */
	if ((ifp = if_get(rt->rt_ifidx)) == NULL)
		return (0);
	if ((v = (struct vif *)ifp->if_mcast) == NULL) {
		if_put(ifp);
		return (0);
	}

	/*
	 * Walk the slots that lie entirely inside the allocation: the end
	 * of the candidate slot (minfo + 1) must not pass msa_len bytes,
	 * which matters when msa_len is not a multiple of the slot size.
	 */
	for (minfo = msa->msa_minfos;
	    (uint8_t *)(minfo + 1) <=
	    (uint8_t *)msa->msa_minfos + msa->msa_len;
	    minfo++) {
		/* Find a new entry or update old entry. */
		if (minfo->mfc_origin.s_addr !=
		    satosin(rt->rt_gateway)->sin_addr.s_addr ||
		    minfo->mfc_mcastgrp.s_addr !=
		    satosin(rt_key(rt))->sin_addr.s_addr) {
			/* Slot belongs to another (origin, group) pair. */
			if (minfo->mfc_origin.s_addr != 0 ||
			    minfo->mfc_mcastgrp.s_addr != 0)
				continue;

			/* All-zero slot: unused so far, claim it. */
			new = 1;
		}

		minfo->mfc_origin = satosin(rt->rt_gateway)->sin_addr;
		minfo->mfc_mcastgrp = satosin(rt_key(rt))->sin_addr;
		minfo->mfc_parent = mfc->mfc_parent;
		minfo->mfc_pkt_cnt += mfc->mfc_pkt_cnt;
		minfo->mfc_byte_cnt += mfc->mfc_byte_cnt;
		minfo->mfc_ttls[v->v_id] = mfc->mfc_ttl;
		break;
	}

	/* Only a newly claimed slot grows the amount of data to copy out. */
	if (new != 0)
		msa->msa_needed += sizeof(*minfo);

	if_put(ifp);

	return (0);
}

/*
 * sysctl backend exporting the multicast forwarding cache as an array
 * of struct mfcinfo.
 *
 * oldp:    user buffer, or NULL to only query the required size.
 * oldlenp: in: size of the user buffer; out: number of bytes produced
 *          (or required for a size query).
 *
 * Requests with a buffer larger than MAXPHYS are rejected with EINVAL.
 * The entries of all routing tables are collected into a zeroed
 * temporary buffer of the requested size and then copied out.  Returns
 * 0 or the copyout() error.
 */
int
mrt_sysctl_mfc(void *oldp, size_t *oldlenp)
{
	struct mfcsysctlarg	 msa;
	unsigned int		 tid;
	int			 error = 0;

	if (oldp != NULL && *oldlenp > MAXPHYS)
		return (EINVAL);

	memset(&msa, 0, sizeof(msa));
	if (oldp != NULL && *oldlenp > 0) {
		msa.msa_len = *oldlenp;
		msa.msa_minfos = malloc(msa.msa_len, M_TEMP,
		    M_WAITOK | M_ZERO);
	}

	for (tid = 0; tid <= RT_TABLEID_MAX; tid++)
		rtable_walk(tid, AF_INET, NULL, mrt_rtwalk_mfcsysctl, &msa);

	if (msa.msa_minfos != NULL && msa.msa_needed > 0)
		error = copyout(msa.msa_minfos, oldp, msa.msa_needed);

	/* The temporary buffer is released on success and failure alike. */
	free(msa.msa_minfos, M_TEMP, msa.msa_len);
	if (error != 0)
		return (error);

	*oldlenp = msa.msa_needed;
	return (0);
}

/*
 * Enable multicast routing: register `so' as the multicast router
 * socket of its routing table.
 *
 * Returns EOPNOTSUPP unless `so' is a raw IGMP socket, EINVAL unless the
 * option value is an int equal to 1, EADDRINUSE if the table already
 * has a router socket, and 0 on success.
 */
int
ip_mrouter_init(struct socket *so, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);
	unsigned int rdomain = inp->inp_rtableid;

	if (so->so_type != SOCK_RAW ||
	    so->so_proto->pr_protocol != IPPROTO_IGMP)
		return (EOPNOTSUPP);

	if (m == NULL || m->m_len < sizeof(int))
		return (EINVAL);
	if (*mtod(m, int *) != 1)
		return (EINVAL);

	if (ip_mrouter[rdomain] != NULL)
		return (EADDRINUSE);
	ip_mrouter[rdomain] = so;

	return (0);
}

/*
 * rtable_walk() callback used by ip_mrouter_done(): returns EEXIST for
 * a multicast host route and 0 for everything else.
 */
int
mrouter_rtwalk_delete(struct rtentry *rt, void *arg, unsigned int rtableid)
{
	if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) ==
	    (RTF_HOST | RTF_MULTICAST))
		return EEXIST;

	/* Not a multicast route, keep walking. */
	return (0);
}

/*
 * Disable multicast routing
 */
int
ip_mrouter_done(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct ifnet *ifp;
	unsigned int rtableid = inp->inp_rtableid;
	int error;

	NET_ASSERT_LOCKED();

	/* Delete all remaining installed multicast routes. */
	do {
		struct rtentry *rt = NULL;

		error = rtable_walk(rtableid, AF_INET, &rt,
		    mrouter_rtwalk_delete, NULL);
		if (rt != NULL && error == EEXIST) {
			mrt_mcast_del(rt, rtableid);
			error = EAGAIN;
		}
		rtfree(rt);
	} while (error == EAGAIN);

	/* Unregister all interfaces in the domain. */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rtableid)
			continue;

		vif_delete(ifp);
	}

	mrt_api_config = 0;

	ip_mrouter[rtableid] = NULL;
	mrt_count[rtableid] = 0;

	return (0);
}

/*
 * MRT_VERSION: store the version constant 0x0305 as an int in `m' and
 * set the mbuf length accordingly.  Always returns 0.
 */
int
get_version(struct mbuf *m)
{
	/* XXX !!!! */
	*mtod(m, int *) = 0x0305;
	m->m_len = sizeof(int);

	return (0);
}

/*
 * Configure API capabilities (MRT_API_CONFIG setsockopt).
 *
 * The requested bits are masked with `mrt_api_support', stored in
 * `mrt_api_config' and written back into the option buffer.
 *
 * The capabilities can only be set as the first operation after
 * MRT_INIT, i.e. while the routing table has no vifs and no multicast
 * routes; otherwise the buffer is zeroed and EPERM returned.  A missing
 * or short buffer yields EINVAL.
 */
int
set_api_config(struct socket *so, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);
	unsigned int rdomain = inp->inp_rtableid;
	struct ifnet *ifp;
	u_int32_t *apival;

	if (m == NULL || m->m_len < sizeof(u_int32_t))
		return (EINVAL);
	apival = mtod(m, u_int32_t *);

	/* Refuse as soon as one vif exists in this routing domain. */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain == rdomain && ifp->if_mcast != NULL) {
			*apival = 0;
			return (EPERM);
		}
	}

	/* Likewise if the MFC table is not empty. */
	if (mrt_count[rdomain] > 0) {
		*apival = 0;
		return (EPERM);
	}

	mrt_api_config = *apival & mrt_api_support;
	*apival = mrt_api_config;

	return (0);
}

/*
 * Get API capabilities: store `mrt_api_support' in the option buffer.
 * Returns EINVAL if the buffer is missing or shorter than a u_int32_t.
 */
int
get_api_support(struct mbuf *m)
{
	if (m == NULL || m->m_len < sizeof(u_int32_t))
		return (EINVAL);

	*mtod(m, u_int32_t *) = mrt_api_support;

	return (0);
}

/*
 * Get API configured capabilities: store `mrt_api_config' in the option
 * buffer.  Returns EINVAL if the buffer is missing or shorter than a
 * u_int32_t.
 */
int
get_api_config(struct mbuf *m)
{
	if (m == NULL || m->m_len < sizeof(u_int32_t))
		return (EINVAL);

	*mtod(m, u_int32_t *) = mrt_api_config;

	return (0);
}

/*
 * Pre-initialized AF_INET socket address used as scratch space: users in
 * this file fill in sin_addr right before handing it to a lookup or
 * delivery routine.
 */
static struct sockaddr_in sin = { sizeof(sin), AF_INET };

int
add_vif(struct socket *so, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);
	struct vifctl *vifcp;
	struct vif *vifp;
	struct ifaddr *ifa;
	struct ifnet *ifp;
	struct ifreq ifr;
	int error;
	unsigned int rtableid = inp->inp_rtableid;

	NET_ASSERT_LOCKED();

	if (m == NULL || m->m_len < sizeof(struct vifctl))
		return (EINVAL);

	vifcp = mtod(m, struct vifctl *);
	if (vifcp->vifc_vifi >= MAXVIFS)
		return (EINVAL);
	if (in_nullhost(vifcp->vifc_lcl_addr))
		return (EADDRNOTAVAIL);
	if (if_lookupbyvif(vifcp->vifc_vifi, rtableid) != NULL)
		return (EADDRINUSE);

	/* Tunnels are no longer supported use gif(4) instead. */
	if (vifcp->vifc_flags & VIFF_TUNNEL)
		return (EOPNOTSUPP);
	{
		sin.sin_addr = vifcp->vifc_lcl_addr;
		ifa = ifa_ifwithaddr(sintosa(&sin), rtableid);
		if (ifa == NULL)
			return (EADDRNOTAVAIL);
	}

	/* Use the physical interface associated with the address. */
	ifp = ifa->ifa_ifp;
	if (ifp->if_mcast != NULL)
		return (EADDRINUSE);

	{
		/* Make sure the interface supports multicast. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Enable promiscuous reception of all IP multicasts. */
		memset(&ifr, 0, sizeof(ifr));
		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
		satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
		KERNEL_LOCK();
		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
		KERNEL_UNLOCK();
		if (error)
			return (error);
	}

	vifp = malloc(sizeof(*vifp), M_MRTABLE, M_WAITOK | M_ZERO);
	ifp->if_mcast = (caddr_t)vifp;

	vifp->v_id = vifcp->vifc_vifi;
	vifp->v_flags = vifcp->vifc_flags;
	vifp->v_threshold = vifcp->vifc_threshold;
	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
	vifp->v_rmt_addr = vifcp->vifc_rmt_addr;

	return (0);
}

int
del_vif(struct socket *so, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);
	struct ifnet *ifp;
	vifi_t *vifip;
	unsigned int rtableid = inp->inp_rtableid;

	NET_ASSERT_LOCKED();

	if (m == NULL || m->m_len < sizeof(vifi_t))
		return (EINVAL);

	vifip = mtod(m, vifi_t *);
	if ((ifp = if_lookupbyvif(*vifip, rtableid)) == NULL)
		return (EADDRNOTAVAIL);

	vif_delete(ifp);
	return (0);
}

/*
 * Detach and free the vif attached to `ifp', if any, and undo the
 * all-multicast reception request issued when the vif was added.
 * The result of the SIOCDELMULTI ioctl is ignored.
 */
void
vif_delete(struct ifnet *ifp)
{
	struct vif	*old = (struct vif *)ifp->if_mcast;
	struct ifreq	 ifr;

	if (old == NULL)
		return;

	/* Unhook first so the interface no longer appears as a vif. */
	ifp->if_mcast = NULL;

	memset(&ifr, 0, sizeof(ifr));
	satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
	satosin(&ifr.ifr_addr)->sin_family = AF_INET;
	satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;

	KERNEL_LOCK();
	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
	KERNEL_UNLOCK();

	free(old, M_MRTABLE, sizeof(*old));
}

/*
 * Expiry handler for a multicast route.
 *
 * A route whose mfc_expire mark is still clear gets marked and is put
 * back on the timer queue; a route that is already marked (the mark is
 * cleared by ip_mdq() whenever the route is used) is deleted.
 */
void
mfc_expire_route(struct rtentry *rt, u_int rtableid)
{
	struct mfc	*entry = (struct mfc *)rt->rt_llinfo;

	/* Skip entry being deleted. */
	if (entry == NULL)
		return;

	DPRINTF("Route domain %d origin %#08X group %#08x interface %d "
	    "expire %s", rtableid, satosin(rt->rt_gateway)->sin_addr.s_addr,
	    satosin(rt_key(rt))->sin_addr.s_addr,
	    rt->rt_ifidx, entry->mfc_expire ? "yes" : "no");

	/* Already marked on the previous round: the route goes away. */
	if (entry->mfc_expire != 0) {
		mrt_mcast_del(rt, rtableid);
		return;
	}

	/* Not expired, mark it and add it back to the queue. */
	entry->mfc_expire = 1;
	rt_timer_add(rt, &ip_mrouterq, rtableid);
}

int
mfc_add_route(struct ifnet *ifp, struct sockaddr *origin,
    struct sockaddr *group, struct mfcctl2 *mfccp, int wait)
{
	struct vif		*v = (struct vif *)ifp->if_mcast;
	struct rtentry		*rt;
	struct mfc		*mfc;
	unsigned int		 rtableid = ifp->if_rdomain;

	rt = rt_mcast_add(ifp, origin, group);
	if (rt == NULL)
		return (EHOSTUNREACH);

	mfc = malloc(sizeof(*mfc), M_MRTABLE, wait | M_ZERO);
	if (mfc == NULL) {
		DPRINTF("origin %#08X group %#08X parent %d (%s) "
		    "malloc failed",
		    satosin(origin)->sin_addr.s_addr,
		    satosin(group)->sin_addr.s_addr,
		    mfccp->mfcc_parent, ifp->if_xname);
		mrt_mcast_del(rt, rtableid);
		rtfree(rt);
		return (ENOMEM);
	}

	rt->rt_llinfo = (caddr_t)mfc;

	rt_timer_add(rt, &ip_mrouterq, rtableid);

	mfc->mfc_parent = mfccp->mfcc_parent;
	mfc->mfc_pkt_cnt = 0;
	mfc->mfc_byte_cnt = 0;
	mfc->mfc_wrong_if = 0;
	mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
	mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
	    MRT_MFC_FLAGS_ALL;
	mfc->mfc_expire = 0;

	/* set the RP address */
	if (mrt_api_config & MRT_MFC_RP)
		mfc->mfc_rp = mfccp->mfcc_rp;
	else
		mfc->mfc_rp = zeroin_addr;

	rtfree(rt);

	return (0);
}

/*
 * Bring the multicast routes of one (origin, group) pair in routing
 * table `rtableid' in line with the request `mfccp'.
 *
 * For every existing vif other than the parent: a TTL of 0 removes the
 * route on that vif, a changed TTL or parent updates the existing route,
 * and a missing route is added.  Finally a route on the parent
 * (upstream) interface is added if there is none yet.
 *
 * `wait' is the malloc(9) wait flag passed on to mfc_add_route().
 * Failures of mfc_add_route() are not reported to the caller.
 */
void
update_mfc_params(struct mfcctl2 *mfccp, int wait, unsigned int rtableid)
{
	struct rtentry		*rt;
	struct mfc		*mfc;
	struct ifnet		*ifp;
	int			 i;
	struct sockaddr_in	 osin, msin;

	/* Socket address forms of the origin and the multicast group. */
	memset(&osin, 0, sizeof(osin));
	osin.sin_len = sizeof(osin);
	osin.sin_family = AF_INET;
	osin.sin_addr = mfccp->mfcc_origin;

	memset(&msin, 0, sizeof(msin));
	msin.sin_len = sizeof(msin);
	msin.sin_family = AF_INET;
	msin.sin_addr = mfccp->mfcc_mcastgrp;

	for (i = 0; i < MAXVIFS; i++) {
		/* Don't add/del upstream routes here. */
		if (i == mfccp->mfcc_parent)
			continue;

		/* Test for vif existence and then update the entry. */
		if ((ifp = if_lookupbyvif(i, rtableid)) == NULL)
			continue;

		/* Every path below releases `rt' with rtfree() if non-NULL. */
		rt = mfc_find(ifp, &mfccp->mfcc_origin,
		    &mfccp->mfcc_mcastgrp, rtableid);

		/* vif not configured or removed. */
		if (mfccp->mfcc_ttls[i] == 0) {
			/* Route doesn't exist, nothing to do. */
			if (rt == NULL)
				continue;

			DPRINTF("del route (group %#08X) for vif %d (%s)",
			    mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
			mrt_mcast_del(rt, rtableid);
			rtfree(rt);
			continue;
		}

		/* Route exists, look for changes. */
		if (rt != NULL) {
			mfc = (struct mfc *)rt->rt_llinfo;
			/* Skip route being deleted. */
			if (mfc == NULL) {
				rtfree(rt);
				continue;
			}

			/* No new changes to apply. */
			if (mfccp->mfcc_ttls[i] == mfc->mfc_ttl &&
			    mfccp->mfcc_parent == mfc->mfc_parent) {
				rtfree(rt);
				continue;
			}

			DPRINTF("update route (group %#08X) for vif %d (%s)",
			    mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
			mfc->mfc_ttl = mfccp->mfcc_ttls[i];
			mfc->mfc_parent = mfccp->mfcc_parent;
			rtfree(rt);
			continue;
		}

		DPRINTF("add route (group %#08X) for vif %d (%s)",
		    mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);

		/* Non-zero TTL and no route yet: install one on this vif. */
		mfc_add_route(ifp, sintosa(&osin), sintosa(&msin),
		    mfccp, wait);
	}

	/* Create route for the parent interface. */
	if ((ifp = if_lookupbyvif(mfccp->mfcc_parent, rtableid)) == NULL) {
		DPRINTF("failed to find upstream interface %d",
		    mfccp->mfcc_parent);
		return;
	}

	/* We already have a route, nothing to do here. */
	if ((rt = mfc_find(ifp, &mfccp->mfcc_origin,
	    &mfccp->mfcc_mcastgrp, rtableid)) != NULL) {
		rtfree(rt);
		return;
	}

	DPRINTF("add upstream route (group %#08X) for if %s",
	    mfccp->mfcc_mcastgrp.s_addr, ifp->if_xname);
	mfc_add_route(ifp, sintosa(&osin), sintosa(&msin), mfccp, wait);
}

int
mfc_add(struct mfcctl2 *mfcctl2, struct in_addr *origin,
    struct in_addr *group, int vidx, unsigned int rtableid, int wait)
{
	struct ifnet		*ifp;
	struct vif		*v;
	struct mfcctl2		 mfcctl;

	ifp = if_lookupbyvif(vidx, rtableid);
	if (ifp == NULL ||
	    (v = (struct vif *)ifp->if_mcast) == NULL)
		return (EHOSTUNREACH);

	memset(&mfcctl, 0, sizeof(mfcctl));
	if (mfcctl2 == NULL) {
		mfcctl.mfcc_origin = *origin;
		mfcctl.mfcc_mcastgrp = *group;
		mfcctl.mfcc_parent = vidx;
	} else
		memcpy(&mfcctl, mfcctl2, sizeof(mfcctl));

	update_mfc_params(&mfcctl, wait, rtableid);

	return (0);
}

/*
 * MRT_ADD_MFC: add or update a forwarding cache entry from userland.
 *
 * The option carries a struct mfcctl, or a struct mfcctl2 when any
 * advanced API capability has been enabled through MRT_API_CONFIG.  The
 * request is normalized to a struct mfcctl2 (extra fields zeroed) before
 * it is handed to mfc_add().
 *
 * Returns EINVAL for a missing or short option and otherwise the result
 * of mfc_add(): 0 on success or its error (EHOSTUNREACH when the parent
 * vif does not exist).  Must be called with the net lock held.
 */
int
add_mfc(struct socket *so, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);
	struct mfcctl2 mfcctl2;
	int mfcctl_size = sizeof(struct mfcctl);
	unsigned int rtableid = inp->inp_rtableid;

	NET_ASSERT_LOCKED();

	if (mrt_api_config & MRT_API_FLAGS_ALL)
		mfcctl_size = sizeof(struct mfcctl2);

	if (m == NULL || m->m_len < mfcctl_size)
		return (EINVAL);

	/*
	 * select data size depending on API version.
	 */
	if (mrt_api_config & MRT_API_FLAGS_ALL) {
		struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *);
		memcpy((caddr_t)&mfcctl2, mp2, sizeof(*mp2));
	} else {
		struct mfcctl *mp = mtod(m, struct mfcctl *);
		memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
		memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0,
		    sizeof(mfcctl2) - sizeof(struct mfcctl));
	}

	/*
	 * mfc_add() reports failure with an errno value, never with -1,
	 * so hand its result back instead of testing for -1.
	 */
	return (mfc_add(&mfcctl2, &mfcctl2.mfcc_origin,
	    &mfcctl2.mfcc_mcastgrp, mfcctl2.mfcc_parent, rtableid,
	    M_WAITOK));
}

int
del_mfc(struct socket *so, struct mbuf *m)
{
	struct inpcb *inp = sotoinpcb(so);
	struct rtentry *rt;
	struct mfcctl2 mfcctl2;
	int mfcctl_size = sizeof(struct mfcctl);
	struct mfcctl *mp;
	unsigned int rtableid = inp->inp_rtableid;

	NET_ASSERT_LOCKED();

	/*
	 * XXX: for deleting MFC entries the information in entries
	 * of size "struct mfcctl" is sufficient.
	 */

	if (m == NULL || m->m_len < mfcctl_size)
		return (EINVAL);

	mp = mtod(m, struct mfcctl *);

	memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
	memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0,
	    sizeof(mfcctl2) - sizeof(struct mfcctl));

	DPRINTF("origin %#08X group %#08X rtableid %d",
	    mfcctl2.mfcc_origin.s_addr, mfcctl2.mfcc_mcastgrp.s_addr, rtableid);

	while ((rt = mfc_find(NULL, &mfcctl2.mfcc_origin,
	    &mfcctl2.mfcc_mcastgrp, rtableid)) != NULL) {
		mrt_mcast_del(rt, rtableid);
		rtfree(rt);
	}

	return (0);
}

/*
 * Queue `mm' with source address `src' on the receive buffer of `so'
 * and wake up the reader.
 *
 * Returns 0 when the message was queued.  If `so' is NULL or the
 * message could not be appended, `mm' is freed and -1 is returned.
 */
int
socket_send(struct socket *so, struct mbuf *mm, struct sockaddr_in *src)
{
	int queued = 0;

	if (so != NULL) {
		mtx_enter(&so->so_rcv.sb_mtx);
		queued = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	if (queued == 0) {
		m_freem(mm);
		return (-1);
	}

	sorwakeup(so);
	return (0);
}

/*
 * IP multicast forwarding function. This function assumes that the packet
 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
 * pointed to by "ifp", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is
 * erroneous, in which case a non-zero return value tells the caller to
 * discard it.
 */

#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */

int
ip_mforward(struct mbuf *m, struct ifnet *ifp)
{
	struct ip *ip = mtod(m, struct ip *);
	struct vif *v;
	struct rtentry *rt;
	static int srctun = 0;
	struct mbuf *mm;
	unsigned int rtableid = ifp->if_rdomain;

	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
	    ((u_char *)(ip + 1))[1] != IPOPT_LSRR) {
		/*
		 * Packet arrived via a physical interface or
		 * an encapsulated tunnel or a register_vif.
		 */
	} else {
		/*
		 * Packet arrived through a source-route tunnel.
		 * Source-route tunnels are no longer supported.
		 */
		if ((srctun++ % 1000) == 0)
			log(LOG_ERR, "ip_mforward: received source-routed "
			    "packet from %x\n", ntohl(ip->ip_src.s_addr));
		return (EOPNOTSUPP);
	}

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ip->ip_dst.s_addr))
		return (0);

	/*
	 * Determine forwarding vifs from the forwarding cache table
	 */
	++mrtstat.mrts_mfc_lookups;
	rt = mfc_find(NULL, &ip->ip_src, &ip->ip_dst, rtableid);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		return (ip_mdq(m, ifp, rt));
	} else {
		/*
		 * If we don't have a route for packet's origin,
		 * Make a copy of the packet & send message to routing daemon
		 */
		int hlen = ip->ip_hl << 2;

		++mrtstat.mrts_mfc_misses;
		mrtstat.mrts_no_route++;

		{
			struct igmpmsg *im;

			/*
			 * Locate the vifi for the incoming interface for
			 * this packet.
			 * If none found, drop packet.
			 */
			if ((v = (struct vif *)ifp->if_mcast) == NULL)
				return (EHOSTUNREACH);
			/*
			 * Make a copy of the header to send to the user level
			 * process
			 */
			mm = m_copym(m, 0, hlen, M_NOWAIT);
			if (mm == NULL ||
			    (mm = m_pullup(mm, hlen)) == NULL)
				return (ENOBUFS);

			/*
			 * Send message to routing daemon to install
			 * a route into the kernel table
			 */

			im = mtod(mm, struct igmpmsg *);
			im->im_msgtype = IGMPMSG_NOCACHE;
			im->im_mbz = 0;
			im->im_vif = v->v_id;

			mrtstat.mrts_upcalls++;

			sin.sin_addr = ip->ip_src;
			if (socket_send(ip_mrouter[rtableid], mm, &sin) < 0) {
				log(LOG_WARNING, "ip_mforward: ip_mrouter "
				    "socket queue full\n");
				++mrtstat.mrts_upq_sockfull;
				return (ENOBUFS);
			}

			mfc_add(NULL, &ip->ip_src, &ip->ip_dst, v->v_id,
			    rtableid, M_NOWAIT);
		}

		return (0);
	}
}

/*
 * Packet forwarding routine once entry in the cache is made.
 *
 * m:    the packet to forward; it is duplicated for each outgoing vif,
 *       the original is not modified here.
 * ifp0: interface the packet arrived on.
 * rt:   first multicast route for the packet's group as returned by
 *       mfc_find(); this function takes over the caller's reference
 *       (it is rtfree()d on the early return paths, and the iteration
 *       runs until rtable_iterate() returns NULL).
 *
 * Returns EHOSTUNREACH if the incoming interface has no vif or the route
 * has no forwarding state, ENOBUFS if the packet could not be
 * duplicated, and 0 otherwise (including packets dropped because they
 * arrived on the wrong interface).
 */
int
ip_mdq(struct mbuf *m, struct ifnet *ifp0, struct rtentry *rt)
{
	struct ip  *ip = mtod(m, struct ip *);
	struct mfc *mfc = (struct mfc *)rt->rt_llinfo;
	struct vif *v = (struct vif *)ifp0->if_mcast;
	struct ifnet *ifp;
	struct mbuf *mc;
	struct ip_moptions imo;

	/* Sanity check: we have all promised pointers. */
	if (v == NULL || mfc == NULL) {
		rtfree(rt);
		return (EHOSTUNREACH);
	}

	/*
	 * Don't forward if it didn't arrive from the parent vif for its origin.
	 */
	if (mfc->mfc_parent != v->v_id) {
		/* came in the wrong interface */
		++mrtstat.mrts_wrong_if;
		mfc->mfc_wrong_if++;
		rtfree(rt);
		return (0);
	}

	/* If I sourced this packet, it counts as output, else it was input. */
	if (in_hosteq(ip->ip_src, v->v_lcl_addr)) {
		v->v_pkt_out++;
		v->v_bytes_out += m->m_pkthdr.len;
	} else {
		v->v_pkt_in++;
		v->v_bytes_in += m->m_pkthdr.len;
	}

	/*
	 * For each vif, decide if a copy of the packet should be forwarded.
	 * Forward if:
	 *		- the ttl exceeds the vif's threshold
	 *		- there are group members downstream on interface
	 *
	 * Note that `continue' inside this do/while jumps to the loop
	 * condition, i.e. it advances to the next route.
	 */
	do {
		/* Don't consider non multicast routes. */
		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
		    (RTF_HOST | RTF_MULTICAST))
			continue;

		/* Skip routes whose forwarding state is already gone. */
		mfc = (struct mfc *)rt->rt_llinfo;
		if (mfc == NULL)
			continue;

		/* Counted even if the packet is not sent on this route. */
		mfc->mfc_pkt_cnt++;
		mfc->mfc_byte_cnt += m->m_pkthdr.len;

		/* Don't let this route expire. */
		mfc->mfc_expire = 0;

		if (ip->ip_ttl <= mfc->mfc_ttl)
			continue;
		if ((ifp = if_get(rt->rt_ifidx)) == NULL)
			continue;

		/* Sanity check: did we configure this? */
		if ((v = (struct vif *)ifp->if_mcast) == NULL) {
			if_put(ifp);
			continue;
		}

		/* Don't send in the upstream interface. */
		if (mfc->mfc_parent == v->v_id) {
			if_put(ifp);
			continue;
		}

		v->v_pkt_out++;
		v->v_bytes_out += m->m_pkthdr.len;

		/*
		 * Make a new reference to the packet; make sure
		 * that the IP header is actually copied, not
		 * just referenced, so that ip_output() only
		 * scribbles on the copy.
		 */
		mc = m_dup_pkt(m, max_linkhdr, M_NOWAIT);
		if (mc == NULL) {
			if_put(ifp);
			rtfree(rt);
			return (ENOBUFS);
		}

		/*
		 * if physical interface option, extract the options
		 * and then send
		 *
		 * Only the interface index, TTL and loop fields of the
		 * multicast options are filled in before ip_output().
		 */
		imo.imo_ifidx = rt->rt_ifidx;
		imo.imo_ttl = ip->ip_ttl - IPTTLDEC;
		imo.imo_loop = 1;

		ip_output(mc, NULL, NULL, IP_FORWARDING, &imo, NULL, 0);
		if_put(ifp);
	} while ((rt = rtable_iterate(rt)) != NULL);

	return (0);
}

/*
 * Find the interface in routing domain `rtableid' whose attached vif
 * has index `vifi'.  Returns NULL if there is none.  The interface list
 * is scanned linearly.
 */
struct ifnet *
if_lookupbyvif(vifi_t vifi, unsigned int rtableid)
{
	struct ifnet	*ifp;
	struct vif	*vp;

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		vp = (struct vif *)ifp->if_mcast;
		if (ifp->if_rdomain == rtableid && vp != NULL &&
		    vp->v_id == vifi)
			return (ifp);
	}

	return (NULL);
}

struct rtentry *
rt_mcast_add(struct ifnet *ifp, struct sockaddr *origin, struct sockaddr *group)
{
	struct ifaddr		*ifa;
	int			 rv;
	unsigned int		 rtableid = ifp->if_rdomain;

	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET)
			break;
	}
	if (ifa == NULL) {
		DPRINTF("ifa == NULL");
		return (NULL);
	}

	rv = rt_ifa_add(ifa, RTF_HOST | RTF_MULTICAST | RTF_MPATH,
	    group, ifp->if_rdomain);
	if (rv != 0) {
		DPRINTF("rt_ifa_add failed (%d)", rv);
		return (NULL);
	}

	mrt_count[rtableid]++;

	return (mfc_find(ifp, NULL, &satosin(group)->sin_addr, rtableid));
}

void
mrt_mcast_del(struct rtentry *rt, unsigned int rtableid)
{
	struct ifnet		*ifp;
	int			 error;

	/* Remove all timers related to this route. */
	rt_timer_remove_all(rt);

	free(rt->rt_llinfo, M_MRTABLE, sizeof(struct mfc));
	rt->rt_llinfo = NULL;

	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return;
	error = rtdeletemsg(rt, ifp, rtableid);
	if_put(ifp);

	if (error)
		DPRINTF("delete route error %d\n", error);

	mrt_count[rtableid]--;
}