/*
 * [BACK] Return to kroute.c CVS log [TXT][DIR] Up to [local] / src / usr.sbin / bgpd
 *
 * File: [local] / src / usr.sbin / bgpd / kroute.c (download)
 *
 * Revision 1.309, Tue Jan 9 13:41:32 2024 UTC (4 months, 3 weeks ago) by claudio
 * Branch: MAIN
 * CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, HEAD
 * Changes since 1.308: +26 -25 lines
 *
 * Convert the parent process imsg handling over to the new imsg API.
 *
 * This simplifies the code a fair bit and removes direct unchecked memory
 * access to imsg.data.
 * OK tb@
 */

/*	$OpenBSD: kroute.c,v 1.309 2024/01/09 13:41:32 claudio Exp $ */

/*
 * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 2022 Claudio Jeker <claudio@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netmpls/mpls.h>

#include <errno.h>
#include <ifaddrs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <imsg.h>

#include "bgpd.h"
#include "log.h"

/* priority value stamped on bgpd's own routes by kr_change()/kr_delete() */
#define	RTP_MINE	0xff

/* table of kernel routing tables, indexed by rtableid; empty slots are NULL */
struct ktable		**krt;
/* number of slots in krt (see ktable_new() / ktable_get()) */
u_int			  krt_size;

/* routing socket state, set up by kr_init() */
struct {
	/* starts at 1 in kr_init(); presumably the rtmsg sequence counter */
	uint32_t		rtseq;
	/* result of getpid() taken in kr_init() */
	pid_t			pid;
	/* the AF_ROUTE socket */
	int			fd;
	/* priority passed to kr_init() or kr_fib_prio_set() */
	uint8_t			fib_prio;
} kr_state;

/* IPv4 route as stored in a ktable's kroute_tree (kt->krt) */
struct kroute {
	RB_ENTRY(kroute)	 entry;
	/* further routes chained to this tree node; walked in kr_show_route() */
	struct kroute		*next;
	struct in_addr		 prefix;
	struct in_addr		 nexthop;
	/* label in network byte order, as built by krVPN4_change() */
	uint32_t		 mplslabel;
	/* F_* bits such as F_BGPD_INSERTED, F_BLACKHOLE, F_REJECT, F_MPLS */
	uint16_t		 flags;
	/* id obtained from rtlabel_name2id(), released with rtlabel_unref() */
	uint16_t		 labelid;
	u_short			 ifindex;
	uint8_t			 prefixlen;
	uint8_t			 priority;
};

/* IPv6 route as stored in a ktable's kroute6_tree (kt->krt6) */
struct kroute6 {
	RB_ENTRY(kroute6)	 entry;
	/* further routes chained to this tree node; walked in kr_show_route() */
	struct kroute6		*next;
	struct in6_addr		 prefix;
	struct in6_addr		 nexthop;
	uint32_t		 prefix_scope_id;	/* because ... */
	/* copied from kf->nexthop.scope_id in kr6_change()/krVPN6_change() */
	uint32_t		 nexthop_scope_id;
	/* label in network byte order, as built by krVPN6_change() */
	uint32_t		 mplslabel;
	/* F_* bits such as F_BGPD_INSERTED, F_BLACKHOLE, F_REJECT, F_MPLS */
	uint16_t		 flags;
	/* id obtained from rtlabel_name2id(), released with rtlabel_unref() */
	uint16_t		 labelid;
	u_short			 ifindex;
	uint8_t			 prefixlen;
	uint8_t			 priority;
};

/* nexthop tracked in a ktable's knexthop_tree (kt->knt) */
struct knexthop {
	RB_ENTRY(knexthop)	 entry;
	struct bgpd_addr	 nexthop;
	/*
	 * Route attached to this nexthop or NULL.  kr_show_route() treats it
	 * as a struct kroute * for AID_INET and as a struct kroute6 * for
	 * AID_INET6 nexthops.
	 */
	void			*kroute;
	u_short			 ifindex;
};

/* redistributed network recorded in a ktable's kredist_tree (kt->kredist) */
struct kredist_node {
	RB_ENTRY(kredist_node)	 entry;
	struct bgpd_addr	 prefix;
	/* copied from network_config.rd */
	uint64_t		 rd;
	uint8_t			 prefixlen;
	/* the 'dynamic' argument given to kr_net_redist_add() */
	uint8_t			 dynamic;
};

/* interface entry kept in the global kif_tree 'kit' */
struct kif {
	RB_ENTRY(kif)		 entry;
	char			 ifname[IFNAMSIZ];
	uint64_t		 baudrate;
	u_int			 rdomain;
	/* interface flags; kr_show_interface() tests IFF_UP here */
	int			 flags;
	u_short			 ifindex;
	uint8_t			 if_type;
	uint8_t			 link_state;
	uint8_t			 nh_reachable;	/* for nexthop verification */
	uint8_t			 depend_state;	/* for session depend on */
};

/* kernel routing table (ktable) management */
int	ktable_new(u_int, u_int, char *, int);
void	ktable_free(u_int);
void	ktable_destroy(struct ktable *);
struct ktable	*ktable_get(u_int);

/* per address family route changes and network redistribution */
int	kr4_change(struct ktable *, struct kroute_full *);
int	kr6_change(struct ktable *, struct kroute_full *);
int	krVPN4_change(struct ktable *, struct kroute_full *);
int	krVPN6_change(struct ktable *, struct kroute_full *);
int	kr_net_match(struct ktable *, struct network_config *, uint16_t, int);
struct network *kr_net_find(struct ktable *, struct network *);
void	kr_net_clear(struct ktable *);
void	kr_redistribute(int, struct ktable *, struct kroute_full *);
uint8_t	kr_priority(struct kroute_full *);
struct kroute_full *kr_tofull(struct kroute *);
struct kroute_full *kr6_tofull(struct kroute6 *);
/* comparison functions used by the RB trees generated below */
int	kroute_compare(struct kroute *, struct kroute *);
int	kroute6_compare(struct kroute6 *, struct kroute6 *);
int	knexthop_compare(struct knexthop *, struct knexthop *);
int	kredist_compare(struct kredist_node *, struct kredist_node *);
int	kif_compare(struct kif *, struct kif *);

/* IPv4 route tree helpers (insert/remove take a kroute_full) */
struct kroute	*kroute_find(struct ktable *, const struct bgpd_addr *,
		    uint8_t, uint8_t);
struct kroute	*kroute_matchgw(struct kroute *, struct kroute_full *);
int		 kroute_insert(struct ktable *, struct kroute_full *);
int		 kroute_remove(struct ktable *, struct kroute_full *, int);
void		 kroute_clear(struct ktable *);

/* IPv6 route tree helpers */
struct kroute6	*kroute6_find(struct ktable *, const struct bgpd_addr *,
		    uint8_t, uint8_t);
struct kroute6	*kroute6_matchgw(struct kroute6 *, struct kroute_full *);
void		 kroute6_clear(struct ktable *);

/* nexthop tree helpers */
struct knexthop	*knexthop_find(struct ktable *, struct bgpd_addr *);
int		 knexthop_insert(struct ktable *, struct knexthop *);
void		 knexthop_remove(struct ktable *, struct knexthop *);
void		 knexthop_clear(struct ktable *);

/* interface tree helpers */
struct kif	*kif_find(int);
int		 kif_insert(struct kif *);
int		 kif_remove(struct kif *);
void		 kif_clear(void);

/* route / nexthop validation and lookup */
int		 kroute_validate(struct kroute *);
int		 kroute6_validate(struct kroute6 *);
int		 knexthop_true_nexthop(struct ktable *, struct kroute_full *);
void		 knexthop_validate(struct ktable *, struct knexthop *);
void		 knexthop_track(struct ktable *, u_short);
void		 knexthop_update(struct ktable *, struct kroute_full *);
void		 knexthop_send_update(struct knexthop *);
struct kroute	*kroute_match(struct ktable *, struct bgpd_addr *, int);
struct kroute6	*kroute6_match(struct ktable *, struct bgpd_addr *, int);
void		 kroute_detach_nexthop(struct ktable *, struct knexthop *);

/* misc helpers: interface media / link state and address parsing */
uint8_t		prefixlen_classful(in_addr_t);
uint64_t	ift2ifm(uint8_t);
const char	*get_media_descr(uint64_t);
const char	*get_linkstate(uint8_t, int);
void		get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
void		if_change(u_short, int, struct if_data *);
void		if_announce(void *);

/* routing socket I/O */
int		send_rtmsg(int, struct ktable *, struct kroute_full *);
int		dispatch_rtmsg(void);
int		fetchtable(struct ktable *);
int		fetchifs(int);
int		dispatch_rtmsg_addr(struct rt_msghdr *, struct kroute_full *);
int		kr_fib_delete(struct ktable *, struct kroute_full *, int);
int		kr_fib_change(struct ktable *, struct kroute_full *, int, int);

/* red-black tree of IPv4 routes, ordered by kroute_compare() */
RB_PROTOTYPE(kroute_tree, kroute, entry, kroute_compare)
RB_GENERATE(kroute_tree, kroute, entry, kroute_compare)

/* red-black tree of IPv6 routes, ordered by kroute6_compare() */
RB_PROTOTYPE(kroute6_tree, kroute6, entry, kroute6_compare)
RB_GENERATE(kroute6_tree, kroute6, entry, kroute6_compare)

/* red-black tree of tracked nexthops, ordered by knexthop_compare() */
RB_PROTOTYPE(knexthop_tree, knexthop, entry, knexthop_compare)
RB_GENERATE(knexthop_tree, knexthop, entry, knexthop_compare)

/* red-black tree of redistributed networks, ordered by kredist_compare() */
RB_PROTOTYPE(kredist_tree, kredist_node, entry, kredist_compare)
RB_GENERATE(kredist_tree, kredist_node, entry, kredist_compare)

/* global tree of interfaces; initialised in kr_init() */
RB_HEAD(kif_tree, kif)		kit;
RB_PROTOTYPE(kif_tree, kif, entry, kif_compare)
RB_GENERATE(kif_tree, kif, entry, kif_compare)

/*
 * Nexthop tree of the table named by x->nhtableid.  The result of
 * ktable_get() is dereferenced without a NULL check, so that table
 * must exist.
 */
#define KT2KNT(x)	(&(ktable_get((x)->nhtableid)->knt))

/*
 * exported functions
 */

/*
 * Open and configure the routing socket and load the interface list.
 *
 * On success the socket is stored in kr_state.fd and *fd, fib_prio is
 * remembered in kr_state and 0 is returned.  On failure -1 is returned,
 * *fd is left untouched and the socket (if it was opened) is closed again
 * so that no descriptor is leaked.
 */
int
kr_init(int *fd, uint8_t fib_prio)
{
	int		opt = 0, rcvbuf, default_rcvbuf;
	unsigned int	tid = RTABLE_ANY;
	socklen_t	optlen;

	if ((kr_state.fd = socket(AF_ROUTE,
	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) {
		log_warn("%s: socket", __func__);
		return (-1);
	}

	/* not interested in my own messages */
	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
	    &opt, sizeof(opt)) == -1)
		log_warn("%s: setsockopt", __func__);	/* not fatal */

	/*
	 * grow receive buffer, don't wanna miss messages: start at
	 * MAX_RTSOCK_BUF and halve the request while the kernel answers
	 * ENOBUFS and the request is still above the default size.
	 */
	optlen = sizeof(default_rcvbuf);
	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
	    &default_rcvbuf, &optlen) == -1)
		log_warn("%s: getsockopt SOL_SOCKET SO_RCVBUF", __func__);
	else
		for (rcvbuf = MAX_RTSOCK_BUF;
		    rcvbuf > default_rcvbuf &&
		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
		    rcvbuf /= 2)
			;	/* nothing */

	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_TABLEFILTER, &tid,
	    sizeof(tid)) == -1) {
		log_warn("%s: setsockopt AF_ROUTE ROUTE_TABLEFILTER", __func__);
		goto fail;
	}

	kr_state.pid = getpid();
	kr_state.rtseq = 1;
	kr_state.fib_prio = fib_prio;

	RB_INIT(&kit);

	if (fetchifs(0) == -1)
		goto fail;

	*fd = kr_state.fd;
	return (0);

fail:
	/* do not leak the routing socket on the error paths */
	close(kr_state.fd);
	kr_state.fd = -1;
	return (-1);
}

/*
 * Return the default route priority, RTP_BGP.
 */
int
kr_default_prio(void)
{
	return (RTP_BGP);
}

/*
 * Check a route priority: returns 1 if prio lies in the range
 * (RTP_LOCAL, RTP_MAX], 0 otherwise.
 */
int
kr_check_prio(long long prio)
{
	return (prio > RTP_LOCAL && prio <= RTP_MAX);
}

/*
 * Create the ktable for rtableid and load it from the kernel.
 *
 * rdomid is recorded as the table used for nexthop lookups, name becomes
 * the table description and fs is the initial value of both fib_conf and
 * fib_sync.  The krt index array is grown on demand.  Returns 0 on
 * success and -1 on allocation or fetchtable() failure; an already
 * existing table is a fatal error.
 */
int
ktable_new(u_int rtableid, u_int rdomid, char *name, int fs)
{
	struct ktable	**grown;
	struct ktable	 *tbl;
	size_t		  usedbytes;

	if (rtableid >= krt_size) {
		/* enlarge the index and zero only the newly added slots */
		usedbytes = sizeof(struct ktable *) * krt_size;
		grown = reallocarray(krt, rtableid + 1,
		    sizeof(struct ktable *));
		if (grown == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		krt = grown;
		krt_size = rtableid + 1;
		memset((char *)krt + usedbytes, 0,
		    krt_size * sizeof(struct ktable *) - usedbytes);
	}

	if (krt[rtableid])
		fatalx("ktable_new: table already exists.");

	tbl = calloc(1, sizeof(struct ktable));
	krt[rtableid] = tbl;
	if (tbl == NULL) {
		log_warn("%s", __func__);
		return (-1);
	}

	/* set up the empty containers and the table identity */
	strlcpy(tbl->descr, name, sizeof(tbl->descr));
	RB_INIT(&tbl->krt);
	RB_INIT(&tbl->krt6);
	RB_INIT(&tbl->knt);
	TAILQ_INIT(&tbl->krn);
	tbl->fib_sync = fs;
	tbl->fib_conf = tbl->fib_sync;
	tbl->rtableid = rtableid;
	tbl->nhtableid = rdomid;

	/*
	 * Take a reference on the rdomain table used for nexthop lookups.
	 * This is tbl itself when rdomid == rtableid, which is why the slot
	 * in krt had to be filled in before this point.
	 */
	ktable_get(tbl->nhtableid)->nhrefcnt++;

	/* populate the table from the kernel */
	if (fetchtable(tbl) == -1)
		return (-1);

	tbl->state = RECONF_REINIT;
	log_debug("%s: %s with rtableid %d rdomain %d", __func__, name,
	    rtableid, rdomid);
	return (0);
}

/*
 * Release the table rtableid: decouple it from the kernel, drop its
 * reference on the nexthop (rdomain) table and destroy whichever of the
 * two tables is no longer referenced.  Unknown table ids are ignored.
 */
void
ktable_free(u_int rtableid)
{
	struct ktable	*tbl, *nhtbl;

	tbl = ktable_get(rtableid);
	if (tbl == NULL)
		return;

	/* stop entering routes into the kernel from this table */
	kr_fib_decouple(tbl->rtableid);

	/* give up the reference taken in ktable_new() */
	nhtbl = ktable_get(tbl->nhtableid);
	nhtbl->nhrefcnt--;

	/*
	 * tbl and nhtbl may be the same table.  The nexthop table is only
	 * destroyed here when it is a different table, so the same table
	 * is never destroyed twice; tbl itself goes away once nothing
	 * references it for nexthop lookups anymore.
	 */
	if (nhtbl != tbl && nhtbl->nhrefcnt <= 0)
		ktable_destroy(nhtbl);
	if (tbl->nhrefcnt <= 0)
		ktable_destroy(tbl);
}

/*
 * Tear down kt: decouple it, empty its route, nexthop and network
 * containers, clear its slot in krt and free the structure.
 */
void
ktable_destroy(struct ktable *kt)
{
	/* harmless if the table is already decoupled */
	kr_fib_decouple(kt->rtableid);

	log_debug("%s: freeing ktable %s rtableid %u", __func__, kt->descr,
	    kt->rtableid);

	/* the nexthop tree is only cleared for a main rdomain table */
	if (kt->nhtableid == kt->rtableid)
		knexthop_clear(kt);

	kroute_clear(kt);
	kroute6_clear(kt);
	kr_net_clear(kt);

	krt[kt->rtableid] = NULL;
	free(kt);
}

/*
 * Look up the ktable for rtableid.  Returns NULL when the id is outside
 * the krt index or the slot is empty.
 */
struct ktable *
ktable_get(u_int rtableid)
{
	return (rtableid < krt_size ? krt[rtableid] : NULL);
}

/*
 * Make sure the tables needed for the RIB using rtableid exist and carry
 * the requested configuration.
 *
 * The rdomain table is created (or revived from RECONF_DELETE) when it
 * differs from rtableid or F_RIB_NOFIB is set.  Unless flags contain
 * F_RIB_NOFIB or F_RIB_NOEVALUATE, the table itself is then created or
 * updated with the fib sync setting derived from F_RIB_NOFIBSYNC and the
 * description name.  Returns 0 on success, -1 if a table could not be
 * created; a table unknown to the kernel is a fatal error.
 */
int
ktable_update(u_int rtableid, char *name, int flags)
{
	struct ktable	*tbl, *domtbl;
	u_int		 rdomid;
	int		 want_sync;
	char		 domname[32];

	if (!ktable_exists(rtableid, &rdomid))
		fatalx("King Bula lost a table");	/* may not happen */

	if ((rdomid != rtableid) || (flags & F_RIB_NOFIB)) {
		domtbl = ktable_get(rdomid);
		if (domtbl != NULL) {
			/*
			 * A table that only serves nexthop lookups needs no
			 * full fib synchronisation.
			 */
			if (domtbl->state == RECONF_DELETE) {
				domtbl->fib_conf = 0;
				domtbl->state = RECONF_KEEP;
			}
		} else {
			snprintf(domname, sizeof(domname), "rdomain_%d",
			    rdomid);
			if (ktable_new(rdomid, rdomid, domname, 0))
				return (-1);
		}
	}

	/* for these RIBs only the rdomain table has to exist */
	if (flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE))
		return (0);

	want_sync = !(flags & F_RIB_NOFIBSYNC);

	tbl = ktable_get(rtableid);
	if (tbl == NULL) {
		if (ktable_new(rtableid, rdomid, name, want_sync))
			return (-1);
		return (0);
	}

	/* an already enabled fib sync wins over a request for no sync */
	if (tbl->state == RECONF_DELETE) {
		tbl->fib_conf = want_sync;
		tbl->state = RECONF_KEEP;
	} else if (!tbl->fib_conf)
		tbl->fib_conf = want_sync;

	strlcpy(tbl->descr, name, sizeof(tbl->descr));
	return (0);
}

/*
 * Ask the kernel (NET_RT_TABLE sysctl) whether routing table rtableid
 * exists.  Returns 1 and, if rdomid is not NULL, stores the table's
 * routing domain there; returns 0 when the table does not exist or the
 * sysctl fails for any other reason.
 */
int
ktable_exists(u_int rtableid, u_int *rdomid)
{
	struct rt_tableinfo	 info;
	size_t			 len = sizeof(info);
	int			 mib[6] = { CTL_NET, PF_ROUTE, 0, 0,
				    NET_RT_TABLE, (int)rtableid };

	if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
		/*
		 * ENOENT simply means "no such table"; every other error is
		 * logged but the table is still reported as non-existent.
		 */
		if (errno != ENOENT)
			log_warn("sysctl net.route.rtableid");
		return (0);
	}

	if (rdomid)
		*rdomid = info.rti_domainid;
	return (1);
}

/*
 * Add or change a bgpd route in table rtableid.
 *
 * The route is marked F_BGPD with priority RTP_MINE.  If
 * knexthop_true_nexthop() returns 0 the route is removed instead;
 * otherwise the change is handed to the handler for the prefix's address
 * family.  Returns the handler's result, 0 for an unknown table and -1
 * for an unhandled AID.
 */
int
kr_change(u_int rtableid, struct kroute_full *kf)
{
	struct ktable	*tbl = ktable_get(rtableid);

	/* too noisy during reloads, just ignore */
	if (tbl == NULL)
		return (0);

	kf->priority = RTP_MINE;
	kf->flags |= F_BGPD;

	if (!knexthop_true_nexthop(tbl, kf))
		return (kroute_remove(tbl, kf, 1));

	switch (kf->prefix.aid) {
	case AID_INET:
		return (kr4_change(tbl, kf));
	case AID_INET6:
		return (kr6_change(tbl, kf));
	case AID_VPN_IPv4:
		return (krVPN4_change(tbl, kf));
	case AID_VPN_IPv6:
		return (krVPN6_change(tbl, kf));
	default:
		break;
	}

	log_warnx("%s: not handled AID", __func__);
	return (-1);
}

/*
 * Insert or update the IPv4 route described by kf in kt.
 *
 * Blackhole and reject routes get 127.0.0.1 as nexthop; any other route
 * whose nexthop lies in 127/8 is silently ignored.  An existing route
 * has its nexthop, route label and blackhole/reject flags replaced and
 * an RTM_CHANGE is sent.  Returns 0, or -1 if kroute_insert() failed.
 */
int
kr4_change(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute	*found;

	if (kf->flags & (F_BLACKHOLE|F_REJECT)) {
		kf->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK);
	} else {
		/* nexthop within 127/8 -> ignore silently */
		if ((kf->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) ==
		    htonl(INADDR_LOOPBACK & IN_CLASSA_NET))
			return (0);
	}

	found = kroute_find(kt, &kf->prefix, kf->prefixlen, kf->priority);
	if (found == NULL) {
		if (kroute_insert(kt, kf) == -1)
			return (-1);
		return (0);
	}

	found->nexthop.s_addr = kf->nexthop.v4.s_addr;

	/* swap the route label reference */
	rtlabel_unref(found->labelid);
	found->labelid = rtlabel_name2id(kf->label);

	/* mirror the blackhole / reject state of the request */
	found->flags = (kf->flags & F_BLACKHOLE) ?
	    (found->flags | F_BLACKHOLE) : (found->flags & ~F_BLACKHOLE);
	found->flags = (kf->flags & F_REJECT) ?
	    (found->flags | F_REJECT) : (found->flags & ~F_REJECT);

	if (send_rtmsg(RTM_CHANGE, kt, kf))
		found->flags |= F_BGPD_INSERTED;

	return (0);
}

/*
 * Insert or update the IPv6 route described by kf in kt.
 *
 * Blackhole and reject routes get ::1 as nexthop; any other route with a
 * loopback nexthop is silently ignored.  An existing route has its
 * nexthop, scope id, route label and blackhole/reject flags replaced and
 * an RTM_CHANGE is sent.  Returns 0, or -1 if kroute_insert() failed.
 */
int
kr6_change(struct ktable *kt, struct kroute_full *kf)
{
	struct in6_addr	 loopback6 = IN6ADDR_LOOPBACK_INIT;
	struct kroute6	*found;

	if (kf->flags & (F_BLACKHOLE|F_REJECT)) {
		memcpy(&kf->nexthop.v6, &loopback6, sizeof(kf->nexthop.v6));
	} else {
		/* nexthop to loopback -> ignore silently */
		if (IN6_IS_ADDR_LOOPBACK(&kf->nexthop.v6))
			return (0);
	}

	found = kroute6_find(kt, &kf->prefix, kf->prefixlen, kf->priority);
	if (found == NULL) {
		if (kroute_insert(kt, kf) == -1)
			return (-1);
		return (0);
	}

	memcpy(&found->nexthop, &kf->nexthop.v6, sizeof(struct in6_addr));
	found->nexthop_scope_id = kf->nexthop.scope_id;

	/* swap the route label reference */
	rtlabel_unref(found->labelid);
	found->labelid = rtlabel_name2id(kf->label);

	/* mirror the blackhole / reject state of the request */
	found->flags = (kf->flags & F_BLACKHOLE) ?
	    (found->flags | F_BLACKHOLE) : (found->flags & ~F_BLACKHOLE);
	found->flags = (kf->flags & F_REJECT) ?
	    (found->flags | F_REJECT) : (found->flags & ~F_REJECT);

	if (send_rtmsg(RTM_CHANGE, kt, kf))
		found->flags |= F_BGPD_INSERTED;

	return (0);
}

/*
 * Insert or update the VPNv4 (MPLS labelled IPv4) route described by kf.
 *
 * Routes with a nexthop in 127/8 and routes that do not carry exactly one
 * MPLS label (labellen != 3) are ignored and 0 is returned.  The label is
 * assembled from the three label stack bytes, converted to network byte
 * order and stored together with F_MPLS in kf and in the route entry.
 * Blackhole and reject routes get 127.0.0.1 as nexthop.  Returns 0, or -1
 * if kroute_insert() failed.
 */
int
krVPN4_change(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute	*kr;
	uint32_t	 mplslabel = 0;

	/* nexthop within 127/8 -> ignore silently */
	if ((kf->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) ==
	    htonl(INADDR_LOOPBACK & IN_CLASSA_NET))
		return (0);

	/* only a single MPLS label is supported for now */
	if (kf->prefix.labellen != 3) {
		log_warnx("%s: %s/%u has not a single label", __func__,
		    log_addr(&kf->prefix), kf->prefixlen);
		return (0);
	}
	/*
	 * Widen each byte to uint32_t before shifting: a narrow operand
	 * would be promoted to (signed) int and shifting a value >= 0x80
	 * left by 24 bits overflows int, which is undefined behaviour.
	 */
	mplslabel = ((uint32_t)kf->prefix.labelstack[0] << 24) |
	    ((uint32_t)kf->prefix.labelstack[1] << 16) |
	    ((uint32_t)kf->prefix.labelstack[2] << 8);
	mplslabel = htonl(mplslabel);

	kf->flags |= F_MPLS;
	kf->mplslabel = mplslabel;

	/* for blackhole and reject routes nexthop needs to be 127.0.0.1 */
	if (kf->flags & (F_BLACKHOLE|F_REJECT))
		kf->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK);

	if ((kr = kroute_find(kt, &kf->prefix, kf->prefixlen,
	    kf->priority)) == NULL) {
		if (kroute_insert(kt, kf) == -1)
			return (-1);
	} else {
		/* update the existing entry in place */
		kr->mplslabel = mplslabel;
		kr->flags |= F_MPLS;
		kr->ifindex = kf->ifindex;
		kr->nexthop.s_addr = kf->nexthop.v4.s_addr;
		rtlabel_unref(kr->labelid);
		kr->labelid = rtlabel_name2id(kf->label);
		if (kf->flags & F_BLACKHOLE)
			kr->flags |= F_BLACKHOLE;
		else
			kr->flags &= ~F_BLACKHOLE;
		if (kf->flags & F_REJECT)
			kr->flags |= F_REJECT;
		else
			kr->flags &= ~F_REJECT;

		if (send_rtmsg(RTM_CHANGE, kt, kf))
			kr->flags |= F_BGPD_INSERTED;
	}

	return (0);
}

/*
 * Insert or update the VPNv6 (MPLS labelled IPv6) route described by kf.
 *
 * Routes with a loopback nexthop and routes that do not carry exactly one
 * MPLS label (labellen != 3) are ignored and 0 is returned.  The label is
 * assembled from the three label stack bytes, converted to network byte
 * order and stored together with F_MPLS in kf and in the route entry.
 * Blackhole and reject routes get ::1 as nexthop.  Returns 0, or -1 if
 * kroute_insert() failed.
 */
int
krVPN6_change(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute6	*kr6;
	struct in6_addr	 lo6 = IN6ADDR_LOOPBACK_INIT;
	uint32_t	 mplslabel = 0;

	/* nexthop to loopback -> ignore silently */
	if (IN6_IS_ADDR_LOOPBACK(&kf->nexthop.v6))
		return (0);

	/* only a single MPLS label is supported for now */
	if (kf->prefix.labellen != 3) {
		log_warnx("%s: %s/%u has not a single label", __func__,
		    log_addr(&kf->prefix), kf->prefixlen);
		return (0);
	}
	/*
	 * Widen each byte to uint32_t before shifting: a narrow operand
	 * would be promoted to (signed) int and shifting a value >= 0x80
	 * left by 24 bits overflows int, which is undefined behaviour.
	 */
	mplslabel = ((uint32_t)kf->prefix.labelstack[0] << 24) |
	    ((uint32_t)kf->prefix.labelstack[1] << 16) |
	    ((uint32_t)kf->prefix.labelstack[2] << 8);
	mplslabel = htonl(mplslabel);

	kf->flags |= F_MPLS;
	kf->mplslabel = mplslabel;

	/* for blackhole and reject routes nexthop needs to be ::1 */
	if (kf->flags & (F_BLACKHOLE|F_REJECT))
		memcpy(&kf->nexthop.v6, &lo6, sizeof(kf->nexthop.v6));

	if ((kr6 = kroute6_find(kt, &kf->prefix, kf->prefixlen,
	    kf->priority)) == NULL) {
		if (kroute_insert(kt, kf) == -1)
			return (-1);
	} else {
		/* update the existing entry in place */
		kr6->mplslabel = mplslabel;
		kr6->flags |= F_MPLS;
		kr6->ifindex = kf->ifindex;
		memcpy(&kr6->nexthop, &kf->nexthop.v6, sizeof(struct in6_addr));
		kr6->nexthop_scope_id = kf->nexthop.scope_id;
		rtlabel_unref(kr6->labelid);
		kr6->labelid = rtlabel_name2id(kf->label);
		if (kf->flags & F_BLACKHOLE)
			kr6->flags |= F_BLACKHOLE;
		else
			kr6->flags &= ~F_BLACKHOLE;
		if (kf->flags & F_REJECT)
			kr6->flags |= F_REJECT;
		else
			kr6->flags &= ~F_REJECT;

		if (send_rtmsg(RTM_CHANGE, kt, kf))
			kr6->flags |= F_BGPD_INSERTED;
	}

	return (0);
}

/*
 * Remove a bgpd route from table rtableid.  The request is marked F_BGPD
 * with priority RTP_MINE before being passed to kroute_remove().  An
 * unknown table is ignored and reported as success.
 */
int
kr_delete(u_int rtableid, struct kroute_full *kf)
{
	struct ktable	*tbl = ktable_get(rtableid);

	/* too noisy during reloads, just ignore */
	if (tbl == NULL)
		return (0);

	kf->priority = RTP_MINE;
	kf->flags |= F_BGPD;
	return (kroute_remove(tbl, kf, 1));
}

/*
 * Remove every route flagged F_BGPD_INSERTED from table rtableid and
 * mark the table as not synced.  Returns 0 on success (also for an
 * unknown table) and -1 as soon as a removal fails, in which case
 * fib_sync is left untouched.
 */
int
kr_flush(u_int rtableid)
{
	struct ktable	*tbl;
	struct kroute	*r4, *r4next;
	struct kroute6	*r6, *r6next;

	tbl = ktable_get(rtableid);
	if (tbl == NULL)
		/* too noisy during reloads, just ignore */
		return (0);

	/* the _SAFE variants are used because entries are removed */
	RB_FOREACH_SAFE(r4, kroute_tree, &tbl->krt, r4next) {
		if ((r4->flags & F_BGPD_INSERTED) &&
		    kroute_remove(tbl, kr_tofull(r4), 1) == -1)
			return (-1);
	}
	RB_FOREACH_SAFE(r6, kroute6_tree, &tbl->krt6, r6next) {
		if ((r6->flags & F_BGPD_INSERTED) &&
		    kroute_remove(tbl, kr6_tofull(r6), 1) == -1)
			return (-1);
	}

	tbl->fib_sync = 0;
	return (0);
}

/*
 * Free all kernel routing tables (highest rtableid first), the interface
 * tree and the krt index itself.  The index pointer and size are reset
 * afterwards so that a later ktable_get() returns NULL instead of reading
 * freed memory.
 */
void
kr_shutdown(void)
{
	u_int	i;

	for (i = krt_size; i > 0; i--)
		ktable_free(i - 1);
	kif_clear();
	free(krt);
	krt = NULL;
	krt_size = 0;
}

/*
 * Couple table rtableid to the kernel: mark it synced and send an
 * RTM_ADD for every F_BGPD route, flagging those for which send_rtmsg()
 * returned non-zero as F_BGPD_INSERTED.  Does nothing for an unknown or
 * already coupled table.
 */
void
kr_fib_couple(u_int rtableid)
{
	struct ktable	*tbl = ktable_get(rtableid);
	struct kroute	*r4;
	struct kroute6	*r6;

	/* table does not exist or is already coupled */
	if (tbl == NULL || tbl->fib_sync)
		return;

	tbl->fib_sync = 1;

	RB_FOREACH(r4, kroute_tree, &tbl->krt) {
		if ((r4->flags & F_BGPD) &&
		    send_rtmsg(RTM_ADD, tbl, kr_tofull(r4)))
			r4->flags |= F_BGPD_INSERTED;
	}
	RB_FOREACH(r6, kroute6_tree, &tbl->krt6) {
		if ((r6->flags & F_BGPD) &&
		    send_rtmsg(RTM_ADD, tbl, kr6_tofull(r6)))
			r6->flags |= F_BGPD_INSERTED;
	}

	log_info("kernel routing table %u (%s) coupled", tbl->rtableid,
	    tbl->descr);
}

/*
 * Couple every slot of the krt index, from the highest rtableid down
 * to 0.
 */
void
kr_fib_couple_all(void)
{
	u_int	 slot = krt_size;

	while (slot > 0) {
		slot--;
		kr_fib_couple(slot);
	}
}

/*
 * Decouple table rtableid from the kernel: send an RTM_DELETE for every
 * route flagged F_BGPD_INSERTED, clear the flag where send_rtmsg()
 * returned non-zero and mark the table as not synced.  Does nothing for
 * an unknown or already decoupled table.
 */
void
kr_fib_decouple(u_int rtableid)
{
	struct ktable	*tbl = ktable_get(rtableid);
	struct kroute	*r4;
	struct kroute6	*r6;

	/* table does not exist or is already decoupled */
	if (tbl == NULL || !tbl->fib_sync)
		return;

	RB_FOREACH(r4, kroute_tree, &tbl->krt) {
		if (!(r4->flags & F_BGPD_INSERTED))
			continue;
		if (send_rtmsg(RTM_DELETE, tbl, kr_tofull(r4)))
			r4->flags &= ~F_BGPD_INSERTED;
	}
	RB_FOREACH(r6, kroute6_tree, &tbl->krt6) {
		if (!(r6->flags & F_BGPD_INSERTED))
			continue;
		if (send_rtmsg(RTM_DELETE, tbl, kr6_tofull(r6)))
			r6->flags &= ~F_BGPD_INSERTED;
	}

	tbl->fib_sync = 0;

	log_info("kernel routing table %u (%s) decoupled", tbl->rtableid,
	    tbl->descr);
}

/*
 * Decouple every slot of the krt index, from the highest rtableid down
 * to 0.
 */
void
kr_fib_decouple_all(void)
{
	u_int	 slot = krt_size;

	while (slot > 0) {
		slot--;
		kr_fib_decouple(slot);
	}
}

/*
 * Replace the priority stored in kr_state.fib_prio (initially set by
 * kr_init()).
 */
void
kr_fib_prio_set(uint8_t prio)
{
	kr_state.fib_prio = prio;
}

/*
 * Exported entry point for routing socket processing; simply forwards to
 * dispatch_rtmsg() and returns its result.
 */
int
kr_dispatch_msg(void)
{
	int	rv;

	rv = dispatch_rtmsg();
	return (rv);
}

/*
 * Start tracking nexthop addr in table rtableid.
 *
 * If the nexthop is already known an update is sent for it; otherwise a
 * new knexthop is allocated and handed to knexthop_insert().  Returns 0
 * on success and for an unknown table (which is only logged), -1 if the
 * allocation or the insert fails.
 */
int
kr_nexthop_add(u_int rtableid, struct bgpd_addr *addr)
{
	struct ktable	*tbl;
	struct knexthop	*nh;

	tbl = ktable_get(rtableid);
	if (tbl == NULL) {
		log_warnx("%s: non-existent rtableid %d", __func__, rtableid);
		return (0);
	}

	nh = knexthop_find(tbl, addr);
	if (nh != NULL) {
		/* should not happen... this is actually an error path */
		knexthop_send_update(nh);
		return (0);
	}

	nh = calloc(1, sizeof(*nh));
	if (nh == NULL) {
		log_warn("%s", __func__);
		return (-1);
	}
	memcpy(&nh->nexthop, addr, sizeof(nh->nexthop));

	if (knexthop_insert(tbl, nh) == -1)
		return (-1);
	return (0);
}

/*
 * Stop tracking nexthop addr in table rtableid.  An unknown table is
 * logged; an unknown nexthop is silently ignored.
 */
void
kr_nexthop_delete(u_int rtableid, struct bgpd_addr *addr)
{
	struct ktable	*tbl;
	struct knexthop	*nh;

	tbl = ktable_get(rtableid);
	if (tbl == NULL) {
		log_warnx("%s: non-existent rtableid %d", __func__,
		    rtableid);
		return;
	}

	nh = knexthop_find(tbl, addr);
	if (nh != NULL)
		knexthop_remove(tbl, nh);
}

static struct ctl_show_interface *
kr_show_interface(struct kif *kif)
{
	static struct ctl_show_interface iface;
	uint64_t ifms_type;

	memset(&iface, 0, sizeof(iface));
	strlcpy(iface.ifname, kif->ifname, sizeof(iface.ifname));

	snprintf(iface.linkstate, sizeof(iface.linkstate),
	    "%s", get_linkstate(kif->if_type, kif->link_state));

	if ((ifms_type = ift2ifm(kif->if_type)) != 0)
		snprintf(iface.media, sizeof(iface.media),
		    "%s", get_media_descr(ifms_type));

	iface.baudrate = kif->baudrate;
	iface.rdomain = kif->rdomain;
	iface.nh_reachable = kif->nh_reachable;
	iface.is_up = (kif->flags & IFF_UP) == IFF_UP;

	return &iface;
}

/*
 * Answer a control request concerning kernel routing state.
 *
 * The table id and the pid to reply to are taken from the imsg header.
 * Depending on the imsg type the routes, the best match for an address,
 * the tracked nexthops, the interfaces or the list of tables are sent
 * with send_imsg_session().  Whatever happens, including bad requests
 * and unknown types, the reply is terminated with an IMSG_CTL_END.
 */
void
kr_show_route(struct imsg *imsg)
{
	struct ktable		*kt;
	struct kroute		*kr, *kn;
	struct kroute6		*kr6, *kn6;
	struct kroute_full	*kf;
	struct bgpd_addr	 addr;
	struct ctl_kroute_req	 req;
	struct ctl_show_nexthop	 snh;
	struct knexthop		*h;
	struct kif		*kif;
	uint32_t		 tableid;
	pid_t			 pid;
	u_int			 i;
	u_short			 ifindex = 0;

	tableid = imsg_get_id(imsg);
	pid = imsg_get_pid(imsg);
	switch (imsg_get_type(imsg)) {
	case IMSG_CTL_KROUTE:
		/* dump all routes, optionally filtered by af and flags */
		if (imsg_get_data(imsg, &req, sizeof(req)) == -1) {
			log_warnx("%s: wrong imsg len", __func__);
			break;
		}
		kt = ktable_get(tableid);
		if (kt == NULL) {
			log_warnx("%s: table %u does not exist", __func__,
			    tableid);
			break;
		}
		if (!req.af || req.af == AF_INET)
			RB_FOREACH(kr, kroute_tree, &kt->krt) {
				if (req.flags && (kr->flags & req.flags) == 0)
					continue;
				/* send the node and everything chained to it */
				kn = kr;
				do {
					kf = kr_tofull(kn);
					kf->priority = kr_priority(kf);
					send_imsg_session(IMSG_CTL_KROUTE,
					    pid, kf, sizeof(*kf));
				} while ((kn = kn->next) != NULL);
			}
		if (!req.af || req.af == AF_INET6)
			RB_FOREACH(kr6, kroute6_tree, &kt->krt6) {
				if (req.flags && (kr6->flags & req.flags) == 0)
					continue;
				/* send the node and everything chained to it */
				kn6 = kr6;
				do {
					kf = kr6_tofull(kn6);
					kf->priority = kr_priority(kf);
					send_imsg_session(IMSG_CTL_KROUTE,
					    pid, kf, sizeof(*kf));
				} while ((kn6 = kn6->next) != NULL);
			}
		break;
	case IMSG_CTL_KROUTE_ADDR:
		/* send the route matching a single address, if any */
		if (imsg_get_data(imsg, &addr, sizeof(addr)) == -1) {
			log_warnx("%s: wrong imsg len", __func__);
			break;
		}
		kt = ktable_get(tableid);
		if (kt == NULL) {
			log_warnx("%s: table %u does not exist", __func__,
			    tableid);
			break;
		}
		kr = NULL;
		switch (addr.aid) {
		case AID_INET:
			kr = kroute_match(kt, &addr, 1);
			if (kr != NULL) {
				kf = kr_tofull(kr);
				kf->priority = kr_priority(kf);
				send_imsg_session(IMSG_CTL_KROUTE,
				    pid, kf, sizeof(*kf));
			}
			break;
		case AID_INET6:
			kr6 = kroute6_match(kt, &addr, 1);
			if (kr6 != NULL) {
				kf = kr6_tofull(kr6);
				kf->priority = kr_priority(kf);
				send_imsg_session(IMSG_CTL_KROUTE,
				    pid, kf, sizeof(*kf));
			}
			break;
		}
		break;
	case IMSG_CTL_SHOW_NEXTHOP:
		/* dump the nexthops of the table used for nexthop lookups */
		kt = ktable_get(tableid);
		if (kt == NULL) {
			log_warnx("%s: table %u does not exist", __func__,
			    tableid);
			break;
		}
		RB_FOREACH(h, knexthop_tree, KT2KNT(kt)) {
			memset(&snh, 0, sizeof(snh));
			memcpy(&snh.addr, &h->nexthop, sizeof(snh.addr));
			if (h->kroute != NULL) {
				/* h->kroute's type follows the nexthop's AID */
				switch (h->nexthop.aid) {
				case AID_INET:
					kr = h->kroute;
					snh.valid = kroute_validate(kr);
					snh.krvalid = 1;
					snh.kr = *kr_tofull(kr);
					ifindex = kr->ifindex;
					break;
				case AID_INET6:
					kr6 = h->kroute;
					snh.valid = kroute6_validate(kr6);
					snh.krvalid = 1;
					snh.kr = *kr6_tofull(kr6);
					ifindex = kr6->ifindex;
					break;
				}
				snh.kr.priority = kr_priority(&snh.kr);
				if ((kif = kif_find(ifindex)) != NULL)
					memcpy(&snh.iface,
					    kr_show_interface(kif),
					    sizeof(snh.iface));
			}
			send_imsg_session(IMSG_CTL_SHOW_NEXTHOP, pid,
			    &snh, sizeof(snh));
		}
		break;
	case IMSG_CTL_SHOW_INTERFACE:
		RB_FOREACH(kif, kif_tree, &kit)
			send_imsg_session(IMSG_CTL_SHOW_INTERFACE,
			    pid, kr_show_interface(kif),
			    sizeof(struct ctl_show_interface));
		break;
	case IMSG_CTL_SHOW_FIB_TABLES:
		for (i = 0; i < krt_size; i++) {
			struct ktable	ktab;

			if ((kt = ktable_get(i)) == NULL)
				continue;

			ktab = *kt;
			/*
			 * do not leak internal information: every container
			 * head in the copy holds a heap pointer, so reset
			 * all of them, including the kredist tree.
			 */
			RB_INIT(&ktab.krt);
			RB_INIT(&ktab.krt6);
			RB_INIT(&ktab.knt);
			RB_INIT(&ktab.kredist);
			TAILQ_INIT(&ktab.krn);

			send_imsg_session(IMSG_CTL_SHOW_FIB_TABLES,
			    pid, &ktab, sizeof(ktab));
		}
		break;
	default:	/* nada */
		break;
	}

	send_imsg_session(IMSG_CTL_END, pid, NULL, 0);
}

static void
kr_send_dependon(struct kif *kif)
{
	struct session_dependon sdon = { 0 };

	strlcpy(sdon.ifname, kif->ifname, sizeof(sdon.ifname));
	sdon.depend_state = kif->depend_state;
	send_imsg_session(IMSG_SESSION_DEPENDON, 0, &sdon, sizeof(sdon));
}

void
kr_ifinfo(char *ifname)
{
	struct kif	*kif;

	RB_FOREACH(kif, kif_tree, &kit)
		if (!strcmp(ifname, kif->ifname)) {
			kr_send_dependon(kif);
			return;
		}
}

/*
 * Record net in kt's redistribution tree and announce it with
 * IMSG_NETWORK_ADD.
 *
 * If the network is already recorded, a dynamic request never overrides
 * a different (non-dynamic) entry: in that case nothing is sent and 0 is
 * returned.  In every other case the entry takes over the given dynamic
 * value, the network is sent and 1 is returned.  Allocation failure is
 * fatal.
 */
static int
kr_net_redist_add(struct ktable *kt, struct network_config *net,
    struct filter_set_head *attr, int dynamic)
{
	struct kredist_node	*fresh, *present;

	fresh = calloc(1, sizeof(*fresh));
	if (fresh == NULL)
		fatal("%s", __func__);
	fresh->rd = net->rd;
	fresh->prefix = net->prefix;
	fresh->prefixlen = net->prefixlen;
	fresh->dynamic = dynamic;

	/* RB_INSERT hands back the colliding node if there is one */
	present = RB_INSERT(kredist_tree, &kt->kredist, fresh);
	if (present != NULL) {
		free(fresh);

		/*
		 * An existing announcement with a different dynamic value
		 * has preference over a dynamic one: ignore the update.
		 */
		if (dynamic && dynamic != present->dynamic)
			return 0;

		/*
		 * Equal or non-dynamic requests end up here; storing the
		 * value is a no-op for the equal case.
		 */
		present->dynamic = dynamic;
	}

	if (send_network(IMSG_NETWORK_ADD, net, attr) == -1)
		log_warnx("%s: failed to send network update", __func__);
	return 1;
}

static void
kr_net_redist_del(struct ktable *kt, struct network_config *net, int dynamic)
{
	struct kredist_node *r, node;

	memset(&node, 0, sizeof(node));
	node.prefix = net->prefix;
	node.prefixlen = net->prefixlen;
	node.rd = net->rd;

	r = RB_FIND(kredist_tree, &kt->kredist, &node);
	if (r == NULL || dynamic != r->dynamic)
		return;

	if (RB_REMOVE(kredist_tree, &kt->kredist, r) == NULL) {
		log_warnx("%s: failed to remove network %s/%u", __func__,
		    log_addr(&node.prefix), node.prefixlen);
		return;
	}
	free(r);

	if (send_network(IMSG_NETWORK_REMOVE, net, NULL) == -1)
		log_warnx("%s: failed to send network removal", __func__);
}

/*
 * Check net against the network statements configured for kt and
 * redistribute it on the first statement that accepts it.
 *
 * flags are tested for F_STATIC / F_CONNECTED, loopback is non-zero for
 * routes whose nexthop is on loopback.  For a matching statement net->rd
 * is overwritten with the statement's rd and kr_net_redist_add() is
 * called with dynamic = 1.  Returns 1 as soon as that call returns
 * non-zero, 0 if no statement led to a redistribution.
 */
int
kr_net_match(struct ktable *kt, struct network_config *net, uint16_t flags,
    int loopback)
{
	struct network		*xn;

	TAILQ_FOREACH(xn, &kt->krn, entry) {
		/* only statements of the same address family can match */
		if (xn->net.prefix.aid != net->prefix.aid)
			continue;
		/*
		 * Inside the switch 'break' means "matched, go on and
		 * redistribute" while 'continue' moves on to the next
		 * statement.
		 */
		switch (xn->net.type) {
		case NETWORK_DEFAULT:
			/* static match already redistributed */
			continue;
		case NETWORK_STATIC:
			/* Skip networks with nexthop on loopback. */
			if (loopback)
				continue;
			if (flags & F_STATIC)
				break;
			continue;
		case NETWORK_CONNECTED:
			/* Skip networks with nexthop on loopback. */
			if (loopback)
				continue;
			if (flags & F_CONNECTED)
				break;
			continue;
		case NETWORK_RTLABEL:
			if (net->rtlabel == xn->net.rtlabel)
				break;
			continue;
		case NETWORK_PRIORITY:
			if (net->priority == xn->net.priority)
				break;
			continue;
		case NETWORK_MRTCLONE:
		case NETWORK_PREFIXSET:
			/* must not happen */
			log_warnx("%s: found a NETWORK_PREFIXSET, "
			    "please send a bug report", __func__);
			continue;
		}

		/* matched: take over the statement's rd and redistribute */
		net->rd = xn->net.rd;
		if (kr_net_redist_add(kt, net, &xn->net.attrset, 1))
			return (1);
	}
	return (0);
}

/*
 * Look for a network in kt->krn that equals n in type, prefixlen, rd,
 * rtlabel, priority and (bytewise) prefix.
 * Returns the stored entry or NULL when there is none.
 */
struct network *
kr_net_find(struct ktable *kt, struct network *n)
{
	struct network		*cur;

	TAILQ_FOREACH(cur, &kt->krn, entry) {
		if (cur->net.type == n->net.type &&
		    cur->net.prefixlen == n->net.prefixlen &&
		    cur->net.rd == n->net.rd &&
		    cur->net.rtlabel == n->net.rtlabel &&
		    cur->net.priority == n->net.priority &&
		    memcmp(&n->net.prefix, &cur->net.prefix,
		    sizeof(n->net.prefix)) == 0)
			return (cur);
	}
	return (NULL);
}

/*
 * Merge the networks queued on nh into the table with id rtableid.
 *
 * Every entry is taken off nh, gets its old marker cleared and its rd set
 * to rd.  If an equal network (see kr_net_find()) is already part of the
 * table, that one is kept: its old marker is cleared, its attrset is
 * replaced by the new one and the new entry is freed.  Otherwise the new
 * entry is appended to kt->krn.
 *
 * Calls fatalx() when the table does not exist.
 */
void
kr_net_reload(u_int rtableid, uint64_t rd, struct network_head *nh)
{
	struct network		*n, *xn;
	struct ktable		*kt;

	/* rtableid is unsigned, print it with the matching conversion */
	if ((kt = ktable_get(rtableid)) == NULL)
		fatalx("%s: non-existent rtableid %u", __func__, rtableid);

	while ((n = TAILQ_FIRST(nh)) != NULL) {
		TAILQ_REMOVE(nh, n, entry);
		n->net.old = 0;
		n->net.rd = rd;
		xn = kr_net_find(kt, n);
		if (xn) {
			/* known network: keep it, only take over the attrset */
			xn->net.old = 0;
			filterset_free(&xn->net.attrset);
			filterset_move(&n->net.attrset, &xn->net.attrset);
			network_free(n);
		} else
			TAILQ_INSERT_TAIL(&kt->krn, n, entry);
	}
}

void
kr_net_clear(struct ktable *kt)
{
	struct network *n, *xn;

	TAILQ_FOREACH_SAFE(n, &kt->krn, entry, xn) {
		TAILQ_REMOVE(&kt->krn, n, entry);
		if (n->net.type == NETWORK_DEFAULT)
			kr_net_redist_del(kt, &n->net, 0);
		network_free(n);
	}
}

/*
 * Evaluate the route kf for dynamic redistribution in table kt.
 *
 * With type == IMSG_NETWORK_REMOVE the dynamic redistribution entry of the
 * prefix is removed right away.  Otherwise routes flagged F_BGPD, routes
 * for address ranges that are never redistributed, routes of an unhandled
 * AID and default routes are ignored.  All other routes are handed to
 * kr_net_match(); when that returns 0 a possibly existing dynamic entry
 * is removed.
 *
 * NOTE(review): net.rd is zeroed by the memset here and only filled in
 * by kr_net_match(), so the removal shortcut looks up the entry with a
 * rd of 0 - confirm this is intended for tables with a non-zero rd.
 */
void
kr_redistribute(int type, struct ktable *kt, struct kroute_full *kf)
{
	struct network_config	 net;
	uint32_t		 haddr;
	int			 lo_nexthop = 0;

	memset(&net, 0, sizeof(net));
	net.prefix = kf->prefix;
	net.prefixlen = kf->prefixlen;
	net.priority = kf->priority;
	net.rtlabel = rtlabel_name2id(kf->label);
	rtlabel_unref(net.rtlabel); /* drop reference now, which is ok here */

	/* shortcut for removals */
	if (type == IMSG_NETWORK_REMOVE) {
		kr_net_redist_del(kt, &net, 1);
		return;
	}

	if (kf->flags & F_BGPD)
		return;

	if (kf->prefix.aid == AID_INET) {
		/*
		 * We consider the loopback net and multicast addresses
		 * as not redistributable.
		 */
		haddr = ntohl(kf->prefix.v4.s_addr);
		if (IN_MULTICAST(haddr))
			return;
		if ((haddr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
			return;

		/* Check if the nexthop is the loopback addr. */
		if (kf->nexthop.v4.s_addr == htonl(INADDR_LOOPBACK))
			lo_nexthop = 1;
	} else if (kf->prefix.aid == AID_INET6) {
		/*
		 * We consider unspecified, loopback, multicast,
		 * link- and site-local, IPv4 mapped and IPv4 compatible
		 * addresses as not redistributable.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&kf->prefix.v6) ||
		    IN6_IS_ADDR_LOOPBACK(&kf->prefix.v6) ||
		    IN6_IS_ADDR_MULTICAST(&kf->prefix.v6) ||
		    IN6_IS_ADDR_LINKLOCAL(&kf->prefix.v6) ||
		    IN6_IS_ADDR_SITELOCAL(&kf->prefix.v6) ||
		    IN6_IS_ADDR_V4MAPPED(&kf->prefix.v6) ||
		    IN6_IS_ADDR_V4COMPAT(&kf->prefix.v6))
			return;

		/* Check if the nexthop is the loopback addr. */
		if (IN6_IS_ADDR_LOOPBACK(&kf->nexthop.v6))
			lo_nexthop = 1;
	} else {
		/* unhandled AID cannot be redistributed */
		return;
	}

	/*
	 * never allow 0/0 or ::/0 the default route can only be redistributed
	 * with announce default.
	 */
	if (kf->prefixlen == 0)
		return;

	/* no longer matches, if still present remove it */
	if (kr_net_match(kt, &net, kf->flags, lo_nexthop) == 0)
		kr_net_redist_del(kt, &net, 1);
}

/*
 * Prepare all existing tables for loading a configuration: every table
 * is put into state RECONF_DELETE and all of its networks are marked old.
 */
void
ktable_preload(void)
{
	struct ktable	*kt;
	struct network	*cur;
	u_int		 rid;

	for (rid = 0; rid < krt_size; rid++) {
		kt = ktable_get(rid);
		if (kt == NULL)
			continue;

		kt->state = RECONF_DELETE;

		/* mark all networks as old */
		TAILQ_FOREACH(cur, &kt->krn, entry) {
			cur->net.old = 1;
		}
	}
}

void
ktable_postload(void)
{
	struct ktable	*kt;
	struct network	*n, *xn;
	u_int		 i;

	for (i = krt_size; i > 0; i--) {
		if ((kt = ktable_get(i - 1)) == NULL)
			continue;
		if (kt->state == RECONF_DELETE) {
			ktable_free(i - 1);
			continue;
		} else if (kt->state == RECONF_REINIT)
			kt->fib_sync = kt->fib_conf;

		/* cleanup old networks */
		TAILQ_FOREACH_SAFE(n, &kt->krn, entry, xn) {
			if (n->net.old) {
				TAILQ_REMOVE(&kt->krn, n, entry);
				if (n->net.type == NETWORK_DEFAULT)
					kr_net_redist_del(kt, &n->net, 0);
				network_free(n);
			}
		}
	}
}

/*
 * Re-evaluate nexthops and redistribution for all tables.
 *
 * For a table that is its own nexthop table all nexthops are revalidated.
 * NETWORK_DEFAULT networks are (re)added as non-dynamic redistribution
 * entries; any other network type sets hasdyn, which triggers a run of
 * kr_redistribute() over all IPv4 and IPv6 kroutes of the table.
 * hasdyn is not reset between tables, so once it is set every following
 * table is fully evaluated as well.
 *
 * Always returns 0.
 */
int
kr_reload(void)
{
	struct ktable	*kt;
	struct kroute	*kr;
	struct kroute6	*kr6;
	struct knexthop	*nh;
	struct network	*cur;
	u_int		 rid;
	int		 hasdyn = 0;

	for (rid = 0; rid < krt_size; rid++) {
		kt = ktable_get(rid);
		if (kt == NULL)
			continue;

		/* if this is the main nexthop table revalidate nexthops */
		if (kt->rtableid == kt->nhtableid) {
			RB_FOREACH(nh, knexthop_tree, KT2KNT(kt))
				knexthop_validate(kt, nh);
		}

		TAILQ_FOREACH(cur, &kt->krn, entry) {
			if (cur->net.type != NETWORK_DEFAULT) {
				hasdyn = 1;
				continue;
			}
			kr_net_redist_add(kt, &cur->net, &cur->net.attrset, 0);
		}

		/* only evaluate the full tree if we need */
		if (!hasdyn)
			continue;

		RB_FOREACH(kr, kroute_tree, &kt->krt)
			kr_redistribute(IMSG_NETWORK_ADD, kt, kr_tofull(kr));
		RB_FOREACH(kr6, kroute6_tree, &kt->krt6)
			kr_redistribute(IMSG_NETWORK_ADD, kt, kr6_tofull(kr6));
	}

	return (0);
}

/*
 * Return the priority of kf, with the placeholder RTP_MINE replaced by
 * the configured kr_state.fib_prio.
 */
uint8_t
kr_priority(struct kroute_full *kf)
{
	return (kf->priority == RTP_MINE ? kr_state.fib_prio : kf->priority);
}

struct kroute_full *
kr_tofull(struct kroute *kr)
{
	static struct kroute_full	kf;

	memset(&kf, 0, sizeof(kf));

	kf.prefix.aid = AID_INET;
	kf.prefix.v4.s_addr = kr->prefix.s_addr;
	kf.nexthop.aid = AID_INET;
	kf.nexthop.v4.s_addr = kr->nexthop.s_addr;
	strlcpy(kf.label, rtlabel_id2name(kr->labelid), sizeof(kf.label));
	kf.flags = kr->flags;
	kf.ifindex = kr->ifindex;
	kf.prefixlen = kr->prefixlen;
	kf.priority = kr->priority;
	kf.mplslabel = kr->mplslabel;

	return (&kf);
}

struct kroute_full *
kr6_tofull(struct kroute6 *kr6)
{
	static struct kroute_full	kf;

	memset(&kf, 0, sizeof(kf));

	kf.prefix.aid = AID_INET6;
	kf.prefix.v6 = kr6->prefix;
	kf.prefix.scope_id = kr6->prefix_scope_id;
	kf.nexthop.aid = AID_INET6;
	kf.nexthop.v6 = kr6->nexthop;
	kf.nexthop.scope_id = kr6->nexthop_scope_id;
	strlcpy(kf.label, rtlabel_id2name(kr6->labelid), sizeof(kf.label));
	kf.flags = kr6->flags;
	kf.ifindex = kr6->ifindex;
	kf.prefixlen = kr6->prefixlen;
	kf.priority = kr6->priority;
	kf.mplslabel = kr6->mplslabel;

	return (&kf);
}

/*
 * RB-tree compare functions
 */

/*
 * Order IPv4 kroutes by prefix (in host byte order), then prefixlen,
 * then priority.  A priority of RTP_ANY on either side ends the
 * comparison after prefix and prefixlen with a match.
 */
int
kroute_compare(struct kroute *a, struct kroute *b)
{
	uint32_t	pa = ntohl(a->prefix.s_addr);
	uint32_t	pb = ntohl(b->prefix.s_addr);

	if (pa != pb)
		return (pa < pb ? -1 : 1);
	if (a->prefixlen != b->prefixlen)
		return (a->prefixlen < b->prefixlen ? -1 : 1);

	/* if the priority is RTP_ANY finish on the first address hit */
	if (a->priority == RTP_ANY || b->priority == RTP_ANY)
		return (0);
	if (a->priority != b->priority)
		return (a->priority < b->priority ? -1 : 1);
	return (0);
}

/*
 * Order IPv6 kroutes by the prefix bytes, then prefix scope id, then
 * prefixlen, then priority.  A priority of RTP_ANY on either side ends
 * the comparison before the priority check with a match.
 */
int
kroute6_compare(struct kroute6 *a, struct kroute6 *b)
{
	int	c;

	/* bytewise unsigned comparison of the address */
	c = memcmp(a->prefix.s6_addr, b->prefix.s6_addr,
	    sizeof(a->prefix.s6_addr));
	if (c != 0)
		return (c < 0 ? -1 : 1);

	if (a->prefix_scope_id != b->prefix_scope_id)
		return (a->prefix_scope_id < b->prefix_scope_id ? -1 : 1);

	if (a->prefixlen != b->prefixlen)
		return (a->prefixlen < b->prefixlen ? -1 : 1);

	/* if the priority is RTP_ANY finish on the first address hit */
	if (a->priority == RTP_ANY || b->priority == RTP_ANY)
		return (0);
	if (a->priority != b->priority)
		return (a->priority < b->priority ? -1 : 1);
	return (0);
}

/*
 * Order nexthops by AID first and by address within the same AID.
 * Only AID_INET and AID_INET6 are supported, any other AID is fatal.
 */
int
knexthop_compare(struct knexthop *a, struct knexthop *b)
{
	uint32_t	ha, hb;
	int		c;

	if (a->nexthop.aid != b->nexthop.aid)
		return (b->nexthop.aid - a->nexthop.aid);

	switch (a->nexthop.aid) {
	case AID_INET:
		/* compare in host byte order */
		ha = ntohl(a->nexthop.v4.s_addr);
		hb = ntohl(b->nexthop.v4.s_addr);
		if (ha != hb)
			return (ha < hb ? -1 : 1);
		break;
	case AID_INET6:
		/* bytewise unsigned comparison of the address */
		c = memcmp(a->nexthop.v6.s6_addr, b->nexthop.v6.s6_addr,
		    sizeof(a->nexthop.v6.s6_addr));
		if (c != 0)
			return (c < 0 ? -1 : 1);
		break;
	default:
		fatalx("%s: unknown AF", __func__);
	}

	return (0);
}

/*
 * Order redistribution nodes by AID, prefixlen, prefix address and
 * finally rd.  Only AID_INET and AID_INET6 are supported, any other AID
 * is fatal.
 */
int
kredist_compare(struct kredist_node *a, struct kredist_node *b)
{
	uint32_t	ha, hb;
	int		c;

	if (a->prefix.aid != b->prefix.aid)
		return (b->prefix.aid - a->prefix.aid);

	if (a->prefixlen != b->prefixlen)
		return (a->prefixlen < b->prefixlen ? -1 : 1);

	switch (a->prefix.aid) {
	case AID_INET:
		/* compare in host byte order */
		ha = ntohl(a->prefix.v4.s_addr);
		hb = ntohl(b->prefix.v4.s_addr);
		if (ha != hb)
			return (ha < hb ? -1 : 1);
		break;
	case AID_INET6:
		/* bytewise unsigned comparison of the address */
		c = memcmp(a->prefix.v6.s6_addr, b->prefix.v6.s6_addr,
		    sizeof(a->prefix.v6.s6_addr));
		if (c != 0)
			return (c < 0 ? -1 : 1);
		break;
	default:
		fatalx("%s: unknown AF", __func__);
	}

	if (a->rd != b->rd)
		return (a->rd < b->rd ? -1 : 1);

	return (0);
}

/*
 * RB-tree compare function for interfaces: orders by ifindex.
 * The result is the difference b - a, i.e. zero for equal indexes.
 */
int
kif_compare(struct kif *a, struct kif *b)
{
	return (b->ifindex - a->ifindex);
}


/*
 * tree management functions
 */

struct kroute *
kroute_find(struct ktable *kt, const struct bgpd_addr *prefix,
    uint8_t prefixlen, uint8_t prio)
{
	struct kroute	 s;
	struct kroute	*kn, *tmp;

	s.prefix = prefix->v4;
	s.prefixlen = prefixlen;
	s.priority = prio;

	kn = RB_FIND(kroute_tree, &kt->krt, &s);
	if (kn && prio == RTP_ANY) {
		tmp = RB_PREV(kroute_tree, &kt->krt, kn);
		while (tmp) {
			if (kroute_compare(&s, tmp) == 0)
				kn = tmp;
			else
				break;
			tmp = RB_PREV(kroute_tree, &kt->krt, kn);
		}
	}
	return (kn);
}

/*
 * Walk the multipath list starting at kr and return the first entry that
 * belongs to kf.  For routes flagged F_CONNECTED the interface index is
 * compared, for all others the IPv4 nexthop address.
 * kr must not be NULL.  Returns NULL when no entry matches.
 */
struct kroute *
kroute_matchgw(struct kroute *kr, struct kroute_full *kf)
{
	in_addr_t	gw = kf->nexthop.v4.s_addr;
	int		connected = (kf->flags & F_CONNECTED) != 0;

	do {
		if (connected ? kr->ifindex == kf->ifindex :
		    kr->nexthop.s_addr == gw)
			return (kr);
	} while ((kr = kr->next) != NULL);

	return (NULL);
}

int
kroute_insert(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute	*kr, *krm;
	struct kroute6	*kr6, *kr6m;
	struct knexthop	*n;
	uint32_t	 mplslabel = 0;
	int		 multipath = 0;

	if (kf->prefix.aid == AID_VPN_IPv4 ||
	    kf->prefix.aid == AID_VPN_IPv6) {
		/* only a single MPLS label is supported for now */
		if (kf->prefix.labellen != 3) {
			log_warnx("%s/%u does not have a single label",
			    log_addr(&kf->prefix), kf->prefixlen);
			return -1;
		}
		mplslabel = (kf->prefix.labelstack[0] << 24) |
		    (kf->prefix.labelstack[1] << 16) |
		    (kf->prefix.labelstack[2] << 8);
	}

	switch (kf->prefix.aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if ((kr = calloc(1, sizeof(*kr))) == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		kr->flags = kf->flags;
		kr->prefix = kf->prefix.v4;
		kr->prefixlen = kf->prefixlen;
		if (kf->nexthop.aid == AID_INET)
			kr->nexthop = kf->nexthop.v4;

		if (kf->prefix.aid == AID_VPN_IPv4) {
			kr->flags |= F_MPLS;
			kr->mplslabel = htonl(mplslabel);
		}

		kr->ifindex = kf->ifindex;
		kr->priority = kf->priority;
		kr->labelid = rtlabel_name2id(kf->label);

		if ((krm = RB_INSERT(kroute_tree, &kt->krt, kr)) != NULL) {
			/* multipath route, add at end of list */
			while (krm->next != NULL)
				krm = krm->next;
			krm->next = kr;
			multipath = 1;
		}

		if (kf->flags & F_BGPD)
			if (send_rtmsg(RTM_ADD, kt, kf))
				kr->flags |= F_BGPD_INSERTED;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if ((kr6 = calloc(1, sizeof(*kr6))) == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		kr6->flags = kf->flags;
		kr6->prefix = kf->prefix.v6;
		kr6->prefix_scope_id = kf->prefix.scope_id;
		kr6->prefixlen = kf->prefixlen;
		if (kf->nexthop.aid == AID_INET6) {
			kr6->nexthop = kf->nexthop.v6;
			kr6->nexthop_scope_id = kf->nexthop.scope_id;
		} else
			kr6->nexthop = in6addr_any;

		if (kf->prefix.aid == AID_VPN_IPv6) {
			kr6->flags |= F_MPLS;
			kr6->mplslabel = htonl(mplslabel);
		}

		kr6->ifindex = kf->ifindex;
		kr6->priority = kf->priority;
		kr6->labelid = rtlabel_name2id(kf->label);

		if ((kr6m = RB_INSERT(kroute6_tree, &kt->krt6, kr6)) != NULL) {
			/* multipath route, add at end of list */
			while (kr6m->next != NULL)
				kr6m = kr6m->next;
			kr6m->next = kr6;
			multipath = 1;
		}

		if (kf->flags & F_BGPD)
			if (send_rtmsg(RTM_ADD, kt, kf))
				kr6->flags |= F_BGPD_INSERTED;
		break;
	}

	/* XXX this is wrong for nexthop validated via BGP */
	if (!(kf->flags & F_BGPD)) {
		RB_FOREACH(n, knexthop_tree, KT2KNT(kt))
			if (prefix_compare(&kf->prefix, &n->nexthop,
			    kf->prefixlen) == 0)
				knexthop_validate(kt, n);

		/* redistribute multipath routes only once */
		if (!multipath)
			kr_redistribute(IMSG_NETWORK_ADD, kt, kf);
	}

	return (0);
}


/*
 * Remove one IPv4 kroute described by kf from kt.
 *
 * With any == 0 the entry of the multipath list that matches the gateway
 * (see kroute_matchgw()) is removed, otherwise the head of the list.
 * On success *kf is overwritten with the full description of the removed
 * route before the route is freed.
 *
 * Returns 1 when other multipath entries remain for the prefix, 0 when
 * the last entry was removed, -1 when no route was found or its F_BGPD
 * flag does not agree with kf, and -2 when the gateway was not found or
 * the tree/list could not be updated.
 */
static int
kroute4_remove(struct ktable *kt, struct kroute_full *kf, int any)
{
	struct kroute	*kr, *krm;
	struct knexthop	*n;
	int multipath = 1;

	if ((kr = kroute_find(kt, &kf->prefix, kf->prefixlen,
	    kf->priority)) == NULL)
		return (-1);

	if ((kr->flags & F_BGPD) != (kf->flags & F_BGPD)) {
		log_warnx("%s: wrong type for %s/%u", __func__,
		    log_addr(&kf->prefix), kf->prefixlen);
		/* a non-bgpd removal hit a bgpd route: forget the insert */
		if (!(kf->flags & F_BGPD))
			kr->flags &= ~F_BGPD_INSERTED;
		return (-1);
	}

	/* get the correct route to remove */
	krm = kr;
	if (!any) {
		if ((krm = kroute_matchgw(kr, kf)) == NULL) {
			log_warnx("delete %s/%u: route not found",
			    log_addr(&kf->prefix), kf->prefixlen);
			return (-2);
		}
	}

	if (krm == kr) {
		/* head element */
		RB_REMOVE(kroute_tree, &kt->krt, krm);
		if (krm->next != NULL) {
			/* promote the next multipath entry into the tree */
			kr = krm->next;
			if (RB_INSERT(kroute_tree, &kt->krt, kr) != NULL) {
				log_warnx("%s: failed to add %s/%u",
				    __func__, inet_ntoa(kr->prefix),
				    kr->prefixlen);
				return (-2);
			}
		} else {
			/* that was the only path for this prefix */
			multipath = 0;
		}
	} else {
		/* somewhere in the list */
		while (kr->next != krm && kr->next != NULL)
			kr = kr->next;
		if (kr->next == NULL) {
			log_warnx("%s: multipath list corrupted for %s/%u",
			    __func__, inet_ntoa(kr->prefix), kr->prefixlen);
			return (-2);
		}
		/* unlink krm from the list */
		kr->next = krm->next;
	}

	/* check whether a nexthop depends on this kroute */
	if (krm->flags & F_NEXTHOP) {
		RB_FOREACH(n, knexthop_tree, KT2KNT(kt)) {
			if (n->kroute == krm)
				knexthop_validate(kt, n);
		}
	}

	/* hand the data of the removed route back to the caller */
	*kf = *kr_tofull(krm);

	rtlabel_unref(krm->labelid);
	free(krm);
	return (multipath);
}

/*
 * Remove one IPv6 kroute described by kf from kt.
 *
 * With any == 0 the entry of the multipath list that matches the gateway
 * (see kroute6_matchgw()) is removed, otherwise the head of the list.
 * On success *kf is overwritten with the full description of the removed
 * route before the route is freed.
 *
 * Returns 1 when other multipath entries remain for the prefix, 0 when
 * the last entry was removed, -1 when no route was found or its F_BGPD
 * flag does not agree with kf, and -2 when the gateway was not found or
 * the tree/list could not be updated.
 */
static int
kroute6_remove(struct ktable *kt, struct kroute_full *kf, int any)
{
	struct kroute6	*kr, *krm;
	struct knexthop	*n;
	int multipath = 1;

	if ((kr = kroute6_find(kt, &kf->prefix, kf->prefixlen,
	    kf->priority)) == NULL)
		return (-1);

	if ((kr->flags & F_BGPD) != (kf->flags & F_BGPD)) {
		log_warnx("%s: wrong type for %s/%u", __func__,
		    log_addr(&kf->prefix), kf->prefixlen);
		/* a non-bgpd removal hit a bgpd route: forget the insert */
		if (!(kf->flags & F_BGPD))
			kr->flags &= ~F_BGPD_INSERTED;
		return (-1);
	}

	/* get the correct route to remove */
	krm = kr;
	if (!any) {
		if ((krm = kroute6_matchgw(kr, kf)) == NULL) {
			log_warnx("delete %s/%u: route not found",
			    log_addr(&kf->prefix), kf->prefixlen);
			return (-2);
		}
	}

	if (krm == kr) {
		/* head element */
		RB_REMOVE(kroute6_tree, &kt->krt6, krm);
		if (krm->next != NULL) {
			/* promote the next multipath entry into the tree */
			kr = krm->next;
			if (RB_INSERT(kroute6_tree, &kt->krt6, kr) != NULL) {
				log_warnx("%s: failed to add %s/%u", __func__,
				    log_in6addr(&kr->prefix), kr->prefixlen);
				return (-2);
			}
		} else {
			/* that was the only path for this prefix */
			multipath = 0;
		}
	} else {
		/* somewhere in the list */
		while (kr->next != krm && kr->next != NULL)
			kr = kr->next;
		if (kr->next == NULL) {
			log_warnx("%s: multipath list corrupted for %s/%u",
			    __func__, log_in6addr(&kr->prefix), kr->prefixlen);
			return (-2);
		}
		/* unlink krm from the list */
		kr->next = krm->next;
	}

	/* check whether a nexthop depends on this kroute */
	if (krm->flags & F_NEXTHOP) {
		RB_FOREACH(n, knexthop_tree, KT2KNT(kt)) {
			if (n->kroute == krm)
				knexthop_validate(kt, n);
		}
	}

	/* hand the data of the removed route back to the caller */
	*kf = *kr6_tofull(krm);

	rtlabel_unref(krm->labelid);
	free(krm);
	return (multipath);
}


/*
 * Remove the route described by kf from kt, dispatching on the AID to
 * the IPv4 or IPv6 helper.
 *
 * A negative helper result is returned incremented by one, so -1 from
 * the helper becomes 0 and -2 becomes -1.  After a successful removal
 * the route is deleted via send_rtmsg(RTM_DELETE) if it carries
 * F_BGPD_INSERTED, and a non-bgpd route is withdrawn from redistribution
 * once no multipath entry is left.
 *
 * Returns 0 or -1; -1 is also returned for an AID that is not handled.
 */
int
kroute_remove(struct ktable *kt, struct kroute_full *kf, int any)
{
	int	remaining;

	if (kf->prefix.aid == AID_INET || kf->prefix.aid == AID_VPN_IPv4) {
		remaining = kroute4_remove(kt, kf, any);
	} else if (kf->prefix.aid == AID_INET6 ||
	    kf->prefix.aid == AID_VPN_IPv6) {
		remaining = kroute6_remove(kt, kf, any);
	} else {
		log_warnx("%s: not handled AID", __func__);
		return (-1);
	}

	if (remaining < 0)
		return (remaining + 1);

	if (kf->flags & F_BGPD_INSERTED)
		send_rtmsg(RTM_DELETE, kt, kf);

	/* remove only once all multipath routes are gone */
	if (remaining == 0 && !(kf->flags & F_BGPD))
		kr_redistribute(IMSG_NETWORK_REMOVE, kt, kf);

	return (0);
}

/*
 * Remove all IPv4 kroutes from kt, always taking the smallest node of
 * the tree next.
 */
void
kroute_clear(struct ktable *kt)
{
	struct kroute	*first;

	for (;;) {
		first = RB_MIN(kroute_tree, &kt->krt);
		if (first == NULL)
			break;
		kroute_remove(kt, kr_tofull(first), 1);
	}
}

struct kroute6 *
kroute6_find(struct ktable *kt, const struct bgpd_addr *prefix,
    uint8_t prefixlen, uint8_t prio)
{
	struct kroute6	s;
	struct kroute6	*kn6, *tmp;

	s.prefix = prefix->v6;
	s.prefix_scope_id = prefix->scope_id;
	s.prefixlen = prefixlen;
	s.priority = prio;

	kn6 = RB_FIND(kroute6_tree, &kt->krt6, &s);
	if (kn6 && prio == RTP_ANY) {
		tmp = RB_PREV(kroute6_tree, &kt->krt6, kn6);
		while (tmp) {
			if (kroute6_compare(&s, tmp) == 0)
				kn6 = tmp;
			else
				break;
			tmp = RB_PREV(kroute6_tree, &kt->krt6, kn6);
		}
	}
	return (kn6);
}

/*
 * Walk the multipath list starting at kr and return the first entry that
 * belongs to kf.  For routes flagged F_CONNECTED the interface index is
 * compared, for all others the IPv6 nexthop address and its scope id.
 * kr must not be NULL.  Returns NULL when no entry matches.
 */
struct kroute6 *
kroute6_matchgw(struct kroute6 *kr, struct kroute_full *kf)
{
	struct in6_addr	gw = kf->nexthop.v6;
	int		connected = (kf->flags & F_CONNECTED) != 0;

	do {
		if (connected) {
			if (kr->ifindex == kf->ifindex)
				return (kr);
		} else if (memcmp(&kr->nexthop, &gw, sizeof(gw)) == 0 &&
		    kr->nexthop_scope_id == kf->nexthop.scope_id) {
			return (kr);
		}
	} while ((kr = kr->next) != NULL);

	return (NULL);
}

/*
 * Remove all IPv6 kroutes from kt, always taking the smallest node of
 * the tree next.
 */
void
kroute6_clear(struct ktable *kt)
{
	struct kroute6	*first;

	for (;;) {
		first = RB_MIN(kroute6_tree, &kt->krt6);
		if (first == NULL)
			break;
		kroute_remove(kt, kr6_tofull(first), 1);
	}
}

struct knexthop *
knexthop_find(struct ktable *kt, struct bgpd_addr *addr)
{
	struct knexthop	s;

	memset(&s, 0, sizeof(s));
	memcpy(&s.nexthop, addr, sizeof(s.nexthop));

	return (RB_FIND(knexthop_tree, KT2KNT(kt), &s));
}

/*
 * Insert kn into the nexthop tree and validate it right away.
 * If an equal nexthop is already part of the tree a warning is logged,
 * kn is freed and -1 is returned.  Returns 0 on success.
 */
int
knexthop_insert(struct ktable *kt, struct knexthop *kn)
{
	if (RB_INSERT(knexthop_tree, KT2KNT(kt), kn) == NULL) {
		knexthop_validate(kt, kn);
		return (0);
	}

	/* duplicate: ownership was passed in, so release it here */
	log_warnx("%s: failed for %s", __func__,
	    log_addr(&kn->nexthop));
	free(kn);
	return (-1);
}

/*
 * Detach kn from the kroute it resolves through, take it out of the
 * nexthop tree and free it.
 */
void
knexthop_remove(struct ktable *kt, struct knexthop *kn)
{
	/* must happen first: clears kn->kroute and maybe F_NEXTHOP */
	kroute_detach_nexthop(kt, kn);
	RB_REMOVE(knexthop_tree, KT2KNT(kt), kn);
	free(kn);
}

/*
 * Remove and free every nexthop of the tree that KT2KNT(kt) selects.
 */
void
knexthop_clear(struct ktable *kt)
{
	struct knexthop	*first;

	for (;;) {
		first = RB_MIN(knexthop_tree, KT2KNT(kt));
		if (first == NULL)
			break;
		knexthop_remove(kt, first);
	}
}

struct kif *
kif_find(int ifindex)
{
	struct kif	s;

	memset(&s, 0, sizeof(s));
	s.ifindex = ifindex;

	return (RB_FIND(kif_tree, &kit, &s));
}

/*
 * Insert kif into the global interface tree.
 * If an interface with the same index already exists a warning is
 * logged, kif is freed and -1 is returned.  Returns 0 on success.
 */
int
kif_insert(struct kif *kif)
{
	if (RB_INSERT(kif_tree, &kit, kif) == NULL)
		return (0);

	/* duplicate: ownership was passed in, so release it here */
	log_warnx("RB_INSERT(kif_tree, &kit, kif)");
	free(kif);
	return (-1);
}

int
kif_remove(struct kif *kif)
{
	struct ktable	*kt;

	kif->flags &= ~IFF_UP;

	/*
	 * TODO, remove all kroutes using this interface,
	 * the kernel does this for us but better to do it
	 * here as well.
	 */

	if ((kt = ktable_get(kif->rdomain)) != NULL)
		knexthop_track(kt, kif->ifindex);

	RB_REMOVE(kif_tree, &kit, kif);
	free(kif);
	return (0);
}

/*
 * Remove and free every interface of the global interface tree.
 */
void
kif_clear(void)
{
	struct kif	*first;

	for (;;) {
		first = RB_MIN(kif_tree, &kit);
		if (first == NULL)
			break;
		kif_remove(first);
	}
}

/*
 * nexthop validation
 */

/*
 * Return 1 when the interface is up and its link state is anything but
 * LINK_STATE_DOWN, else 0.
 */
static int
kif_validate(struct kif *kif)
{
	if ((kif->flags & IFF_UP) == 0)
		return (0);

	/*
	 * we treat link_state == LINK_STATE_UNKNOWN as valid,
	 * not all interfaces have a concept of "link state" and/or
	 * do not report up
	 */
	return (kif->link_state != LINK_STATE_DOWN);
}

/*
 * Compute the state used for interface dependencies.
 * An interface that is not IFF_UP yields 0, and so does a carp interface
 * whose link state is LINK_STATE_UNKNOWN.  In all other cases the result
 * of LINK_STATE_IS_UP() for the link state is returned.
 */
static int
kif_depend_state(struct kif *kif)
{
	int	admin_up, carp_unknown;

	admin_up = (kif->flags & IFF_UP) != 0;
	carp_unknown = (kif->if_type == IFT_CARP &&
	    kif->link_state == LINK_STATE_UNKNOWN);

	if (!admin_up || carp_unknown)
		return (0);

	return LINK_STATE_IS_UP(kif->link_state);
}

/*
 * Tell whether the IPv4 route kr can be used to reach a nexthop.
 * Reject and blackhole routes yield 0.  If the interface of the route is
 * not known the route counts as usable (1) and a warning is logged for a
 * non-zero ifindex; otherwise nh_reachable of the interface is returned.
 */
int
kroute_validate(struct kroute *kr)
{
	struct kif	*kif;

	if (kr->flags & (F_REJECT | F_BLACKHOLE))
		return (0);

	kif = kif_find(kr->ifindex);
	if (kif != NULL)
		return (kif->nh_reachable);

	if (kr->ifindex != 0) {
		log_warnx("%s: interface with index %d not found, "
		    "referenced from route for %s/%u", __func__,
		    kr->ifindex, inet_ntoa(kr->prefix),
		    kr->prefixlen);
	}
	return (1);
}

/*
 * Tell whether the IPv6 route kr can be used to reach a nexthop.
 * Reject and blackhole routes yield 0.  If the interface of the route is
 * not known the route counts as usable (1) and a warning is logged for a
 * non-zero ifindex; otherwise nh_reachable of the interface is returned.
 */
int
kroute6_validate(struct kroute6 *kr)
{
	struct kif	*kif;

	if (kr->flags & (F_REJECT | F_BLACKHOLE))
		return (0);

	kif = kif_find(kr->ifindex);
	if (kif != NULL)
		return (kif->nh_reachable);

	if (kr->ifindex != 0) {
		log_warnx("%s: interface with index %d not found, "
		    "referenced from route for %s/%u", __func__,
		    kr->ifindex, log_in6addr(&kr->prefix),
		    kr->prefixlen);
	}
	return (1);
}

int
knexthop_true_nexthop(struct ktable *kt, struct kroute_full *kf)
{
	struct bgpd_addr gateway = { 0 };
	struct knexthop *kn;
	struct kroute	*kr;
	struct kroute6	*kr6;

	/*
	 * Ignore the nexthop for VPN routes. The gateway is forced
	 * to an mpe(4) interface route using an MPLS label.
	 */
	switch (kf->prefix.aid) {
	case AID_VPN_IPv4:
	case AID_VPN_IPv6:
		return 1;
	}

	kn = knexthop_find(kt, &kf->nexthop);
	if (kn == NULL) {
		log_warnx("%s: nexthop %s not found", __func__,
		    log_addr(&kf->nexthop));
		return 0;
	}
	if (kn->kroute == NULL)
		return 0;

	switch (kn->nexthop.aid) {
	case AID_INET:
		kr = kn->kroute;
		if (kr->flags & F_CONNECTED)
			return 1;
		gateway.aid = AID_INET;
		gateway.v4.s_addr = kr->nexthop.s_addr;
		break;
	case AID_INET6:
		kr6 = kn->kroute;
		if (kr6->flags & F_CONNECTED)
			return 1;
		gateway.aid = AID_INET6;
		gateway.v6 = kr6->nexthop;
		gateway.scope_id = kr6->nexthop_scope_id;
		break;
	}

	kf->nexthop = gateway;
	return 1;
}

/*
 * Re-resolve the nexthop kn.
 *
 * The nexthop is detached from its current kroute and looked up again in
 * the nexthop table of kt (kt->nhtableid).  When a route is found it is
 * attached to kn and flagged F_NEXTHOP.  An update is sent whenever the
 * route found differs from the one attached before, which includes a
 * change from or to no route at all.
 *
 * Calls fatalx() when the nexthop table does not exist.
 */
void
knexthop_validate(struct ktable *kt, struct knexthop *kn)
{
	void		*prev_route;
	struct kroute	*kr;
	struct kroute6	*kr6;
	int		 changed;

	prev_route = kn->kroute;
	kroute_detach_nexthop(kt, kn);

	if ((kt = ktable_get(kt->nhtableid)) == NULL)
		fatalx("%s: lost nexthop routing table", __func__);

	switch (kn->nexthop.aid) {
	case AID_INET:
		kr = kroute_match(kt, &kn->nexthop, 0);
		if (kr != NULL) {
			kn->kroute = kr;
			kn->ifindex = kr->ifindex;
			kr->flags |= F_NEXTHOP;
		}
		changed = (kr != prev_route);
		break;
	case AID_INET6:
		kr6 = kroute6_match(kt, &kn->nexthop, 0);
		if (kr6 != NULL) {
			kn->kroute = kr6;
			kn->ifindex = kr6->ifindex;
			kr6->flags |= F_NEXTHOP;
		}
		changed = (kr6 != prev_route);
		break;
	default:
		return;
	}

	/*
	 * Send update if nexthop route changed under us if
	 * the route remains the same then the NH state has not
	 * changed.
	 */
	if (changed)
		knexthop_send_update(kn);
}

/*
 * Called on interface state change.
 * Revalidates every nexthop of kt that is bound to interface ifindex.
 */
void
knexthop_track(struct ktable *kt, u_short ifindex)
{
	struct knexthop	*cur;

	RB_FOREACH(cur, knexthop_tree, KT2KNT(kt)) {
		if (cur->ifindex != ifindex)
			continue;
		knexthop_validate(kt, cur);
	}
}

/*
 * Called on route change.
 */
void
knexthop_update(struct ktable *kt, struct kroute_full *kf)
{
	struct knexthop	*kn;

	RB_FOREACH(kn, knexthop_tree, KT2KNT(kt))
		if (prefix_compare(&kf->prefix, &kn->nexthop,
		    kf->prefixlen) == 0)
			knexthop_send_update(kn);
}

void
knexthop_send_update(struct knexthop *kn)
{
	struct kroute_nexthop	 n;
	struct kroute		*kr;
	struct kroute6		*kr6;

	memset(&n, 0, sizeof(n));
	n.nexthop = kn->nexthop;

	if (kn->kroute == NULL) {
		n.valid = 0;	/* NH is not valid */
		send_nexthop_update(&n);
		return;
	}

	switch (kn->nexthop.aid) {
	case AID_INET:
		kr = kn->kroute;
		n.valid = kroute_validate(kr);
		n.connected = kr->flags & F_CONNECTED;
		if (!n.connected) {
			n.gateway.aid = AID_INET;
			n.gateway.v4.s_addr = kr->nexthop.s_addr;
		} else {
			n.gateway = n.nexthop;
			n.net.aid = AID_INET;
			n.net.v4.s_addr = kr->prefix.s_addr;
			n.netlen = kr->prefixlen;
		}
		break;
	case AID_INET6:
		kr6 = kn->kroute;
		n.valid = kroute6_validate(kr6);
		n.connected = kr6->flags & F_CONNECTED;
		if (!n.connected) {
			n.gateway.aid = AID_INET6;
			n.gateway.v6 = kr6->nexthop;
			n.gateway.scope_id = kr6->nexthop_scope_id;
		} else {
			n.gateway = n.nexthop;
			n.net.aid = AID_INET6;
			n.net.v6 = kr6->prefix;
			n.net.scope_id = kr6->prefix_scope_id;
			n.netlen = kr6->prefixlen;
		}
		break;
	}
	send_nexthop_update(&n);
}

/*
 * Longest prefix match for the IPv4 address key in kt.
 * Prefix lengths are tried from 32 down to 0.  Unless matchany is set a
 * candidate is only accepted when bgpd_oknexthop() agrees.
 * Returns the matching kroute or NULL.
 */
struct kroute *
kroute_match(struct ktable *kt, struct bgpd_addr *key, int matchany)
{
	struct bgpd_addr	 masked;
	struct kroute		*kr;
	int			 len = 32;

	while (len >= 0) {
		applymask(&masked, key, len);
		kr = kroute_find(kt, &masked, len, RTP_ANY);
		if (kr != NULL &&
		    (matchany || bgpd_oknexthop(kr_tofull(kr))))
			return (kr);
		len--;
	}

	return (NULL);
}

/*
 * Longest prefix match for the IPv6 address key in kt.
 * Prefix lengths are tried from 128 down to 0.  Unless matchany is set a
 * candidate is only accepted when bgpd_oknexthop() agrees.
 * Returns the matching kroute6 or NULL.
 */
struct kroute6 *
kroute6_match(struct ktable *kt, struct bgpd_addr *key, int matchany)
{
	struct bgpd_addr	 masked;
	struct kroute6		*kr6;
	int			 len = 128;

	while (len >= 0) {
		applymask(&masked, key, len);
		kr6 = kroute6_find(kt, &masked, len, RTP_ANY);
		if (kr6 != NULL &&
		    (matchany || bgpd_oknexthop(kr6_tofull(kr6))))
			return (kr6);
		len--;
	}

	return (NULL);
}

void
kroute_detach_nexthop(struct ktable *kt, struct knexthop *kn)
{
	struct knexthop	*s;
	struct kroute	*k;
	struct kroute6	*k6;

	if (kn->kroute == NULL)
		return;

	/*
	 * check whether there's another nexthop depending on this kroute
	 * if not remove the flag
	 */
	RB_FOREACH(s, knexthop_tree, KT2KNT(kt))
		if (s->kroute == kn->kroute && s != kn)
			break;

	if (s == NULL) {
		switch (kn->nexthop.aid) {
		case AID_INET:
			k = kn->kroute;
			k->flags &= ~F_NEXTHOP;
			break;
		case AID_INET6:
			k6 = kn->kroute;
			k6->flags &= ~F_NEXTHOP;
			break;
		}
	}

	kn->kroute = NULL;
	kn->ifindex = 0;
}

/*
 * misc helpers
 */

/*
 * Return the classful prefix length for the address ina, which is
 * compared as a plain number with the class in the top bits:
 * 8 for class A, 16 for class B, 24 for class C, 4 for class D and
 * 32 for class E.
 */
uint8_t
prefixlen_classful(in_addr_t ina)
{
	/* it hurt to write this. */

	if (ina < 0x80000000U)		/* class A */
		return (8);
	if (ina < 0xc0000000U)		/* class B */
		return (16);
	if (ina < 0xe0000000U)		/* class C */
		return (24);
	if (ina < 0xf0000000U)		/* class D */
		return (4);
	return (32);			/* class E */
}

/*
 * Convert the IPv4 netmask stored in sa_in (network byte order) into a
 * prefix length.  The length is derived from the position of the lowest
 * set bit of the mask; an all-zero mask yields 0.
 */
static uint8_t
mask2prefixlen4(struct sockaddr_in *sa_in)
{
	in_addr_t	hmask;

	hmask = ntohl(sa_in->sin_addr.s_addr);
	if (hmask == 0)
		return (0);

	/* ffs() counts from 1 at the least significant bit */
	return (33 - ffs(hmask));
}

/*
 * Convert the IPv6 netmask stored in sa_in6 into a prefix length.
 *
 * The mask bytes are read from the start of sin6_addr up to the end of
 * the sockaddr as given by sin6_len.  Counting stops at the first byte
 * that is not 0xff; bytes after that one are not looked at.  A byte that
 * is not a contiguous run of leading one bits is fatal, and so is a
 * resulting length of more than 128 bits.
 */
static uint8_t
mask2prefixlen6(struct sockaddr_in6 *sa_in6)
{
	uint8_t	*ap, *ep;
	u_int	 l = 0;

	/*
	 * sin6_len is the size of the sockaddr so subtract the offset of
	 * the possibly truncated sin6_addr struct.
	 */
	ap = (uint8_t *)&sa_in6->sin6_addr;
	ep = (uint8_t *)sa_in6 + sa_in6->sin6_len;
	for (; ap < ep; ap++) {
		/* this "beauty" is adopted from sbin/route/show.c ... */
		switch (*ap) {
		case 0xff:
			l += 8;
			break;
		case 0xfe:
			l += 7;
			goto done;
		case 0xfc:
			l += 6;
			goto done;
		case 0xf8:
			l += 5;
			goto done;
		case 0xf0:
			l += 4;
			goto done;
		case 0xe0:
			l += 3;
			goto done;
		case 0xc0:
			l += 2;
			goto done;
		case 0x80:
			l += 1;
			goto done;
		case 0x00:
			goto done;
		default:
			fatalx("non contiguous inet6 netmask");
		}
	}

 done:
	/* l is unsigned, print it with the matching conversion */
	if (l > sizeof(struct in6_addr) * 8)
		fatalx("%s: prefixlen %u out of bound", __func__, l);
	return (l);
}

/*
 * Convert the netmask sockaddr mask of address family af into a prefix
 * length.  Only AF_INET and AF_INET6 are supported, anything else is
 * fatal.
 */
uint8_t
mask2prefixlen(sa_family_t af, struct sockaddr *mask)
{
	if (af == AF_INET)
		return mask2prefixlen4((struct sockaddr_in *)mask);
	if (af == AF_INET6)
		return mask2prefixlen6((struct sockaddr_in6 *)mask);

	fatalx("%s: unsupported af", __func__);
}

/*
 * Description tables built from the LINK_STATE_DESCRIPTIONS and
 * IFM_TYPE_DESCRIPTIONS initializers; they are searched by
 * get_linkstate() and get_media_descr().
 */
const struct if_status_description
		if_status_descriptions[] = LINK_STATE_DESCRIPTIONS;
const struct ifmedia_description
		ifm_type_descriptions[] = IFM_TYPE_DESCRIPTIONS;

/*
 * Map an interface type (IFT_*) to the matching media type (IFM_*).
 * Returns 0 for interface types without a mapping.
 */
uint64_t
ift2ifm(uint8_t if_type)
{
	if (if_type == IFT_ETHER)
		return (IFM_ETHER);
	if (if_type == IFT_FDDI)
		return (IFM_FDDI);
	if (if_type == IFT_CARP)
		return (IFM_CARP);
	if (if_type == IFT_IEEE80211)
		return (IFM_IEEE80211);

	return (0);
}

/*
 * Return the description string for media_type from
 * ifm_type_descriptions, or "unknown media" when the table has no entry
 * for it.
 */
const char *
get_media_descr(uint64_t media_type)
{
	const struct ifmedia_description	*desc = ifm_type_descriptions;

	/* the table ends with an entry without a string */
	while (desc->ifmt_string != NULL) {
		if (desc->ifmt_word == media_type)
			return (desc->ifmt_string);
		desc++;
	}

	return ("unknown media");
}

/*
 * Return the description string for link_state on an interface of type
 * if_type.  If the table has no matching entry the numeric state is
 * formatted as "[#N]" into a static buffer, which the next such call
 * overwrites.
 */
const char *
get_linkstate(uint8_t if_type, int link_state)
{
	const struct if_status_description *desc = if_status_descriptions;
	static char unknown[8];

	/* the table ends with an entry without a string */
	while (desc->ifs_string != NULL) {
		if (LINK_STATE_DESC_MATCH(desc, if_type, link_state))
			return (desc->ifs_string);
		desc++;
	}

	snprintf(unknown, sizeof(unknown), "[#%d]", link_state);
	return (unknown);
}

/*
 * Round a up to the next multiple of sizeof(long); a value that is not
 * greater than 0 yields sizeof(long).  Used to step over the sockaddrs
 * of a routing message.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))

/*
 * Split the sockaddrs that start at sa into the array rti_info.
 * For every bit i below RTAX_MAX that is set in addrs, rti_info[i]
 * points to the next sockaddr, which is then skipped using its sa_len
 * rounded up by ROUNDUP().  Slots of bits that are not set become NULL.
 */
void
get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
{
	int	bit;

	for (bit = 0; bit < RTAX_MAX; bit++) {
		if ((addrs & (1 << bit)) == 0) {
			rti_info[bit] = NULL;
			continue;
		}

		rti_info[bit] = sa;
		sa = (struct sockaddr *)((char *)(sa) +
		    ROUNDUP(sa->sa_len));
	}
}

/*
 * Take over a state change of interface ifindex.
 *
 * The new state is logged and copied from flags and ifd into the kif,
 * the depend state is recomputed and kr_send_dependon() is called.  If
 * the result of kif_validate() differs from the stored nh_reachable it
 * is updated and, when the table of the interface's rdomain exists, the
 * nexthops using the interface are revalidated.
 * An unknown ifindex is only logged.
 */
void
if_change(u_short ifindex, int flags, struct if_data *ifd)
{
	struct ktable		*kt;
	struct kif		*kif;
	uint8_t			 now_reachable;

	kif = kif_find(ifindex);
	if (kif == NULL) {
		log_warnx("%s: interface with index %u not found",
		    __func__, ifindex);
		return;
	}

	log_info("%s: %s: rdomain %u %s, %s, %s, %s",
	    __func__, kif->ifname, ifd->ifi_rdomain,
	    flags & IFF_UP ? "UP" : "DOWN",
	    get_media_descr(ift2ifm(ifd->ifi_type)),
	    get_linkstate(ifd->ifi_type, ifd->ifi_link_state),
	    get_baudrate(ifd->ifi_baudrate, "bps"));

	kif->rdomain = ifd->ifi_rdomain;
	kif->baudrate = ifd->ifi_baudrate;
	kif->flags = flags;
	kif->link_state = ifd->ifi_link_state;
	kif->if_type = ifd->ifi_type;
	/* needs flags, if_type and link_state from above */
	kif->depend_state = kif_depend_state(kif);

	kr_send_dependon(kif);

	now_reachable = kif_validate(kif);
	if (now_reachable == kif->nh_reachable)
		return;		/* nothing changed wrt nexthop validity */

	kif->nh_reachable = now_reachable;

	if ((kt = ktable_get(kif->rdomain)) != NULL)
		knexthop_track(kt, ifindex);
}

/*
 * Handle an interface announcement; msg is treated as a
 * struct if_announcemsghdr.
 * On IFAN_ARRIVAL a new kif with the announced index and name is
 * allocated and inserted, on IFAN_DEPARTURE the kif with the announced
 * index is removed if it is known.  Other announcements are ignored.
 */
void
if_announce(void *msg)
{
	struct if_announcemsghdr	*ifan = msg;
	struct kif			*kif;

	if (ifan->ifan_what == IFAN_ARRIVAL) {
		kif = calloc(1, sizeof(*kif));
		if (kif == NULL) {
			log_warn("%s", __func__);
			return;
		}

		kif->ifindex = ifan->ifan_index;
		strlcpy(kif->ifname, ifan->ifan_name, sizeof(kif->ifname));
		/* kif_insert() frees kif itself when the insert fails */
		kif_insert(kif);
	} else if (ifan->ifan_what == IFAN_DEPARTURE) {
		kif = kif_find(ifan->ifan_index);
		if (kif != NULL)
			kif_remove(kif);
	}
}

/*
 * Query the MPLS label and the rdomain of the interface called name
 * using the SIOCGETLABEL and SIOCGIFRDOMAIN ioctls.
 *
 * *label and *rdomain are set to 0 first and only filled in when both
 * ioctls succeed.  Returns 0 on success and -1 when the socket cannot be
 * created or one of the ioctls fails.
 */
int
get_mpe_config(const char *name, u_int *rdomain, u_int *label)
{
	struct shim_hdr	shim;
	struct ifreq	ifr;
	int		fd, rv = -1;

	*rdomain = 0;
	*label = 0;

	fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (fd == -1)
		return (-1);

	memset(&ifr, 0, sizeof(ifr));
	memset(&shim, 0, sizeof(shim));
	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	/* the label is returned through ifr_data */
	ifr.ifr_data = (caddr_t)&shim;
	if (ioctl(fd, SIOCGETLABEL, (caddr_t)&ifr) == -1)
		goto done;

	ifr.ifr_data = NULL;
	if (ioctl(fd, SIOCGIFRDOMAIN, (caddr_t)&ifr) == -1)
		goto done;

	*rdomain = ifr.ifr_rdomainid;
	*label = shim.shim_label;
	rv = 0;

 done:
	close(fd);
	return (rv);
}

/*
 * rtsock related functions
 */
/* view a generic sockaddr pointer as a pointer to struct sockaddr_in6 */
#define satosin6(sa)	((struct sockaddr_in6 *)(sa))

/*
 * Build a routing message of type action for the route described by kf
 * and write it to the routing socket kr_state.fd.
 *
 * The message is assembled as an iovec: the rt_msghdr first, followed by
 * the destination, the gateway (only if kf has a nexthop), the netmask,
 * the interface and MPLS label sockaddrs (only with F_MPLS) and the route
 * label (only if kf->label is not empty).  Each sockaddr added also sets
 * the matching RTA_* bit in rtm_addrs and grows rtm_msglen.
 *
 * Returns 0 without doing anything if kt->fib_sync is not set.
 * Returns 1 if the message was written, or if an RTM_DELETE failed with
 * ESRCH (the route is already gone).  An RTM_CHANGE failing with ESRCH is
 * retried as RTM_ADD.  Any other write failure is logged and 0 is
 * returned.
 */
int
send_rtmsg(int action, struct ktable *kt, struct kroute_full *kf)
{
	/* header plus at most six sockaddrs, see above */
	struct iovec		 iov[7];
	struct rt_msghdr	 hdr;
	struct sockaddr_storage	 prefix, nexthop, mask, ifp, label, mpls;
	struct bgpd_addr	 netmask;
	struct sockaddr		*sa;
	struct sockaddr_dl	*dl;
	struct sockaddr_mpls	*mp;
	struct sockaddr_rtlabel	*la;
	socklen_t		 salen;
	int			 iovcnt = 0;

	if (!kt->fib_sync)
		return (0);

	/* initialize header */
	memset(&hdr, 0, sizeof(hdr));
	hdr.rtm_version = RTM_VERSION;
	hdr.rtm_type = action;
	hdr.rtm_tableid = kt->rtableid;
	hdr.rtm_priority = kr_state.fib_prio;
	if (kf->flags & F_BLACKHOLE)
		hdr.rtm_flags |= RTF_BLACKHOLE;
	if (kf->flags & F_REJECT)
		hdr.rtm_flags |= RTF_REJECT;
	if (action == RTM_CHANGE)	/* reset these flags on change */
		hdr.rtm_fmask = RTF_REJECT|RTF_BLACKHOLE;
	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
	hdr.rtm_msglen = sizeof(hdr);
	/* adjust iovec */
	iov[iovcnt].iov_base = &hdr;
	iov[iovcnt++].iov_len = sizeof(hdr);

	/* destination: copy the converted prefix into zeroed local storage */
	memset(&prefix, 0, sizeof(prefix));
	sa = addr2sa(&kf->prefix, 0, &salen);
	sa->sa_len = salen;
#ifdef __KAME__
	/* XXX need to embed the stupid scope for now */
	if (sa->sa_family == AF_INET6 &&
	    (IN6_IS_ADDR_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
	    IN6_IS_ADDR_MC_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
	    IN6_IS_ADDR_MC_NODELOCAL(&satosin6(sa)->sin6_addr))) {
		*(u_int16_t *)&satosin6(sa)->sin6_addr.s6_addr[2] =
		    htons(satosin6(sa)->sin6_scope_id);
		satosin6(sa)->sin6_scope_id = 0;
	}
#endif
	memcpy(&prefix, sa, salen);
	/* adjust header */
	hdr.rtm_addrs |= RTA_DST;
	hdr.rtm_msglen += ROUNDUP(salen);
	/* adjust iovec */
	iov[iovcnt].iov_base = &prefix;
	iov[iovcnt++].iov_len = ROUNDUP(salen);

	/* XXX can we even have no nexthop ??? */
	if (kf->nexthop.aid != AID_UNSPEC) {
		memset(&nexthop, 0, sizeof(nexthop));
		sa = addr2sa(&kf->nexthop, 0, &salen);
		sa->sa_len = salen;
#ifdef __KAME__
		/* XXX need to embed the stupid scope for now */
		if (sa->sa_family == AF_INET6 &&
		    (IN6_IS_ADDR_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
		    IN6_IS_ADDR_MC_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
		    IN6_IS_ADDR_MC_NODELOCAL(&satosin6(sa)->sin6_addr))) {
			*(u_int16_t *)&satosin6(sa)->sin6_addr.s6_addr[2] =
			    htons(satosin6(sa)->sin6_scope_id);
			satosin6(sa)->sin6_scope_id = 0;
		}
#endif
		memcpy(&nexthop, sa, salen);
		/* adjust header */
		hdr.rtm_flags |= RTF_GATEWAY;
		hdr.rtm_addrs |= RTA_GATEWAY;
		hdr.rtm_msglen += ROUNDUP(salen);
		/* adjust iovec */
		iov[iovcnt].iov_base = &nexthop;
		iov[iovcnt++].iov_len = ROUNDUP(salen);
	}

	/* netmask: start from all-ones and mask it down to prefixlen bits */
	memset(&netmask, 0, sizeof(netmask));
	memset(&netmask.v6, 0xff, sizeof(netmask.v6));
	netmask.aid = kf->prefix.aid;
	applymask(&netmask, &netmask, kf->prefixlen);
	memset(&mask, 0, sizeof(mask));
	sa = addr2sa(&netmask, 0, &salen);
	sa->sa_len = salen;
	memcpy(&mask, sa, salen);
	/* adjust header */
	hdr.rtm_addrs |= RTA_NETMASK;
	hdr.rtm_msglen += ROUNDUP(salen);
	/* adjust iovec */
	iov[iovcnt].iov_base = &mask;
	iov[iovcnt++].iov_len = ROUNDUP(salen);

	if (kf->flags & F_MPLS) {
		/* need to force interface for mpe(4) routes */
		memset(&ifp, 0, sizeof(ifp));
		dl = (struct sockaddr_dl *)&ifp;
		salen = sizeof(*dl);
		dl->sdl_len = salen;
		dl->sdl_family = AF_LINK;
		dl->sdl_index = kf->ifindex;
		/* adjust header */
		hdr.rtm_addrs |= RTA_IFP;
		hdr.rtm_msglen += ROUNDUP(salen);
		/* adjust iovec */
		iov[iovcnt].iov_base = &ifp;
		iov[iovcnt++].iov_len = ROUNDUP(salen);

		/* the MPLS label travels in the RTA_SRC slot */
		memset(&mpls, 0, sizeof(mpls));
		mp = (struct sockaddr_mpls *)&mpls;
		salen = sizeof(*mp);
		mp->smpls_len = salen;
		mp->smpls_family = AF_MPLS;
		mp->smpls_label = kf->mplslabel;
		/* adjust header */
		hdr.rtm_flags |= RTF_MPLS;
		hdr.rtm_mpls = MPLS_OP_PUSH;
		hdr.rtm_addrs |= RTA_SRC;
		hdr.rtm_msglen += ROUNDUP(salen);
		/* clear gateway flag since this is for mpe(4) */
		hdr.rtm_flags &= ~RTF_GATEWAY;
		/* adjust iovec */
		iov[iovcnt].iov_base = &mpls;
		iov[iovcnt++].iov_len = ROUNDUP(salen);
	}

	if (kf->label[0] != '\0') {
		memset(&label, 0, sizeof(label));
		la = (struct sockaddr_rtlabel *)&label;
		salen = sizeof(struct sockaddr_rtlabel);
		label.ss_len = salen;
		strlcpy(la->sr_label, kf->label, sizeof(la->sr_label));
		/* adjust header */
		hdr.rtm_addrs |= RTA_LABEL;
		hdr.rtm_msglen += ROUNDUP(salen);
		/* adjust iovec */
		iov[iovcnt].iov_base = &label;
		iov[iovcnt++].iov_len = ROUNDUP(salen);
	}

retry:
	if (writev(kr_state.fd, iov, iovcnt) == -1) {
		if (errno == ESRCH) {
			if (hdr.rtm_type == RTM_CHANGE) {
				/* nothing to change, add the route instead */
				hdr.rtm_type = RTM_ADD;
				goto retry;
			} else if (hdr.rtm_type == RTM_DELETE) {
				/* already gone, reported as success */
				log_info("route %s/%u vanished before delete",
				    log_addr(&kf->prefix), kf->prefixlen);
				return (1);
			}
		}
		log_warn("%s: action %u, prefix %s/%u", __func__, hdr.rtm_type,
		    log_addr(&kf->prefix), kf->prefixlen);
		return (0);
	}

	return (1);
}

/*
 * Dump the kernel routing table kt->rtableid with the NET_RT_DUMP sysctl
 * and walk the routes in it.  Routes that dispatch_rtmsg_addr() marks
 * with priority RTP_MINE are deleted from the kernel again, all other
 * routes are inserted into kt.
 *
 * Returns 0 on success and also when a table other than 0 does not
 * exist (EINVAL), -1 if a sysctl call or the buffer allocation failed.
 */
int
fetchtable(struct ktable *kt)
{
	int			 mib[7] = {
		CTL_NET, PF_ROUTE, 0, 0, NET_RT_DUMP, 0, kt->rtableid
	};
	struct kroute_full	 kf;
	struct rt_msghdr	*rtm;
	char			*dump = NULL, *cur, *end;
	size_t			 dumplen;

	/* first call only reports how much space the dump needs */
	if (sysctl(mib, 7, NULL, &dumplen, NULL, 0) == -1) {
		if (errno == EINVAL && kt->rtableid != 0)
			return (0);	/* table nonexistent */
		log_warn("%s: sysctl", __func__);
		return (-1);
	}

	if (dumplen > 0) {
		dump = malloc(dumplen);
		if (dump == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		if (sysctl(mib, 7, dump, &dumplen, NULL, 0) == -1) {
			log_warn("%s: sysctl2", __func__);
			free(dump);
			return (-1);
		}
	}

	end = dump + dumplen;
	for (cur = dump; cur < end; cur += rtm->rtm_msglen) {
		rtm = (struct rt_msghdr *)cur;

		/* skip foreign versions and routes we do not track */
		if (rtm->rtm_version != RTM_VERSION ||
		    dispatch_rtmsg_addr(rtm, &kf) == -1)
			continue;

		if (kf.priority != RTP_MINE) {
			kroute_insert(kt, &kf);
			continue;
		}

		/* route carries our own priority, remove it */
		send_rtmsg(RTM_DELETE, kt, &kf);
	}

	free(dump);
	return (0);
}

/*
 * Fetch the interface list with the NET_RT_IFLIST sysctl, passing
 * ifindex as the last mib element, and insert one struct kif for every
 * RTM_IFINFO message found in the reply.
 *
 * Returns 0 on success, -1 if a sysctl call or an allocation failed.
 */
int
fetchifs(int ifindex)
{
	/* AF does not matter but AF_INET is shorter */
	int			 mib[6] = {
		CTL_NET, PF_ROUTE, 0, AF_INET, NET_RT_IFLIST, ifindex
	};
	struct sockaddr		*addrs[RTAX_MAX], *sa;
	struct sockaddr_dl	*sdl;
	struct if_msghdr	 ifm;
	struct kif		*kif;
	char			*dump, *cur, *end;
	size_t			 dumplen, namelen;

	/* first call only reports how much space the list needs */
	if (sysctl(mib, 6, NULL, &dumplen, NULL, 0) == -1) {
		log_warn("%s: sysctl", __func__);
		return (-1);
	}
	dump = malloc(dumplen);
	if (dump == NULL) {
		log_warn("%s", __func__);
		return (-1);
	}
	if (sysctl(mib, 6, dump, &dumplen, NULL, 0) == -1) {
		log_warn("%s: sysctl2", __func__);
		free(dump);
		return (-1);
	}

	end = dump + dumplen;
	for (cur = dump; cur < end; cur += ifm.ifm_msglen) {
		/* work on a copy of the header */
		memcpy(&ifm, cur, sizeof(ifm));
		if (ifm.ifm_version != RTM_VERSION ||
		    ifm.ifm_type != RTM_IFINFO)
			continue;

		/* the sockaddrs follow directly behind the header */
		get_rtaddrs(ifm.ifm_addrs,
		    (struct sockaddr *)(cur + sizeof(ifm)), addrs);

		kif = calloc(1, sizeof(*kif));
		if (kif == NULL) {
			log_warn("%s", __func__);
			free(dump);
			return (-1);
		}

		kif->ifindex = ifm.ifm_index;
		kif->flags = ifm.ifm_flags;
		kif->link_state = ifm.ifm_data.ifi_link_state;
		kif->if_type = ifm.ifm_data.ifi_type;
		kif->rdomain = ifm.ifm_data.ifi_rdomain;
		kif->baudrate = ifm.ifm_data.ifi_baudrate;
		kif->nh_reachable = kif_validate(kif);
		kif->depend_state = kif_depend_state(kif);

		sa = addrs[RTAX_IFP];
		if (sa != NULL && sa->sa_family == AF_LINK) {
			sdl = (struct sockaddr_dl *)sa;
			/* clamp so the NUL left by calloc() survives */
			namelen = sdl->sdl_nlen;
			if (namelen >= sizeof(kif->ifname))
				namelen = sizeof(kif->ifname) - 1;
			if (namelen > 0)
				memcpy(kif->ifname, sdl->sdl_data, namelen);
		}

		kif_insert(kif);
	}

	free(dump);
	return (0);
}

/*
 * Read one batch of messages from the routing socket kr_state.fd and
 * apply them.
 *
 * RTM_ADD, RTM_CHANGE and RTM_DELETE messages that were not sent by this
 * process, completed without error and belong to a known table are
 * decoded with dispatch_rtmsg_addr() and passed on to kr_fib_change() or
 * kr_fib_delete().  RTM_IFINFO and RTM_IFANNOUNCE are forwarded to
 * if_change() and if_announce().  All other message types are ignored.
 *
 * Returns 0 on success or when read(2) failed with EAGAIN or EINTR.
 * Returns -1 on any other read error, when the socket was closed, or
 * when kr_fib_change() or kr_fib_delete() failed.  A truncated message
 * in the buffer is fatal.
 */
int
dispatch_rtmsg(void)
{
	char			 buf[RT_BUF_SIZE];
	ssize_t			 n;
	char			*next, *lim;
	struct rt_msghdr	*rtm;
	struct if_msghdr	 ifm;
	struct kroute_full	 kf;
	struct ktable		*kt;
	int			 mpath;

	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
		if (errno == EAGAIN || errno == EINTR)
			return (0);
		log_warn("%s: read error", __func__);
		return (-1);
	}

	if (n == 0) {
		log_warnx("routing socket closed");
		return (-1);
	}

	lim = buf + n;
	for (next = buf; next < lim; next += rtm->rtm_msglen) {
		rtm = (struct rt_msghdr *)next;
		/* rtm_msglen must be readable and the message complete */
		if (lim < next + sizeof(u_short) ||
		    lim < next + rtm->rtm_msglen)
			fatalx("%s: partial rtm in buffer", __func__);
		if (rtm->rtm_version != RTM_VERSION)
			continue;

		switch (rtm->rtm_type) {
		case RTM_ADD:
		case RTM_CHANGE:
		case RTM_DELETE:
			if (rtm->rtm_pid == kr_state.pid) /* caused by us */
				continue;

			/* failed attempts */
			if (rtm->rtm_errno || !(rtm->rtm_flags & RTF_DONE))
				continue;

			if ((kt = ktable_get(rtm->rtm_tableid)) == NULL)
				continue;

			if (dispatch_rtmsg_addr(rtm, &kf) == -1)
				continue;

			/*
			 * Evaluate the multipath flag for every message on
			 * its own so that one RTF_MPATH route does not make
			 * later messages in the same buffer look like
			 * multipath routes as well.
			 */
			mpath = (rtm->rtm_flags & RTF_MPATH) ? 1 : 0;

			switch (rtm->rtm_type) {
			case RTM_ADD:
			case RTM_CHANGE:
				if (kr_fib_change(kt, &kf, rtm->rtm_type,
				    mpath) == -1)
					return -1;
				break;
			case RTM_DELETE:
				if (kr_fib_delete(kt, &kf, mpath) == -1)
					return -1;
				break;
			}
			break;
		case RTM_IFINFO:
			/* work on a copy of the interface header */
			memcpy(&ifm, next, sizeof(ifm));
			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data);
			break;
		case RTM_IFANNOUNCE:
			if_announce(next);
			break;
		default:
			/* ignore for now */
			break;
		}
	}
	return (0);
}

/*
 * Decode the route carried by rtm into kf.
 *
 * Messages flagged RTF_LLINFO, RTF_BROADCAST or RTF_DYNAMIC and messages
 * without a destination are rejected before kf is touched.  Otherwise kf
 * is zeroed and filled with the flags, priority, route label, prefix,
 * prefix length, interface index and nexthop.  A priority equal to
 * kr_state.fib_prio is stored as RTP_MINE.
 *
 * Returns 0 on success and -1 if the message is skipped: filtered flags,
 * no destination, a destination that is neither AF_INET nor AF_INET6, or
 * no gateway.  A route label that does not fit and an IPv6 network route
 * without netmask are fatal.
 */
int
dispatch_rtmsg_addr(struct rt_msghdr *rtm, struct kroute_full *kf)
{
	struct sockaddr		*addrs[RTAX_MAX], *dst, *gw;
	struct sockaddr_rtlabel	*rtlabel;
	struct sockaddr_in	*mask4;
	struct sockaddr_in6	*mask6;

	/* the sockaddrs start rtm_hdrlen bytes into the message */
	get_rtaddrs(rtm->rtm_addrs,
	    (struct sockaddr *)((char *)rtm + rtm->rtm_hdrlen), addrs);

	/* Skip ARP/ND cache, broadcast and dynamic routes. */
	if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST|RTF_DYNAMIC))
		return (-1);

	dst = addrs[RTAX_DST];
	if (dst == NULL) {
		log_warnx("route message without destination");
		return (-1);
	}

	memset(kf, 0, sizeof(*kf));

	if (rtm->rtm_flags & RTF_STATIC)
		kf->flags |= F_STATIC;
	if (rtm->rtm_flags & RTF_BLACKHOLE)
		kf->flags |= F_BLACKHOLE;
	if (rtm->rtm_flags & RTF_REJECT)
		kf->flags |= F_REJECT;

	/* adjust priority here */
	kf->priority = (rtm->rtm_priority == kr_state.fib_prio) ?
	    RTP_MINE : rtm->rtm_priority;

	rtlabel = (struct sockaddr_rtlabel *)addrs[RTAX_LABEL];
	if (rtlabel != NULL &&
	    strlcpy(kf->label, rtlabel->sr_label, sizeof(kf->label)) >=
	    sizeof(kf->label))
		fatalx("rtm label overflow");

	sa2addr(dst, &kf->prefix, NULL);
	if (dst->sa_family == AF_INET) {
		mask4 = (struct sockaddr_in *)addrs[RTAX_NETMASK];
		if (rtm->rtm_flags & RTF_HOST)
			kf->prefixlen = 32;
		else if (mask4 != NULL)
			kf->prefixlen = mask2prefixlen4(mask4);
		else
			kf->prefixlen =
			    prefixlen_classful(kf->prefix.v4.s_addr);
	} else if (dst->sa_family == AF_INET6) {
		mask6 = (struct sockaddr_in6 *)addrs[RTAX_NETMASK];
		if (rtm->rtm_flags & RTF_HOST)
			kf->prefixlen = 128;
		else if (mask6 != NULL)
			kf->prefixlen = mask2prefixlen6(mask6);
		else
			fatalx("in6 net addr without netmask");
	} else {
		return (-1);
	}

	gw = addrs[RTAX_GATEWAY];
	if (gw == NULL) {
		log_warnx("route %s/%u without gateway",
		    log_addr(&kf->prefix), kf->prefixlen);
		return (-1);
	}

	kf->ifindex = rtm->rtm_index;

	/* no gateway flag or a link-level gateway: connected route */
	if (!(rtm->rtm_flags & RTF_GATEWAY) || gw->sa_family == AF_LINK)
		kf->flags |= F_CONNECTED;
	else if (gw->sa_family == AF_INET || gw->sa_family == AF_INET6)
		sa2addr(gw, &kf->nexthop, NULL);

	return (0);
}

/*
 * Remove the route described by kf from kt.  kroute_remove() is handed
 * the logical negation of mpath as its third argument and its result is
 * returned unchanged.
 */
int
kr_fib_delete(struct ktable *kt, struct kroute_full *kf, int mpath)
{
	int	not_mpath = (mpath == 0);

	return (kroute_remove(kt, kf, not_mpath));
}

/*
 * Apply a route addition or change (type is RTM_ADD or RTM_CHANGE) to kt.
 *
 * The route is looked up by prefix, prefix length and priority.  If no
 * entry exists, kf is inserted with kroute_insert().  If an entry exists
 * and kf carries F_BGPD, only F_BGPD_INSERTED is cleared on the entry.
 * Otherwise the entry's nexthop, interface index, route label and flags
 * are replaced with the values from kf; for multipath routes an RTM_ADD
 * always inserts a new entry and an RTM_CHANGE first selects the entry
 * with the matching gateway (inserting a new one if none matches).
 *
 * kr_redistribute() is called when the route label changed and when the
 * F_CONNECTED flag was gained or lost.  knexthop_update() is called when
 * the entry is flagged F_NEXTHOP and its nexthop or flags changed.
 *
 * Prefixes that are neither AID_INET nor AID_INET6 are ignored.
 * Always returns 0; the result of kroute_insert() is not checked.
 */
int
kr_fib_change(struct ktable *kt, struct kroute_full *kf, int type, int mpath)
{
	struct kroute	*kr;
	struct kroute6	*kr6;
	int		 flags, oflags;
	int		 changed = 0, rtlabel_changed = 0;
	uint16_t	 new_labelid;

	flags = kf->flags;
	switch (kf->prefix.aid) {
	case AID_INET:
		if ((kr = kroute_find(kt, &kf->prefix, kf->prefixlen,
		    kf->priority)) != NULL) {
			if (!(kf->flags & F_BGPD)) {
				/* get the correct route */
				if (mpath && type == RTM_CHANGE &&
				    (kr = kroute_matchgw(kr, kf)) == NULL) {
					log_warnx("%s[change]: "
					    "mpath route not found", __func__);
					goto add4;
				} else if (mpath && type == RTM_ADD)
					goto add4;

				/* take over the nexthop, 0 if kf has none */
				if (kf->nexthop.aid == AID_INET) {
					if (kr->nexthop.s_addr !=
					    kf->nexthop.v4.s_addr)
						changed = 1;
					kr->nexthop.s_addr =
					    kf->nexthop.v4.s_addr;
					kr->ifindex = kf->ifindex;
				} else {
					if (kr->nexthop.s_addr != 0)
						changed = 1;
					kr->nexthop.s_addr = 0;
					kr->ifindex = kf->ifindex;
				}

				/* F_NEXTHOP is kept across the flag update */
				if (kr->flags & F_NEXTHOP)
					flags |= F_NEXTHOP;

				/*
				 * NOTE(review): if rtlabel_name2id() takes a
				 * reference on the id it returns, that
				 * reference is not dropped when the id is
				 * unchanged -- confirm against the rtlabel
				 * code.
				 */
				new_labelid = rtlabel_name2id(kf->label);
				if (kr->labelid != new_labelid) {
					rtlabel_unref(kr->labelid);
					kr->labelid = new_labelid;
					rtlabel_changed = 1;
				}

				oflags = kr->flags;
				if (flags != oflags)
					changed = 1;
				kr->flags = flags;

				if (rtlabel_changed)
					kr_redistribute(IMSG_NETWORK_ADD,
					    kt, kr_tofull(kr));

				/* F_CONNECTED lost or gained */
				if ((oflags & F_CONNECTED) &&
				    !(flags & F_CONNECTED))
					kr_redistribute(IMSG_NETWORK_ADD,
					    kt, kr_tofull(kr));
				if ((flags & F_CONNECTED) &&
				    !(oflags & F_CONNECTED))
					kr_redistribute(IMSG_NETWORK_ADD,
					    kt, kr_tofull(kr));

				if (kr->flags & F_NEXTHOP && changed)
					knexthop_update(kt, kf);
			} else {
				kr->flags &= ~F_BGPD_INSERTED;
			}
		} else {
add4:
			kroute_insert(kt, kf);
		}
		break;
	case AID_INET6:
		/* same steps as the AID_INET case, on struct kroute6 */
		if ((kr6 = kroute6_find(kt, &kf->prefix, kf->prefixlen,
		    kf->priority)) != NULL) {
			if (!(kf->flags & F_BGPD)) {
				/* get the correct route */
				if (mpath && type == RTM_CHANGE &&
				    (kr6 = kroute6_matchgw(kr6, kf)) == NULL) {
					log_warnx("%s[change]: IPv6 mpath "
					    "route not found", __func__);
					goto add6;
				} else if (mpath && type == RTM_ADD)
					goto add6;

				/* nexthop and scope id, zeroed if kf has none */
				if (kf->nexthop.aid == AID_INET6) {
					if (memcmp(&kr6->nexthop,
					    &kf->nexthop.v6,
					    sizeof(struct in6_addr)) ||
					    kr6->nexthop_scope_id !=
					    kf->nexthop.scope_id)
						changed = 1;
					kr6->nexthop = kf->nexthop.v6;
					kr6->nexthop_scope_id =
					    kf->nexthop.scope_id;
					kr6->ifindex = kf->ifindex;
				} else {
					if (memcmp(&kr6->nexthop,
					    &in6addr_any,
					    sizeof(struct in6_addr)))
						changed = 1;
					kr6->nexthop = in6addr_any;
					kr6->nexthop_scope_id = 0;
					kr6->ifindex = kf->ifindex;
				}

				/* F_NEXTHOP is kept across the flag update */
				if (kr6->flags & F_NEXTHOP)
					flags |= F_NEXTHOP;

				/*
				 * NOTE(review): same open question about the
				 * reference returned by rtlabel_name2id() as
				 * in the AID_INET case -- confirm.
				 */
				new_labelid = rtlabel_name2id(kf->label);
				if (kr6->labelid != new_labelid) {
					rtlabel_unref(kr6->labelid);
					kr6->labelid = new_labelid;
					rtlabel_changed = 1;
				}

				oflags = kr6->flags;
				if (flags != oflags)
					changed = 1;
				kr6->flags = flags;

				if (rtlabel_changed)
					kr_redistribute(IMSG_NETWORK_ADD,
					    kt, kr6_tofull(kr6));

				/* F_CONNECTED lost or gained */
				if ((oflags & F_CONNECTED) &&
				    !(flags & F_CONNECTED))
					kr_redistribute(IMSG_NETWORK_ADD,
					    kt, kr6_tofull(kr6));
				if ((flags & F_CONNECTED) &&
				    !(oflags & F_CONNECTED))
					kr_redistribute(IMSG_NETWORK_ADD,
					    kt, kr6_tofull(kr6));

				if (kr6->flags & F_NEXTHOP && changed)
					knexthop_update(kt, kf);
			} else {
				kr6->flags &= ~F_BGPD_INSERTED;
			}
		} else {
add6:
			kroute_insert(kt, kf);
		}
		break;
	}

	return (0);
}