[BACK]Return to mta.c CVS log [TXT][DIR] Up to [local] / src / usr.sbin / smtpd

File: [local] / src / usr.sbin / smtpd / mta.c (download)

Revision 1.248, Tue Apr 23 13:34:51 2024 UTC (5 weeks, 4 days ago) by jsg
Branch: MAIN
CVS Tags: HEAD
Changes since 1.247: +2 -2 lines

correct indentation; no functional change
ok tb@

/*	$OpenBSD: mta.c,v 1.248 2024/04/23 13:34:51 jsg Exp $	*/

/*
 * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@openbsd.org>
 * Copyright (c) 2008 Gilles Chehade <gilles@poolp.org>
 * Copyright (c) 2009 Jacek Masiulaniec <jacekm@dobremiasto.net>
 * Copyright (c) 2012 Eric Faurot <eric@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <tls.h>

#include "smtpd.h"
#include "log.h"
#include "ssl.h"

#define MAXERROR_PER_ROUTE	4

#define DELAY_CHECK_SOURCE	1
#define DELAY_CHECK_SOURCE_SLOW	10
#define DELAY_CHECK_SOURCE_FAST 0
#define DELAY_CHECK_LIMIT	5

#define	DELAY_QUADRATIC		1
#define DELAY_ROUTE_BASE	15
#define DELAY_ROUTE_MAX		3600

#define RELAY_ONHOLD		0x01
#define RELAY_HOLDQ		0x02

static void mta_setup_dispatcher(struct dispatcher *);
static void mta_handle_envelope(struct envelope *, const char *);
static void mta_query_smarthost(struct envelope *);
static void mta_on_smarthost(struct envelope *, const char *);
static void mta_query_mx(struct mta_relay *);
static void mta_query_secret(struct mta_relay *);
static void mta_query_preference(struct mta_relay *);
static void mta_query_source(struct mta_relay *);
static void mta_on_mx(void *, void *, void *);
static void mta_on_secret(struct mta_relay *, const char *);
static void mta_on_preference(struct mta_relay *, int);
static void mta_on_source(struct mta_relay *, struct mta_source *);
static void mta_on_timeout(struct runq *, void *);
static void mta_connect(struct mta_connector *);
static void mta_route_enable(struct mta_route *);
static void mta_route_disable(struct mta_route *, int, int);
static void mta_drain(struct mta_relay *);
static void mta_delivery_flush_event(int, short, void *);
static void mta_flush(struct mta_relay *, int, const char *);
static struct mta_route *mta_find_route(struct mta_connector *, time_t, int*,
    time_t*, struct mta_mx **);
static void mta_log(const struct mta_envelope *, const char *, const char *,
    const char *, const char *);

SPLAY_HEAD(mta_relay_tree, mta_relay);
static struct mta_relay *mta_relay(struct envelope *, struct relayhost *);
static void mta_relay_ref(struct mta_relay *);
static void mta_relay_unref(struct mta_relay *);
static void mta_relay_show(struct mta_relay *, struct mproc *, uint32_t, time_t);
static int mta_relay_cmp(const struct mta_relay *, const struct mta_relay *);
SPLAY_PROTOTYPE(mta_relay_tree, mta_relay, entry, mta_relay_cmp);

SPLAY_HEAD(mta_host_tree, mta_host);
static struct mta_host *mta_host(const struct sockaddr *);
static void mta_host_ref(struct mta_host *);
static void mta_host_unref(struct mta_host *);
static int mta_host_cmp(const struct mta_host *, const struct mta_host *);
SPLAY_PROTOTYPE(mta_host_tree, mta_host, entry, mta_host_cmp);

SPLAY_HEAD(mta_domain_tree, mta_domain);
static struct mta_domain *mta_domain(char *, int);
#if 0
static void mta_domain_ref(struct mta_domain *);
#endif
static void mta_domain_unref(struct mta_domain *);
static int mta_domain_cmp(const struct mta_domain *, const struct mta_domain *);
SPLAY_PROTOTYPE(mta_domain_tree, mta_domain, entry, mta_domain_cmp);

SPLAY_HEAD(mta_source_tree, mta_source);
static struct mta_source *mta_source(const struct sockaddr *);
static void mta_source_ref(struct mta_source *);
static void mta_source_unref(struct mta_source *);
static const char *mta_source_to_text(struct mta_source *);
static int mta_source_cmp(const struct mta_source *, const struct mta_source *);
SPLAY_PROTOTYPE(mta_source_tree, mta_source, entry, mta_source_cmp);

static struct mta_connector *mta_connector(struct mta_relay *,
    struct mta_source *);
static void mta_connector_free(struct mta_connector *);
static const char *mta_connector_to_text(struct mta_connector *);

SPLAY_HEAD(mta_route_tree, mta_route);
static struct mta_route *mta_route(struct mta_source *, struct mta_host *);
static void mta_route_ref(struct mta_route *);
static void mta_route_unref(struct mta_route *);
static const char *mta_route_to_text(struct mta_route *);
static int mta_route_cmp(const struct mta_route *, const struct mta_route *);
SPLAY_PROTOTYPE(mta_route_tree, mta_route, entry, mta_route_cmp);

/*
 * One entry of the "blocks" splay tree: a source address paired with an
 * optional domain.  A NULL domain is displayed as "*" by the
 * IMSG_CTL_MTA_SHOW_BLOCK handler.
 */
struct mta_block {
	SPLAY_ENTRY(mta_block)	 entry;		/* linkage in mta_block_tree */
	struct mta_source	*source;	/* source the block applies to */
	char			*domain;	/* domain name, or NULL */
};

SPLAY_HEAD(mta_block_tree, mta_block);
void mta_block(struct mta_source *, char *);
void mta_unblock(struct mta_source *, char *);
int mta_is_blocked(struct mta_source *, char *);
static int mta_block_cmp(const struct mta_block *, const struct mta_block *);
SPLAY_PROTOTYPE(mta_block_tree, mta_block, entry, mta_block_cmp);

/*
 * This function is not publicly exported because it is a hack until libtls
 * has a proper privsep setup
 */
void tls_config_use_fake_private_key(struct tls_config *config);

static struct mta_relay_tree		relays;
static struct mta_domain_tree		domains;
static struct mta_host_tree		hosts;
static struct mta_source_tree		sources;
static struct mta_route_tree		routes;
static struct mta_block_tree		blocks;

static struct tree wait_mx;
static struct tree wait_preference;
static struct tree wait_secret;
static struct tree wait_smarthost;
static struct tree wait_source;
static struct tree flush_evp;
static struct event ev_flush_evp;

static struct runq *runq_relay;
static struct runq *runq_connector;
static struct runq *runq_route;
static struct runq *runq_hoststat;

static time_t	max_seen_conndelay_route;
static time_t	max_seen_discdelay_route;

#define	HOSTSTAT_EXPIRE_DELAY	(4 * 3600)
/*
 * Per-host status record kept in the "hoststat" dictionary.  The
 * IMSG_CTL_MTA_SHOW_HOSTSTATS handler reports the tm and error fields
 * for each entry.
 */
struct hoststat {
	char			 name[HOST_NAME_MAX+1];	/* host name */
	time_t			 tm;			/* timestamp, reported as an integer */
	char			 error[LINE_MAX];	/* error text reported for the host */
	struct tree		 deferred;		/* NOTE(review): presumably deferred envelopes - confirm */
};
static struct dict hoststat;

void mta_hoststat_update(const char *, const char *);
void mta_hoststat_cache(const char *, uint64_t);
void mta_hoststat_uncache(const char *, uint64_t);
void mta_hoststat_reschedule(const char *);
static void mta_hoststat_remove_entry(struct hoststat *);

/*
 * Handle one imsg according to imsg->hdr.type.
 *
 * Replies to lookups are matched to their pending request through the
 * wait_* trees using the request id carried in the message.  Control
 * requests (IMSG_CTL_*) are answered on p, tagged with imsg->hdr.peerid.
 * A message type that no case handles is fatal.
 */
void
mta_imsg(struct mproc *p, struct imsg *imsg)
{
	struct mta_relay	*relay;
	struct mta_domain	*domain;
	struct mta_host		*host;
	struct mta_route	*route;
	struct mta_block	*block;
	struct mta_mx		*mx, *imx;
	struct mta_source	*source;
	struct hoststat		*hs;
	struct sockaddr_storage	 ss;
	struct envelope		 evp, *e;
	struct msg		 m;
	const char		*secret;
	const char		*hostname;
	const char		*dom;
	const char		*smarthost;
	uint64_t		 reqid;
	time_t			 t;
	char			 buf[LINE_MAX];
	int			 dnserror, preference, v, status;
	void			*iter;
	uint64_t		 u64;

	switch (imsg->hdr.type) {
	case IMSG_QUEUE_TRANSFER:
		/* New envelope: decode it and handle it with no smarthost yet. */
		m_msg(&m, imsg);
		m_get_envelope(&m, &evp);
		m_end(&m);
		mta_handle_envelope(&evp, NULL);
		return;

	case IMSG_MTA_OPEN_MESSAGE:
		mta_session_imsg(p, imsg);
		return;

	case IMSG_MTA_LOOKUP_CREDENTIALS:
		/* Reply to mta_query_secret(); an empty string is passed on as NULL. */
		m_msg(&m, imsg);
		m_get_id(&m, &reqid);
		m_get_string(&m, &secret);
		m_end(&m);
		relay = tree_xpop(&wait_secret, reqid);
		mta_on_secret(relay, secret[0] ? secret : NULL);
		return;

	case IMSG_MTA_LOOKUP_SOURCE:
		/* Reply to mta_query_source(); ss is only filled on LKA_OK. */
		m_msg(&m, imsg);
		m_get_id(&m, &reqid);
		m_get_int(&m, &status);
		if (status == LKA_OK)
			m_get_sockaddr(&m, (struct sockaddr*)&ss);
		m_end(&m);

		relay = tree_xpop(&wait_source, reqid);
		mta_on_source(relay, (status == LKA_OK) ?
		    mta_source((struct sockaddr *)&ss) : NULL);
		return;

	case IMSG_MTA_LOOKUP_SMARTHOST:
		/* Reply to mta_query_smarthost(); smarthost stays NULL on failure. */
		m_msg(&m, imsg);
		m_get_id(&m, &reqid);
		m_get_int(&m, &status);
		smarthost = NULL;
		if (status == LKA_OK)
			m_get_string(&m, &smarthost);
		m_end(&m);

		e = tree_xpop(&wait_smarthost, reqid);
		mta_on_smarthost(e, smarthost);
		return;

	case IMSG_MTA_LOOKUP_HELO:
		mta_session_imsg(p, imsg);
		return;

	case IMSG_MTA_DNS_HOST:
		/*
		 * One resolved host for a pending MX query.  The domain is
		 * looked up but left in wait_mx; IMSG_MTA_DNS_HOST_END is
		 * what pops it.
		 */
		m_msg(&m, imsg);
		m_get_id(&m, &reqid);
		m_get_string(&m, &hostname);
		m_get_sockaddr(&m, (struct sockaddr*)&ss);
		m_get_int(&m, &preference);
		m_end(&m);
		domain = tree_xget(&wait_mx, reqid);
		mx = xcalloc(1, sizeof *mx);
		mx->mxname = xstrdup(hostname);
		mx->host = mta_host((struct sockaddr*)&ss);
		mx->preference = preference;
		/* Keep the list ordered by ascending preference value. */
		TAILQ_FOREACH(imx, &domain->mxs, entry) {
			if (imx->preference > mx->preference) {
				TAILQ_INSERT_BEFORE(imx, mx, entry);
				return;
			}
		}
		TAILQ_INSERT_TAIL(&domain->mxs, mx, entry);
		return;

	case IMSG_MTA_DNS_HOST_END:
		/* End of an MX query: record the status and run the waiters. */
		m_msg(&m, imsg);
		m_get_id(&m, &reqid);
		m_get_int(&m, &dnserror);
		m_end(&m);
		domain = tree_xpop(&wait_mx, reqid);
		domain->mxstatus = dnserror;
		if (domain->mxstatus == DNS_OK) {
			log_debug("debug: MXs for domain %s:",
			    domain->name);
			TAILQ_FOREACH(mx, &domain->mxs, entry)
				log_debug("	%s preference %d",
				    sa_to_text(mx->host->sa),
				    mx->preference);
		}
		else {
			log_debug("debug: Failed MX query for %s:",
			    domain->name);
		}
		domain->lastmxquery = time(NULL);
		waitq_run(&domain->mxs, domain);
		return;

	case IMSG_MTA_DNS_MX_PREFERENCE:
		/* Reply to mta_query_preference(); preference only sent on success. */
		m_msg(&m, imsg);
		m_get_id(&m, &reqid);
		m_get_int(&m, &dnserror);
		if (dnserror == 0)
			m_get_int(&m, &preference);
		m_end(&m);

		relay = tree_xpop(&wait_preference, reqid);
		if (dnserror) {
			log_warnx("warn: Couldn't find backup "
			    "preference for %s: error %d",
			    mta_relay_to_text(relay), dnserror);
			/* On error, fall back to INT_MAX as the preference. */
			preference = INT_MAX;
		}
		mta_on_preference(relay, preference);
		return;

	case IMSG_CTL_RESUME_ROUTE:
		/* A zero id means "all routes"; otherwise only the matching one. */
		u64 = *((uint64_t *)imsg->data);
		if (u64)
			log_debug("resuming route: %llu",
			    (unsigned long long)u64);
		else
			log_debug("resuming all routes");
		SPLAY_FOREACH(route, mta_route_tree, &routes) {
			if (u64 && route->id != u64)
				continue;

			if (route->flags & ROUTE_DISABLED) {
				log_info("smtp-out: Enabling route %s per admin request",
				    mta_route_to_text(route));
				if (!runq_cancel(runq_route, route)) {
					log_warnx("warn: route not on runq");
					fatalx("exiting");
				}
				route->flags &= ~ROUTE_DISABLED;
				route->flags |= ROUTE_NEW;
				route->nerror = 0;
				route->penalty = 0;
				/*
				 * NOTE(review): if this unref can release the
				 * route, SPLAY_FOREACH would step from a freed
				 * node when u64 is 0 - confirm against
				 * mta_route_unref().
				 */
				mta_route_unref(route); /* from mta_route_disable */
			}

			if (u64)
				break;
		}
		return;

	case IMSG_CTL_MTA_SHOW_HOSTS:
		/* One text line per host, followed by an empty message. */
		t = time(NULL);
		SPLAY_FOREACH(host, mta_host_tree, &hosts) {
			(void)snprintf(buf, sizeof(buf),
			    "%s %s refcount=%d nconn=%zu lastconn=%s",
			    sockaddr_to_text(host->sa),
			    host->ptrname,
			    host->refcount,
			    host->nconn,
			    host->lastconn ? duration_to_text(t - host->lastconn) : "-");
			m_compose(p, IMSG_CTL_MTA_SHOW_HOSTS,
			    imsg->hdr.peerid, 0, -1,
			    buf, strlen(buf) + 1);
		}
		m_compose(p, IMSG_CTL_MTA_SHOW_HOSTS, imsg->hdr.peerid,
		    0, -1, NULL, 0);
		return;

	case IMSG_CTL_MTA_SHOW_RELAYS:
		/* mta_relay_show() reports each relay; an empty message follows. */
		t = time(NULL);
		SPLAY_FOREACH(relay, mta_relay_tree, &relays)
			mta_relay_show(relay, p, imsg->hdr.peerid, t);
		m_compose(p, IMSG_CTL_MTA_SHOW_RELAYS, imsg->hdr.peerid,
		    0, -1, NULL, 0);
		return;

	case IMSG_CTL_MTA_SHOW_ROUTES:
		/* One text line per route, followed by an empty message. */
		SPLAY_FOREACH(route, mta_route_tree, &routes) {
			/* t is only used below when v is non-zero. */
			v = runq_pending(runq_route, route, &t);
			(void)snprintf(buf, sizeof(buf),
			    "%llu. %s %c%c%c%c nconn=%zu nerror=%d penalty=%d timeout=%s",
			    (unsigned long long)route->id,
			    mta_route_to_text(route),
			    route->flags & ROUTE_NEW ? 'N' : '-',
			    route->flags & ROUTE_DISABLED ? 'D' : '-',
			    route->flags & ROUTE_RUNQ ? 'Q' : '-',
			    route->flags & ROUTE_KEEPALIVE ? 'K' : '-',
			    route->nconn,
			    route->nerror,
			    route->penalty,
			    v ? duration_to_text(t - time(NULL)) : "-");
			m_compose(p, IMSG_CTL_MTA_SHOW_ROUTES,
			    imsg->hdr.peerid, 0, -1,
			    buf, strlen(buf) + 1);
		}
		m_compose(p, IMSG_CTL_MTA_SHOW_ROUTES, imsg->hdr.peerid,
		    0, -1, NULL, 0);
		return;

	case IMSG_CTL_MTA_SHOW_HOSTSTATS:
		/* One "name|time|error" line per entry, then an empty message. */
		iter = NULL;
		while (dict_iter(&hoststat, &iter, &hostname,
			(void **)&hs)) {
			(void)snprintf(buf, sizeof(buf),
			    "%s|%llu|%s",
			    hostname, (unsigned long long) hs->tm,
			    hs->error);
			m_compose(p, IMSG_CTL_MTA_SHOW_HOSTSTATS,
			    imsg->hdr.peerid, 0, -1,
			    buf, strlen(buf) + 1);
		}
		m_compose(p, IMSG_CTL_MTA_SHOW_HOSTSTATS,
		    imsg->hdr.peerid,
		    0, -1, NULL, 0);
		return;

	case IMSG_CTL_MTA_BLOCK:
		/*
		 * Block a source, optionally for one domain.  An empty domain
		 * string blocks with a NULL domain; a domain that does not fit
		 * in buf is silently ignored.  IMSG_CTL_OK is sent either way.
		 */
		m_msg(&m, imsg);
		m_get_sockaddr(&m, (struct sockaddr*)&ss);
		m_get_string(&m, &dom);
		m_end(&m);
		source = mta_source((struct sockaddr*)&ss);
		if (*dom != '\0') {
			if (!(strlcpy(buf, dom, sizeof(buf))
				>= sizeof(buf)))
				mta_block(source, buf);
		}
		else
			mta_block(source, NULL);
		mta_source_unref(source);
		m_compose(p, IMSG_CTL_OK, imsg->hdr.peerid, 0, -1, NULL, 0);
		return;

	case IMSG_CTL_MTA_UNBLOCK:
		/* Mirror of IMSG_CTL_MTA_BLOCK, calling mta_unblock() instead. */
		m_msg(&m, imsg);
		m_get_sockaddr(&m, (struct sockaddr*)&ss);
		m_get_string(&m, &dom);
		m_end(&m);
		source = mta_source((struct sockaddr*)&ss);
		if (*dom != '\0') {
			if (!(strlcpy(buf, dom, sizeof(buf))
				>= sizeof(buf)))
				mta_unblock(source, buf);
		}
		else
			mta_unblock(source, NULL);
		mta_source_unref(source);
		m_compose(p, IMSG_CTL_OK, imsg->hdr.peerid, 0, -1, NULL, 0);
		return;

	case IMSG_CTL_MTA_SHOW_BLOCK:
		/* One "source -> domain" line per block ("*" for a NULL domain). */
		SPLAY_FOREACH(block, mta_block_tree, &blocks) {
			(void)snprintf(buf, sizeof(buf), "%s -> %s",
			    mta_source_to_text(block->source),
			    block->domain ? block->domain : "*");
			m_compose(p, IMSG_CTL_MTA_SHOW_BLOCK,
			    imsg->hdr.peerid, 0, -1, buf, strlen(buf) + 1);
		}
		m_compose(p, IMSG_CTL_MTA_SHOW_BLOCK, imsg->hdr.peerid,
		    0, -1, NULL, 0);
		return;
	}

	/* No case matched the message type. */
	fatalx("mta_imsg: unexpected %s imsg", imsg_to_str(imsg->hdr.type));
}

/*
 * Walk every dispatcher registered in env->sc_dispatchers, log its name
 * and hand it to mta_setup_dispatcher().
 */
void
mta_postfork(void)
{
	struct dispatcher	*dsp;
	const char		*name;
	void			*cursor = NULL;

	for (;;) {
		if (!dict_iter(env->sc_dispatchers, &cursor, &name,
		    (void **)&dsp))
			break;
		log_debug("%s: %s", __func__, name);
		mta_setup_dispatcher(dsp);
	}
}

/*
 * Build the libtls client configuration for a remote dispatcher and store
 * it in dispatcher->u.remote.tls_config.  Dispatchers whose type is not
 * DISPATCHER_REMOTE are left untouched.
 *
 * Any configuration error is fatal, including a pki or ca name that is
 * not present in the corresponding dictionary.
 */
static void
mta_setup_dispatcher(struct dispatcher *dispatcher)
{
	struct dispatcher_remote *remote;
	static const char *dheparams[] = { "none", "auto", "legacy" };
	struct tls_config *config;
	struct pki *pki;
	struct ca *ca;
	const char *ciphers;
	uint32_t protos;

	if (dispatcher->type != DISPATCHER_REMOTE)
		return;

	remote = &dispatcher->u.remote;

	if ((config = tls_config_new()) == NULL)
		fatal("smtpd: tls_config_new");

	/* Per-dispatcher ciphers take precedence over the global setting. */
	ciphers = env->sc_tls_ciphers;
	if (remote->tls_ciphers)
		ciphers = remote->tls_ciphers;
	if (ciphers && tls_config_set_ciphers(config, ciphers) == -1)
		fatalx("%s", tls_config_error(config));

	if (remote->tls_protocols) {
		if (tls_config_parse_protocols(&protos,
		    remote->tls_protocols) == -1)
			fatalx("failed to parse protocols \"%s\"",
			    remote->tls_protocols);
		if (tls_config_set_protocols(config, protos) == -1)
			fatalx("%s", tls_config_error(config));
	}

	if (remote->pki) {
		pki = dict_get(env->sc_pki_dict, remote->pki);
		if (pki == NULL)
			fatalx("client pki \"%s\" not found", remote->pki);

		tls_config_set_dheparams(config, dheparams[pki->pki_dhe]);
		/* Only the certificate is loaded here; no key is passed. */
		tls_config_use_fake_private_key(config);
		if (tls_config_set_keypair_mem(config, pki->pki_cert,
		    pki->pki_cert_len, NULL, 0) == -1)
			fatalx("tls_config_set_keypair_mem: %s",
			    tls_config_error(config));
	}

	if (remote->ca) {
		ca = dict_get(env->sc_ca_dict, remote->ca);
		/* Same treatment as a missing pki: fail instead of crashing. */
		if (ca == NULL)
			fatalx("client ca \"%s\" not found", remote->ca);
		if (tls_config_set_ca_mem(config, ca->ca_cert, ca->ca_cert_len)
		    == -1)
			fatalx("tls_config_set_ca_mem: %s",
			    tls_config_error(config));
	}
	else if (tls_config_set_ca_file(config, tls_default_ca_cert_file())
	    == -1)
		fatalx("tls_config_set_ca_file: %s",
		    tls_config_error(config));

	if (remote->tls_verify) {
		tls_config_verify(config);
	} else {
		tls_config_insecure_noverifycert(config);
		tls_config_insecure_noverifyname(config);
		tls_config_insecure_noverifytime(config);
	}

	remote->tls_config = config;
}

/*
 * Initialise the file-scope state of this module: the splay trees, the
 * pending-request trees, the hoststat dictionary, the envelope flush
 * timer and the four run queues.
 */
void
mta_postprivdrop(void)
{
	/* Object trees. */
	SPLAY_INIT(&blocks);
	SPLAY_INIT(&domains);
	SPLAY_INIT(&hosts);
	SPLAY_INIT(&relays);
	SPLAY_INIT(&routes);
	SPLAY_INIT(&sources);

	/* Requests waiting for an answer, and envelopes waiting for a flush. */
	tree_init(&wait_mx);
	tree_init(&wait_preference);
	tree_init(&wait_secret);
	tree_init(&wait_smarthost);
	tree_init(&wait_source);
	tree_init(&flush_evp);

	dict_init(&hoststat);

	evtimer_set(&ev_flush_evp, mta_delivery_flush_event, NULL);

	/* All run queues share the same timeout callback. */
	runq_init(&runq_relay, mta_on_timeout);
	runq_init(&runq_connector, mta_on_timeout);
	runq_init(&runq_route, mta_on_timeout);
	runq_init(&runq_hoststat, mta_on_timeout);
}


/*
 * Record a local error on the source of the given route.
 *
 * The connector for (relay, route->src) is flagged with
 * CONNECTOR_ERROR_SOURCE.  The error text e is only logged when the flag
 * was not already set.
 */
void
mta_source_error(struct mta_relay *relay, struct mta_route *route, const char *e)
{
	struct mta_connector	*conn;
	int			 already_flagged;

	conn = mta_connector(relay, route->src);
	already_flagged = (conn->flags & CONNECTOR_ERROR_SOURCE) != 0;

	if (!already_flagged)
		log_info("smtp-out: Error on %s: %s",
		    mta_route_to_text(route), e);

	/* Remember the source as broken for this connector. */
	conn->flags |= CONNECTOR_ERROR_SOURCE;
}

/*
 * Currently a no-op: the per-route error accounting below is compiled
 * out with #if 0, so neither relay nor route is touched.
 */
void
mta_route_error(struct mta_relay *relay, struct mta_route *route)
{
#if 0
	route->nerror += 1;

	if (route->nerror > MAXERROR_PER_ROUTE) {
		log_info("smtp-out: Too many errors on %s: "
		    "disabling for a while", mta_route_to_text(route));
		mta_route_disable(route, 2, ROUTE_DISABLED_SMTP);
	}
#endif
}

/*
 * Mark a route as working.
 *
 * Nothing happens unless the route still carries ROUTE_NEW.  Otherwise
 * the flag and the error counter are cleared and mta_connect() is called
 * on the connector for (relay, route->src).
 */
void
mta_route_ok(struct mta_relay *relay, struct mta_route *route)
{
	if ((route->flags & ROUTE_NEW) == 0)
		return;

	log_debug("debug: mta-routing: route %s is now valid.",
	    mta_route_to_text(route));

	route->flags &= ~ROUTE_NEW;
	route->nerror = 0;

	mta_connect(mta_connector(relay, route->src));
}

/*
 * Currently a no-op: the call that would disable the route is compiled
 * out with #if 0.
 */
void
mta_route_down(struct mta_relay *relay, struct mta_route *route)
{
#if 0
	mta_route_disable(route, 2, ROUTE_DISABLED_SMTP);
#endif
}

/*
 * Account for the end of a connection that used route on behalf of relay.
 *
 * The connection counters of the relay, its domain, the route, its two
 * endpoints and the connector are decremented, and the disconnection
 * time is recorded.  A route still flagged ROUTE_NEW is disabled.  The
 * connector is then given a chance to connect again, and one reference
 * each is dropped on the route and the relay.
 */
void
mta_route_collect(struct mta_relay *relay, struct mta_route *route)
{
	struct mta_connector	*conn;

	log_debug("debug: mta_route_collect(%s)",
	    mta_route_to_text(route));

	route->lastdisc = time(NULL);

	route->nconn--;
	route->src->nconn--;
	route->dst->nconn--;
	relay->nconn--;
	relay->domain->nconn--;

	/* Still ROUTE_NEW means the first connection never succeeded. */
	if (route->flags & ROUTE_NEW)
		mta_route_disable(route, 1, ROUTE_DISABLED_NET);

	conn = mta_connector(relay, route->src);
	conn->nconn--;
	mta_connect(conn);

	mta_route_unref(route); /* from mta_find_route() */
	mta_relay_unref(relay); /* from mta_connect() */
}

/*
 * Detach and return the first task queued on relay, or NULL when the
 * queue is empty.
 *
 * When the remaining task count equals the low-water mark, a relay that
 * was on hold is released.  If the relay has RELAY_HOLDQ set, the queue
 * is sent IMSG_MTA_HOLDQ_RELEASE with limits->task_release at the
 * low-water mark, or with 0 when no task is left.
 */
struct mta_task *
mta_route_next_task(struct mta_relay *relay, struct mta_route *route)
{
	struct mta_task	*next;
	int		 at_lowat;

	next = TAILQ_FIRST(&relay->tasks);
	if (next == NULL)
		return (NULL);

	TAILQ_REMOVE(&relay->tasks, next, entry);
	relay->ntask--;
	next->relay = NULL;

	at_lowat = (relay->ntask == (size_t)relay->limits->task_lowat);

	if (at_lowat && (relay->state & RELAY_ONHOLD)) {
		log_info("smtp-out: back to lowat on %s: releasing",
		    mta_relay_to_text(relay));
		relay->state &= ~RELAY_ONHOLD;
	}

	/* Ask the queue for more envelopes at lowat or once drained. */
	if ((relay->state & RELAY_HOLDQ) &&
	    (at_lowat || relay->ntask == 0)) {
		m_create(p_queue, IMSG_MTA_HOLDQ_RELEASE, 0, 0, -1);
		m_add_id(p_queue, relay->id);
		m_add_int(p_queue,
		    at_lowat ? relay->limits->task_release : 0);
		m_close(p_queue);
	}

	return (next);
}

static void
mta_handle_envelope(struct envelope *evp, const char *smarthost)
{
	struct mta_relay	*relay;
	struct mta_task		*task;
	struct mta_envelope	*e;
	struct dispatcher	*dispatcher;
	struct mailaddr		 maddr;
	struct relayhost	 relayh;
	char			 buf[LINE_MAX];

	dispatcher = dict_xget(env->sc_dispatchers, evp->dispatcher);
	if (dispatcher->u.remote.smarthost && smarthost == NULL) {
		mta_query_smarthost(evp);
		return;
	}

	memset(&relayh, 0, sizeof(relayh));
	relayh.tls = RELAY_TLS_OPPORTUNISTIC;
	if (smarthost && !text_to_relayhost(&relayh, smarthost)) {
		log_warnx("warn: Failed to parse smarthost %s", smarthost);
		m_create(p_queue, IMSG_MTA_DELIVERY_TEMPFAIL, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_add_string(p_queue, "Cannot parse smarthost");
		m_add_int(p_queue, ESC_OTHER_STATUS);
		m_close(p_queue);
		return;
	}

	if (relayh.flags & RELAY_AUTH && dispatcher->u.remote.auth == NULL) {
		log_warnx("warn: No auth table on action \"%s\" for relay %s",
		    evp->dispatcher, smarthost);
		m_create(p_queue, IMSG_MTA_DELIVERY_TEMPFAIL, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_add_string(p_queue, "No auth table for relaying");
		m_add_int(p_queue, ESC_OTHER_STATUS);
		m_close(p_queue);
		return;
	}

	if (dispatcher->u.remote.tls_required) {
		/* Reject relay if smtp+notls:// is requested */
		if (relayh.tls == RELAY_TLS_NO) {
			log_warnx("warn: TLS required for action \"%s\"",
			    evp->dispatcher);
			m_create(p_queue, IMSG_MTA_DELIVERY_TEMPFAIL, 0, 0, -1);
			m_add_evpid(p_queue, evp->id);
			m_add_string(p_queue, "TLS required for relaying");
			m_add_int(p_queue, ESC_OTHER_STATUS);
			m_close(p_queue);
			return;
		}
		/* Update smtp:// to smtp+tls:// */
		if (relayh.tls == RELAY_TLS_OPPORTUNISTIC)
			relayh.tls = RELAY_TLS_STARTTLS;
	}

	relay = mta_relay(evp, &relayh);
	/* ignore if we don't know the limits yet */
	if (relay->limits &&
	    relay->ntask >= (size_t)relay->limits->task_hiwat) {
		if (!(relay->state & RELAY_ONHOLD)) {
			log_info("smtp-out: hiwat reached on %s: holding envelopes",
			    mta_relay_to_text(relay));
			relay->state |= RELAY_ONHOLD;
		}
	}

	/*
	 * If the relay has too many pending tasks, tell the
	 * scheduler to hold it until further notice
	 */
	if (relay->state & RELAY_ONHOLD) {
		relay->state |= RELAY_HOLDQ;
		m_create(p_queue, IMSG_MTA_DELIVERY_HOLD, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_add_id(p_queue, relay->id);
		m_close(p_queue);
		mta_relay_unref(relay); /* from here */
		return;
	}

	task = NULL;
	TAILQ_FOREACH(task, &relay->tasks, entry)
		if (task->msgid == evpid_to_msgid(evp->id))
			break;

	if (task == NULL) {
		task = xmalloc(sizeof *task);
		TAILQ_INIT(&task->envelopes);
		task->relay = relay;
		relay->ntask += 1;
		TAILQ_INSERT_TAIL(&relay->tasks, task, entry);
		task->msgid = evpid_to_msgid(evp->id);
		if (evp->sender.user[0] || evp->sender.domain[0])
			(void)snprintf(buf, sizeof buf, "%s@%s",
			    evp->sender.user, evp->sender.domain);
		else
			buf[0] = '\0';

		if (dispatcher->u.remote.mail_from && evp->sender.user[0]) {
			memset(&maddr, 0, sizeof (maddr));
			if (text_to_mailaddr(&maddr,
				dispatcher->u.remote.mail_from)) {
				(void)snprintf(buf, sizeof buf, "%s@%s",
				    maddr.user[0] ? maddr.user : evp->sender.user,
				    maddr.domain[0] ? maddr.domain : evp->sender.domain);
			}
		}

		task->sender = xstrdup(buf);
		stat_increment("mta.task", 1);
	}

	e = xcalloc(1, sizeof *e);
	e->id = evp->id;
	e->creation = evp->creation;
	e->smtpname = xstrdup(evp->smtpname);
	(void)snprintf(buf, sizeof buf, "%s@%s",
	    evp->dest.user, evp->dest.domain);
	e->dest = xstrdup(buf);
	(void)snprintf(buf, sizeof buf, "%s@%s",
	    evp->rcpt.user, evp->rcpt.domain);
	if (strcmp(buf, e->dest))
		e->rcpt = xstrdup(buf);
	e->task = task;
	if (evp->dsn_orcpt[0] != '\0')
		e->dsn_orcpt = xstrdup(evp->dsn_orcpt);
	(void)strlcpy(e->dsn_envid, evp->dsn_envid,
	    sizeof e->dsn_envid);
	e->dsn_notify = evp->dsn_notify;
	e->dsn_ret = evp->dsn_ret;

	TAILQ_INSERT_TAIL(&task->envelopes, e, entry);
	log_debug("debug: mta: received evp:%016" PRIx64
	    " for <%s>", e->id, e->dest);

	stat_increment("mta.envelope", 1);

	mta_drain(relay);
	mta_relay_unref(relay); /* from here */
}

/*
 * Timer callback: report one envelope from flush_evp to the queue.
 *
 * The root entry of flush_evp is removed, its delivery outcome is sent
 * to the queue, and the envelope is freed.  The timer is then re-armed
 * with a zero delay so the next entry is handled on the following run.
 * Nothing happens when the tree is empty.  An unknown delivery type is
 * fatal.  The fd, event and arg parameters are unused.
 */
static void
mta_delivery_flush_event(int fd, short event, void *arg)
{
	struct mta_envelope	*evp;
	struct timeval		 asap;

	if (!tree_poproot(&flush_evp, NULL, (void **)&evp))
		return;

	switch (evp->delivery) {
	case IMSG_MTA_DELIVERY_OK:
		m_create(p_queue, IMSG_MTA_DELIVERY_OK, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_add_int(p_queue, evp->ext);
		m_close(p_queue);
		break;

	case IMSG_MTA_DELIVERY_TEMPFAIL:
	case IMSG_MTA_DELIVERY_PERMFAIL:
		/* Both failure kinds carry the same payload. */
		m_create(p_queue, evp->delivery, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_add_string(p_queue, evp->status);
		m_add_int(p_queue, ESC_OTHER_STATUS);
		m_close(p_queue);
		break;

	case IMSG_MTA_DELIVERY_LOOP:
		m_create(p_queue, IMSG_MTA_DELIVERY_LOOP, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_close(p_queue);
		break;

	default:
		log_warnx("warn: bad delivery type %d for %016" PRIx64,
		    evp->delivery, evp->id);
		fatalx("aborting");
	}

	log_debug("debug: mta: flush for %016"PRIx64" (-> %s)", evp->id, evp->dest);

	free(evp->smtpname);
	free(evp->dest);
	free(evp->rcpt);
	free(evp->dsn_orcpt);
	free(evp);

	asap.tv_sec = 0;
	asap.tv_usec = 0;
	evtimer_add(&ev_flush_evp, &asap);
}

/*
 * Log the outcome of a delivery attempt and remember it on the envelope.
 *
 * delivery selects the log prefix; a loop is logged as "PermFail" with
 * the fixed text "Loop detected".  The delivery type is stored in
 * e->delivery and, when status is not NULL, it is copied into e->status.
 * An unknown delivery type is fatal.
 */
void
mta_delivery_log(struct mta_envelope *e, const char *source, const char *relay,
    int delivery, const char *status)
{
	switch (delivery) {
	case IMSG_MTA_DELIVERY_OK:
		mta_log(e, "Ok", source, relay, status);
		break;
	case IMSG_MTA_DELIVERY_TEMPFAIL:
		mta_log(e, "TempFail", source, relay, status);
		break;
	case IMSG_MTA_DELIVERY_PERMFAIL:
		mta_log(e, "PermFail", source, relay, status);
		break;
	case IMSG_MTA_DELIVERY_LOOP:
		mta_log(e, "PermFail", source, relay, "Loop detected");
		break;
	default:
		log_warnx("warn: bad delivery type %d for %016" PRIx64,
		    delivery, e->id);
		fatalx("aborting");
	}

	e->delivery = delivery;
	if (status != NULL)
		(void)strlcpy(e->status, status, sizeof(e->status));
}

/*
 * Queue envelope e for reporting by mta_delivery_flush_event().
 *
 * The flush timer is armed with a zero delay only when this envelope is
 * the sole entry in flush_evp.
 */
void
mta_delivery_notify(struct mta_envelope *e)
{
	struct timeval	asap;

	tree_xset(&flush_evp, e->id, e);
	if (tree_count(&flush_evp) != 1)
		return;

	asap.tv_sec = 0;
	asap.tv_usec = 0;
	evtimer_add(&ev_flush_evp, &asap);
}

/*
 * Start an MX (or host) lookup for the relay's domain unless one is
 * already pending for this relay.
 *
 * The relay registers mta_on_mx() on the domain's wait queue.  A DNS
 * request is only sent when waitq_wait() returns non-zero; it is
 * IMSG_MTA_DNS_HOST when domain->as_host is set and IMSG_MTA_DNS_MX
 * otherwise.  The relay is flagged RELAY_WAIT_MX and a reference is
 * taken on it.
 */
static void
mta_query_mx(struct mta_relay *relay)
{
	uint64_t	reqid;

	if (relay->status & RELAY_WAIT_MX)
		return;

	log_debug("debug: mta: querying MX for %s...",
	    mta_relay_to_text(relay));

	if (waitq_wait(&relay->domain->mxs, mta_on_mx, relay)) {
		reqid = generate_uid();
		tree_xset(&wait_mx, reqid, relay->domain);
		m_create(p_lka, relay->domain->as_host ?
		    IMSG_MTA_DNS_HOST : IMSG_MTA_DNS_MX, 0, 0, -1);
		m_add_id(p_lka, reqid);
		m_add_string(p_lka, relay->domain->name);
		m_close(p_lka);
	}

	relay->status |= RELAY_WAIT_MX;
	mta_relay_ref(relay);
}

/*
 * Select the limits that apply to relay.
 *
 * The limits registered under the relay's domain name are used, falling
 * back to the "default" entry.  The largest conndelay_route and
 * discdelay_route seen so far are tracked in the max_seen_* variables.
 * Nothing is done when RELAY_WAIT_LIMITS is set on the relay.
 */
static void
mta_query_limits(struct mta_relay *relay)
{
	if (relay->status & RELAY_WAIT_LIMITS)
		return;

	relay->limits = dict_get(env->sc_limits_dict, relay->domain->name);
	if (!relay->limits)
		relay->limits = dict_get(env->sc_limits_dict, "default");

	if (relay->limits->conndelay_route > max_seen_conndelay_route)
		max_seen_conndelay_route = relay->limits->conndelay_route;

	if (relay->limits->discdelay_route > max_seen_discdelay_route)
		max_seen_discdelay_route = relay->limits->discdelay_route;
}

/*
 * Ask the lookup process for the relay's credentials unless a request
 * is already pending.
 *
 * The relay is registered in wait_secret under its own id, flagged
 * RELAY_WAIT_SECRET, and a reference is taken on it.  The request
 * carries the relay's auth table and label.
 */
static void
mta_query_secret(struct mta_relay *relay)
{
	if ((relay->status & RELAY_WAIT_SECRET) != 0)
		return;

	log_debug("debug: mta: querying secret for %s...",
	    mta_relay_to_text(relay));

	relay->status |= RELAY_WAIT_SECRET;
	tree_xset(&wait_secret, relay->id, relay);
	mta_relay_ref(relay);

	m_create(p_lka, IMSG_MTA_LOOKUP_CREDENTIALS, 0, 0, -1);
	m_add_id(p_lka, relay->id);
	m_add_string(p_lka, relay->authtable);
	m_add_string(p_lka, relay->authlabel);
	m_close(p_lka);
}

/*
 * Ask the lookup process to resolve the smarthost for an envelope.
 *
 * evp0 is copied to the heap because the answer arrives later; the copy
 * is stored in wait_smarthost under the envelope id.  The request
 * carries the destination domain when the dispatcher has
 * smarthost_domain set (NULL otherwise), followed by the dispatcher's
 * smarthost string.
 *
 * The copy is allocated with xmalloc(), as other allocations in this
 * file are, so the result is never used unchecked.
 */
static void
mta_query_smarthost(struct envelope *evp0)
{
	struct dispatcher *dispatcher;
	struct envelope *evp;

	evp = xmalloc(sizeof(*evp));
	memcpy(evp, evp0, sizeof(*evp));

	dispatcher = dict_xget(env->sc_dispatchers, evp->dispatcher);

	log_debug("debug: mta: querying smarthost for %s:%s...",
	    evp->dispatcher, dispatcher->u.remote.smarthost);

	tree_xset(&wait_smarthost, evp->id, evp);

	m_create(p_lka, IMSG_MTA_LOOKUP_SMARTHOST, 0, 0, -1);
	m_add_id(p_lka, evp->id);
	if (dispatcher->u.remote.smarthost_domain)
		m_add_string(p_lka, evp->dest.domain);
	else
		m_add_string(p_lka, NULL);
	m_add_string(p_lka, dispatcher->u.remote.smarthost);
	m_close(p_lka);

	log_debug("debug: mta: querying smarthost");
}

/*
 * Ask the lookup process for the MX preference of the relay's backup
 * name within the relay's domain, unless a request is already pending.
 *
 * The relay is registered in wait_preference under its own id, flagged
 * RELAY_WAIT_PREFERENCE, and a reference is taken on it.
 */
static void
mta_query_preference(struct mta_relay *relay)
{
	if ((relay->status & RELAY_WAIT_PREFERENCE) != 0)
		return;

	log_debug("debug: mta: querying preference for %s...",
	    mta_relay_to_text(relay));

	relay->status |= RELAY_WAIT_PREFERENCE;
	tree_xset(&wait_preference, relay->id, relay);
	mta_relay_ref(relay);

	m_create(p_lka,  IMSG_MTA_DNS_MX_PREFERENCE, 0, 0, -1);
	m_add_id(p_lka, relay->id);
	m_add_string(p_lka, relay->domain->name);
	m_add_string(p_lka, relay->backupname);
	m_close(p_lka);
}

/*
 * Obtain a source address for relay.
 *
 * relay->sourceloop is incremented on every call.  Without a source
 * table, mta_on_source() is invoked directly with mta_source(NULL).
 * Otherwise a lookup is sent, the relay is registered in wait_source
 * under its own id and flagged RELAY_WAIT_SOURCE.  A reference is taken
 * on the relay in both cases.
 */
static void
mta_query_source(struct mta_relay *relay)
{
	log_debug("debug: mta: querying source for %s...",
	    mta_relay_to_text(relay));

	relay->sourceloop++;

	if (relay->sourcetable != NULL) {
		m_create(p_lka, IMSG_MTA_LOOKUP_SOURCE, 0, 0, -1);
		m_add_id(p_lka, relay->id);
		m_add_string(p_lka, relay->sourcetable);
		m_close(p_lka);

		tree_xset(&wait_source, relay->id, relay);
		relay->status |= RELAY_WAIT_SOURCE;
		mta_relay_ref(relay);
		return;
	}

	/*
	 * This is a recursive call, but it only happens once, since
	 * another source will not be queried immediately.
	 */
	mta_relay_ref(relay);
	mta_on_source(relay, mta_source(NULL));
}

/*
 * Wait-queue callback registered by mta_query_mx().
 *
 * arg is the relay and data the domain whose lookup completed; tag is
 * only logged.  A failed lookup is translated into a failure type and
 * message on the relay.  RELAY_WAIT_MX is then cleared, the relay is
 * drained, and one reference on it is dropped.  An unknown status code
 * is fatal.
 */
static void
mta_on_mx(void *tag, void *arg, void *data)
{
	struct mta_relay	*relay = arg;
	struct mta_domain	*domain = data;
	const char		*reason = NULL;
	int			 failtype = 0;

	log_debug("debug: mta: ... got mx (%p, %s, %s)",
	    tag, domain->name, mta_relay_to_text(relay));

	switch (domain->mxstatus) {
	case DNS_OK:
		break;
	case DNS_RETRY:
		failtype = IMSG_MTA_DELIVERY_TEMPFAIL;
		reason = "Temporary failure in MX lookup";
		break;
	case DNS_EINVAL:
		failtype = IMSG_MTA_DELIVERY_PERMFAIL;
		reason = "Invalid domain name";
		break;
	case DNS_ENONAME:
		failtype = IMSG_MTA_DELIVERY_PERMFAIL;
		reason = "Domain does not exist";
		break;
	case DNS_ENOTFOUND:
		failtype = IMSG_MTA_DELIVERY_TEMPFAIL;
		reason = relay->domain->as_host ?
		    "Host not found" : "No MX found for domain";
		break;
	case DNS_NULLMX:
		failtype = IMSG_MTA_DELIVERY_PERMFAIL;
		reason = "Domain does not accept mail";
		break;
	default:
		fatalx("bad DNS lookup error code");
		break;
	}

	/* Only a failed lookup touches the relay's failure state. */
	if (reason != NULL) {
		relay->fail = failtype;
		relay->failstr = reason;
	}

	if (domain->mxstatus != 0)
		log_info("smtp-out: Failed to resolve MX for %s: %s",
		    mta_relay_to_text(relay), relay->failstr);

	relay->status &= ~RELAY_WAIT_MX;
	mta_drain(relay);
	mta_relay_unref(relay); /* reference taken in mta_query_mx() */
}

/*
 * Handle the answer to mta_query_secret().
 *
 * secret is the credential string, or NULL when none was found.  It is
 * duplicated into relay->secret; if the relay ends up without a secret,
 * including when the duplication fails, the relay is marked as
 * temporarily failing.  RELAY_WAIT_SECRET is cleared, the relay is
 * drained, and the reference taken by mta_query_secret() is dropped.
 *
 * The debug log only says whether a secret was received.  The value is
 * never written to the log, and a NULL pointer is never passed to "%s".
 */
static void
mta_on_secret(struct mta_relay *relay, const char *secret)
{
	log_debug("debug: mta: ... got secret for %s: %s",
	    mta_relay_to_text(relay), secret ? "(redacted)" : "(none)");

	if (secret)
		relay->secret = strdup(secret);

	if (relay->secret == NULL) {
		log_warnx("warn: Failed to retrieve secret "
			    "for %s", mta_relay_to_text(relay));
		relay->fail = IMSG_MTA_DELIVERY_TEMPFAIL;
		relay->failstr = "Could not retrieve credentials";
	}

	relay->status &= ~RELAY_WAIT_SECRET;
	mta_drain(relay);
	mta_relay_unref(relay); /* from mta_query_secret() */
}

/*
 * Callback invoked with the result of a smarthost lookup for an envelope.
 *
 * evp       - the envelope that was waiting for the lookup; this function
 *             releases it with free() on every path.
 * smarthost - the smarthost string, or NULL if the lookup failed.
 *
 * On failure a temporary delivery failure is reported to the queue
 * process.  On success the envelope is handed to mta_handle_envelope().
 */
static void
mta_on_smarthost(struct envelope *evp, const char *smarthost)
{
	if (smarthost == NULL) {
		log_warnx("warn: Failed to retrieve smarthost "
			    "for envelope %"PRIx64, evp->id);
		m_create(p_queue, IMSG_MTA_DELIVERY_TEMPFAIL, 0, 0, -1);
		m_add_evpid(p_queue, evp->id);
		m_add_string(p_queue, "Cannot retrieve smarthost");
		m_add_int(p_queue, ESC_OTHER_STATUS);
		m_close(p_queue);
	} else {
		log_debug("debug: mta: ... got smarthost for %016"PRIx64": %s",
		    evp->id, smarthost);
		mta_handle_envelope(evp, smarthost);
	}

	/*
	 * The envelope is released here on success, so it must also be
	 * released on failure or it would be leaked.
	 */
	free(evp);
}

/*
 * Callback invoked with the backup-MX preference resolved for a relay.
 * Stores the preference, clears the preference wait flag and resumes
 * the relay.
 */
static void
mta_on_preference(struct mta_relay *relay, int preference)
{
	log_debug("debug: mta: ... got preference for %s: %d",
	    mta_relay_to_text(relay), preference);

	relay->status &= ~RELAY_WAIT_PREFERENCE;
	relay->backuppref = preference;

	mta_drain(relay);
	/* Balance the reference taken in mta_query_preference(). */
	mta_relay_unref(relay);
}

/*
 * Callback invoked with the result of a source address lookup.
 *
 * relay  - the relay that asked for a source.
 * source - the source to use, or NULL if none could be obtained.  The
 *          reference handed in with it is released here.
 *
 * Tries to connect through the connector for (relay, source), decides how
 * long to wait before asking for the next source, and flags the relay as
 * temporarily failing when no connector is usable.
 */
static void
mta_on_source(struct mta_relay *relay, struct mta_source *source)
{
	struct mta_connector	*conn;
	void			*cursor;
	int			 delay, errmask;

	log_debug("debug: mta: ... got source for %s: %s",
	    mta_relay_to_text(relay), source ? mta_source_to_text(source) : "NULL");

	relay->lastsource = time(NULL);
	delay = DELAY_CHECK_SOURCE_SLOW;

	if (source == NULL) {
		log_warnx("warn: Failed to get source address for %s",
		    mta_relay_to_text(relay));
	} else {
		conn = mta_connector(relay, source);
		if (conn->flags & CONNECTOR_NEW) {
			/* First use of this connector: check again soon. */
			conn->flags &= ~CONNECTOR_NEW;
			delay = DELAY_CHECK_SOURCE;
		}
		mta_connect(conn);
		if (conn->flags & CONNECTOR_ERROR)
			delay = DELAY_CHECK_SOURCE_FAST;
		else
			relay->sourceloop = 0;
		mta_source_unref(source); /* from constructor */
	}

	if (tree_count(&relay->connectors) == 0) {
		relay->fail = IMSG_MTA_DELIVERY_TEMPFAIL;
		relay->failstr = "Could not retrieve source address";
	}
	if (tree_count(&relay->connectors) < relay->sourceloop) {
		relay->fail = IMSG_MTA_DELIVERY_TEMPFAIL;
		relay->failstr = "No valid route to remote MX";

		/* Merge the flags of every connector to pick a reason. */
		errmask = 0;
		for (cursor = NULL;
		    tree_iter(&relay->connectors, &cursor, NULL, (void **)&conn); )
			errmask |= conn->flags;

		if (errmask & CONNECTOR_ERROR_ROUTE_SMTP)
			relay->failstr = "Destination seem to reject all mails";
		else if (errmask & CONNECTOR_ERROR_ROUTE_NET)
			relay->failstr = "Network error on destination MXs";
		else if (errmask & CONNECTOR_ERROR_MX)
			relay->failstr = "No MX found for destination";
		else if (errmask & CONNECTOR_ERROR_FAMILY)
			relay->failstr = "Address family mismatch on destination MXs";
		else if (errmask & CONNECTOR_ERROR_BLOCKED)
			relay->failstr = "All routes to destination blocked";
		else
			relay->failstr = "No valid route to destination";
	}

	relay->status &= ~RELAY_WAIT_SOURCE;
	relay->nextsource = relay->lastsource + delay;
	mta_drain(relay);
	mta_relay_unref(relay); /* from mta_query_source() */
}

/*
 * Try to open as many new sessions as useful through connector c.
 *
 * The function loops (via the "again" label) spawning one session per
 * iteration until one of the following stops it:
 *  - the connector carries an error flag,
 *  - the relay has no task, or already has enough connections,
 *  - a connection limit or a connection delay prevents connecting now,
 *    in which case the connector is flagged CONNECTOR_WAIT and
 *    rescheduled on runq_connector,
 *  - no route can be found.
 *
 * Each spawned session bumps the connection counters and last-connection
 * timestamps of the connector, relay, domain, route, source and host, and
 * takes a reference on the relay.
 */
static void
mta_connect(struct mta_connector *c)
{
	struct mta_route	*route;
	struct mta_mx		*mx;
	struct mta_limits	*l = c->relay->limits;
	int			 limits;
	time_t			 nextconn, now;

	/* toggle the block flag */
	if (mta_is_blocked(c->source, c->relay->domain->name))
		c->flags |= CONNECTOR_ERROR_BLOCKED;
	else
		c->flags &= ~CONNECTOR_ERROR_BLOCKED;

    again:

	log_debug("debug: mta: connecting with %s", mta_connector_to_text(c));

	/* Do not connect if this connector has an error. */
	if (c->flags & CONNECTOR_ERROR) {
		log_debug("debug: mta: connector error");
		return;
	}

	/* A retry was pending for this connector: cancel it, we run now. */
	if (c->flags & CONNECTOR_WAIT) {
		log_debug("debug: mta: cancelling connector timeout");
		runq_cancel(runq_connector, c);
		c->flags &= ~CONNECTOR_WAIT;
	}

	/* No job. */
	if (c->relay->ntask == 0) {
		log_debug("debug: mta: no task for connector");
		return;
	}

	/* Do not create more connections than necessary */
	if ((c->relay->nconn_ready >= c->relay->ntask) ||
	    (c->relay->nconn > 2 && c->relay->nconn >= c->relay->ntask / 2)) {
		log_debug("debug: mta: enough connections already");
		return;
	}

	/*
	 * Check delays and limits at each level (domain, source, connector,
	 * relay).  nextconn ends up as the latest "not before" time and
	 * limits accumulates the CONNECTOR_LIMIT_* bits that were hit.
	 */
	limits = 0;
	nextconn = now = time(NULL);

	if (c->relay->domain->lastconn + l->conndelay_domain > nextconn) {
		log_debug("debug: mta: cannot use domain %s before %llus",
		    c->relay->domain->name,
		    (unsigned long long) c->relay->domain->lastconn + l->conndelay_domain - now);
		nextconn = c->relay->domain->lastconn + l->conndelay_domain;
	}
	if (c->relay->domain->nconn >= l->maxconn_per_domain) {
		log_debug("debug: mta: hit domain limit");
		limits |= CONNECTOR_LIMIT_DOMAIN;
	}

	if (c->source->lastconn + l->conndelay_source > nextconn) {
		log_debug("debug: mta: cannot use source %s before %llus",
		    mta_source_to_text(c->source),
		    (unsigned long long) c->source->lastconn + l->conndelay_source - now);
		nextconn = c->source->lastconn + l->conndelay_source;
	}
	if (c->source->nconn >= l->maxconn_per_source) {
		log_debug("debug: mta: hit source limit");
		limits |= CONNECTOR_LIMIT_SOURCE;
	}

	if (c->lastconn + l->conndelay_connector > nextconn) {
		log_debug("debug: mta: cannot use %s before %llus",
		    mta_connector_to_text(c),
		    (unsigned long long) c->lastconn + l->conndelay_connector - now);
		nextconn = c->lastconn + l->conndelay_connector;
	}
	if (c->nconn >= l->maxconn_per_connector) {
		log_debug("debug: mta: hit connector limit");
		limits |= CONNECTOR_LIMIT_CONN;
	}

	if (c->relay->lastconn + l->conndelay_relay > nextconn) {
		log_debug("debug: mta: cannot use %s before %llus",
		    mta_relay_to_text(c->relay),
		    (unsigned long long) c->relay->lastconn + l->conndelay_relay - now);
		nextconn = c->relay->lastconn + l->conndelay_relay;
	}
	if (c->relay->nconn >= l->maxconn_per_relay) {
		log_debug("debug: mta: hit relay limit");
		limits |= CONNECTOR_LIMIT_RELAY;
	}

	/* We can connect now, find a route */
	if (!limits && nextconn <= now)
		route = mta_find_route(c, now, &limits, &nextconn, &mx);
	else
		route = NULL;

	/* No route */
	if (route == NULL) {

		if (c->flags & CONNECTOR_ERROR) {
			/* XXX we might want to clear this flag later */
			log_debug("debug: mta-routing: no route available for %s: errors on connector",
			    mta_connector_to_text(c));
			return;
		}
		else if (limits) {
			log_debug("debug: mta-routing: no route available for %s: limits reached",
			    mta_connector_to_text(c));
			/* A limit was hit: poll again after a fixed delay. */
			nextconn = now + DELAY_CHECK_LIMIT;
		}
		else {
			log_debug("debug: mta-routing: no route available for %s: must wait a bit",
			    mta_connector_to_text(c));
		}
		log_debug("debug: mta: retrying to connect on %s in %llus...",
		    mta_connector_to_text(c),
		    (unsigned long long) nextconn - time(NULL));
		c->flags |= CONNECTOR_WAIT;
		runq_schedule_at(runq_connector, nextconn, c);
		return;
	}

	log_debug("debug: mta-routing: spawning new connection on %s",
		    mta_route_to_text(route));

	/* Account for the new connection at every level. */
	c->nconn += 1;
	c->lastconn = time(NULL);

	c->relay->nconn += 1;
	c->relay->lastconn = c->lastconn;
	c->relay->domain->nconn += 1;
	c->relay->domain->lastconn = c->lastconn;
	route->nconn += 1;
	route->lastconn = c->lastconn;
	route->src->nconn += 1;
	route->src->lastconn = c->lastconn;
	route->dst->nconn += 1;
	route->dst->lastconn = c->lastconn;

	/* mx was set by mta_find_route() together with the returned route. */
	mta_session(c->relay, route, mx->mxname);	/* this never fails synchronously */
	mta_relay_ref(c->relay);

	/* Try to open another connection if still useful. */
	goto again;
}

static void
mta_on_timeout(struct runq *runq, void *arg)
{
	struct mta_connector	*connector = arg;
	struct mta_relay	*relay = arg;
	struct mta_route	*route = arg;
	struct hoststat		*hs = arg;

	if (runq == runq_relay) {
		log_debug("debug: mta: ... timeout for %s",
		    mta_relay_to_text(relay));
		relay->status &= ~RELAY_WAIT_CONNECTOR;
		mta_drain(relay);
		mta_relay_unref(relay); /* from mta_drain() */
	}
	else if (runq == runq_connector) {
		log_debug("debug: mta: ... timeout for %s",
		    mta_connector_to_text(connector));
		connector->flags &= ~CONNECTOR_WAIT;
		mta_connect(connector);
	}
	else if (runq == runq_route) {
		route->flags &= ~ROUTE_RUNQ;
		mta_route_enable(route);
		mta_route_unref(route);
	}
	else if (runq == runq_hoststat) {
		log_debug("debug: mta: ... timeout for hoststat %s",
			hs->name);
		mta_hoststat_remove_entry(hs);
		free(hs);
	}
}

/*
 * Disable a route for a while.
 *
 * route   - the route to disable.
 * penalty - amount added to the route's penalty counter.
 * reason  - flag bits; only those within ROUTE_DISABLED are recorded.
 *
 * The delay grows with the square of the accumulated penalty and is
 * capped at DELAY_ROUTE_MAX.  The route is (re)scheduled on runq_route;
 * a reference is taken only when it was not already disabled.
 */
static void
mta_route_disable(struct mta_route *route, int penalty, int reason)
{
	unsigned long long	delay;

	route->penalty += penalty;
	route->lastpenalty = time(NULL);

	delay = (unsigned long long)DELAY_ROUTE_BASE * route->penalty * route->penalty;
	if (delay > DELAY_ROUTE_MAX)
		delay = DELAY_ROUTE_MAX;

	log_info("smtp-out: Disabling route %s for %llus",
	    mta_route_to_text(route), delay);

	if (!(route->flags & ROUTE_DISABLED))
		mta_route_ref(route);
	else
		runq_cancel(runq_route, route);	/* replace pending timer */

	route->flags |= reason & ROUTE_DISABLED;
	runq_schedule(runq_route, delay, route);
}

/*
 * Re-enable a route: clear its disabled flags, mark it ROUTE_NEW and
 * reset its error counter, then reduce its penalty (by one when
 * DELAY_QUADRATIC is set, to zero otherwise).
 */
static void
mta_route_enable(struct mta_route *route)
{
	if (route->flags & ROUTE_DISABLED) {
		log_info("smtp-out: Enabling route %s",
		    mta_route_to_text(route));
		route->flags &= ~ROUTE_DISABLED;
		route->flags |= ROUTE_NEW;
		route->nerror = 0;
	}

	/* Nothing to decay. */
	if (route->penalty == 0)
		return;

#if DELAY_QUADRATIC
	route->penalty -= 1;
	route->lastpenalty = time(NULL);
#else
	route->penalty = 0;
#endif
}

/*
 * Make progress on a relay that has pending tasks.
 *
 * Flushes the tasks if the relay is known to be failing; otherwise issues
 * whatever lookups are still missing (secret, preference, MX, limits).
 * When nothing is pending any more, either asks for a new source or
 * schedules the relay on runq_relay until its next source check is due.
 */
static void
mta_drain(struct mta_relay *r)
{
	/* Wait flags reported in the debug trace, in display order. */
	static const struct {
		int		 flag;
		const char	*label;
	} waits[] = {
		{ RELAY_WAIT_MX,		" MX" },
		{ RELAY_WAIT_PREFERENCE,	" preference" },
		{ RELAY_WAIT_SECRET,		" secret" },
		{ RELAY_WAIT_SOURCE,		" source" },
		{ RELAY_WAIT_CONNECTOR,		" connector" },
	};
	char	 pending[64];
	size_t	 i;

	log_debug("debug: mta: draining %s "
	    "refcount=%d, ntask=%zu, nconnector=%zu, nconn=%zu",
	    mta_relay_to_text(r),
	    r->refcount, r->ntask, tree_count(&r->connectors), r->nconn);

	/* All done. */
	if (r->ntask == 0) {
		log_debug("debug: mta: all done for %s", mta_relay_to_text(r));
		return;
	}

	/* If we know that this relay is failing flush the tasks. */
	if (r->fail) {
		mta_flush(r, r->fail, r->failstr);
		return;
	}

	/* Issue every lookup that is still missing. */
	if ((r->flags & RELAY_AUTH) && r->secret == NULL)
		mta_query_secret(r);
	if (r->backupname && r->backuppref == -1)
		mta_query_preference(r);
	if (r->domain->lastmxquery == 0)
		mta_query_mx(r);
	if (r->limits == NULL)
		mta_query_limits(r);

	/* Wait until we are ready to proceed. */
	if (r->status & RELAY_WAITMASK) {
		pending[0] = '\0';
		for (i = 0; i < sizeof(waits) / sizeof(waits[0]); i++) {
			if (r->status & waits[i].flag)
				(void)strlcat(pending, waits[i].label,
				    sizeof pending);
		}
		log_debug("debug: mta: %s waiting for%s",
		    mta_relay_to_text(r), pending);
		return;
	}

	/* We have pending tasks, and it's maybe time to try a new source. */
	if (r->nextsource <= time(NULL)) {
		mta_query_source(r);
		return;
	}

	log_debug("debug: mta: scheduling relay %s in %llus...",
	    mta_relay_to_text(r),
	    (unsigned long long) r->nextsource - time(NULL));
	runq_schedule_at(runq_relay, r->nextsource, r);
	r->status |= RELAY_WAIT_CONNECTOR;
	mta_relay_ref(r);
}

/*
 * Fail every envelope of every task queued on a relay.
 *
 * relay - the relay whose task queue is emptied.
 * fail  - IMSG_MTA_DELIVERY_TEMPFAIL or IMSG_MTA_DELIVERY_PERMFAIL; any
 *         other value is fatal.
 * error - text logged with each envelope.
 *
 * Each envelope is logged and notified with the given status, the tasks
 * are freed and the "mta.task" / "mta.envelope" counters are decremented.
 * If the relay is holding envelopes in the queue (RELAY_HOLDQ), they are
 * all released.
 */
static void
mta_flush(struct mta_relay *relay, int fail, const char *error)
{
	struct mta_envelope	*e;
	struct mta_task		*task;
	const char		*domain;
	void			*iter;
	struct mta_connector	*c;
	size_t			 n, r;
	int			 cache_hoststat;

	log_debug("debug: mta_flush(%s, %d, \"%s\")",
	    mta_relay_to_text(relay), fail, error);

	if (fail != IMSG_MTA_DELIVERY_TEMPFAIL && fail != IMSG_MTA_DELIVERY_PERMFAIL)
		fatalx("unexpected delivery status %d", fail);

	/*
	 * Envelopes are cached in the hoststat tree only on a temporary
	 * failure where every connector of the relay has a route error.
	 * That condition does not depend on the envelope, so evaluate it
	 * once here instead of rescanning the connectors per envelope.
	 */
	cache_hoststat = 0;
	if (fail == IMSG_MTA_DELIVERY_TEMPFAIL) {
		r = 0;
		iter = NULL;
		while (tree_iter(&relay->connectors, &iter,
			NULL, (void **)&c)) {
			if (c->flags & CONNECTOR_ERROR_ROUTE)
				r++;
		}
		if (tree_count(&relay->connectors) == r)
			cache_hoststat = 1;
	}

	n = 0;
	while ((task = TAILQ_FIRST(&relay->tasks))) {
		TAILQ_REMOVE(&relay->tasks, task, entry);
		while ((e = TAILQ_FIRST(&task->envelopes))) {
			TAILQ_REMOVE(&task->envelopes, e, entry);

			/*
			 * host was suspended, cache envelope id in hoststat tree
			 * so that it can be retried when a delivery succeeds for
			 * that domain.
			 */
			domain = strchr(e->dest, '@');
			if (cache_hoststat && domain)
				mta_hoststat_cache(domain+1, e->id);

			mta_delivery_log(e, NULL, relay->domain->name, fail, error);
			mta_delivery_notify(e);

			n++;
		}
		free(task->sender);
		free(task);
	}

	stat_decrement("mta.task", relay->ntask);
	stat_decrement("mta.envelope", n);
	relay->ntask = 0;

	/* release all waiting envelopes for the relay */
	if (relay->state & RELAY_HOLDQ) {
		m_create(p_queue, IMSG_MTA_HOLDQ_RELEASE, 0, 0, -1);
		m_add_id(p_queue, relay->id);
		m_add_int(p_queue, -1);
		m_close(p_queue);
	}
}

/*
 * Find a route to use for this connector
 *
 * c        - the connector looking for a route.
 * now      - current time, used for all delay checks.
 * limits   - in/out: CONNECTOR_LIMIT_ROUTE or CONNECTOR_LIMIT_HOST is
 *            OR'ed in when no route is returned because of such a limit.
 * nextconn - in/out: raised to the earliest time a host/route becomes
 *            usable when no route is returned because of a delay.
 * pmx      - out: set to the MX entry of the selected route.
 *
 * Walks the domain's MX list in list order, one preference level at a
 * time, and returns the usable route with the fewest connections at the
 * first level that has one.  Returns NULL when none is usable; in that
 * case the reason is reported through *limits, *nextconn, or an error
 * flag set on the connector.
 *
 * Routes obtained from mta_route() that are not selected are unref'ed
 * here; the returned one is not.
 */
static struct mta_route *
mta_find_route(struct mta_connector *c, time_t now, int *limits,
    time_t *nextconn, struct mta_mx **pmx)
{
	struct mta_route	*route, *best;
	struct mta_limits	*l = c->relay->limits;
	struct mta_mx		*mx;
	int			 level, limit_host, limit_route;
	int			 family_mismatch, seen, suspended_route;
	time_t			 tm;

	log_debug("debug: mta-routing: searching new route for %s...",
	    mta_connector_to_text(c));

	tm = 0;
	limit_host = 0;
	limit_route = 0;
	suspended_route = 0;
	family_mismatch = 0;
	level = -1;
	best = NULL;
	seen = 0;

	TAILQ_FOREACH(mx, &c->relay->domain->mxs, entry) {
		/*
		 * New preference level
		 */
		if (mx->preference > level) {
#ifndef IGNORE_MX_PREFERENCE
			/*
			 * Use the current best MX if found.
			 */
			if (best)
				break;

			/*
			 * No candidate found.  There are valid MXs at this
			 * preference level but they reached their limit, or
			 * we can't connect yet.
			 */
			if (limit_host || limit_route || tm)
				break;

			/*
			 *  If we are a backup MX, do not relay to MXs with
			 *  a greater preference value.
			 */
			if (c->relay->backuppref >= 0 &&
			    mx->preference >= c->relay->backuppref)
				break;

			/*
			 * Start looking at MXs on this preference level.
			 */
#endif
			level = mx->preference;
		}

		if (mx->host->flags & HOST_IGNORE)
			continue;

		/* Found a possibly valid mx */
		seen++;

		/*
		 * The host address family must match the source address
		 * (if the source has one) and the configured family limit
		 * (if set).
		 */
		if ((c->source->sa &&
		     c->source->sa->sa_family != mx->host->sa->sa_family) ||
		    (l->family && l->family != mx->host->sa->sa_family)) {
			log_debug("debug: mta-routing: skipping host %s: AF mismatch",
			    mta_host_to_text(mx->host));
			family_mismatch = 1;
			continue;
		}

		if (mx->host->nconn >= l->maxconn_per_host) {
			log_debug("debug: mta-routing: skipping host %s: too many connections",
			    mta_host_to_text(mx->host));
			limit_host = 1;
			continue;
		}

		/* tm tracks the earliest time at which a candidate is usable. */
		if (mx->host->lastconn + l->conndelay_host > now) {
			log_debug("debug: mta-routing: skipping host %s: cannot use before %llus",
			    mta_host_to_text(mx->host),
			    (unsigned long long) mx->host->lastconn + l->conndelay_host - now);
			if (tm == 0 || mx->host->lastconn + l->conndelay_host < tm)
				tm = mx->host->lastconn + l->conndelay_host;
			continue;
		}

		route = mta_route(c->source, mx->host);

		if (route->flags & ROUTE_DISABLED) {
			log_debug("debug: mta-routing: skipping route %s: suspend",
			    mta_route_to_text(route));
			/* Remember why routes were disabled, for error reporting. */
			suspended_route |= route->flags & ROUTE_DISABLED;
			mta_route_unref(route); /* from here */
			continue;
		}

		/* A ROUTE_NEW route with a connection in progress is not reused yet. */
		if (route->nconn && (route->flags & ROUTE_NEW)) {
			log_debug("debug: mta-routing: skipping route %s: not validated yet",
			    mta_route_to_text(route));
			limit_route = 1;
			mta_route_unref(route); /* from here */
			continue;
		}

		if (route->nconn >= l->maxconn_per_route) {
			log_debug("debug: mta-routing: skipping route %s: too many connections",
			    mta_route_to_text(route));
			limit_route = 1;
			mta_route_unref(route); /* from here */
			continue;
		}

		if (route->lastconn + l->conndelay_route > now) {
			log_debug("debug: mta-routing: skipping route %s: cannot use before %llus (delay after connect)",
			    mta_route_to_text(route),
			    (unsigned long long) route->lastconn + l->conndelay_route - now);
			if (tm == 0 || route->lastconn + l->conndelay_route < tm)
				tm = route->lastconn + l->conndelay_route;
			mta_route_unref(route); /* from here */
			continue;
		}

		if (route->lastdisc + l->discdelay_route > now) {
			log_debug("debug: mta-routing: skipping route %s: cannot use before %llus (delay after disconnect)",
			    mta_route_to_text(route),
			    (unsigned long long) route->lastdisc + l->discdelay_route - now);
			if (tm == 0 || route->lastdisc + l->discdelay_route < tm)
				tm = route->lastdisc + l->discdelay_route;
			mta_route_unref(route); /* from here */
			continue;
		}

		/* Use the route with the lowest number of connections. */
		if (best && route->nconn >= best->nconn) {
			log_debug("debug: mta-routing: skipping route %s: current one is better",
			    mta_route_to_text(route));
			mta_route_unref(route); /* from here */
			continue;
		}

		/* New best candidate: drop the previous one, if any. */
		if (best)
			mta_route_unref(best); /* from here */
		best = route;
		*pmx = mx;
		log_debug("debug: mta-routing: selecting candidate route %s",
		    mta_route_to_text(route));
	}

	if (best)
		return (best);

	/*
	 * No route: report the most relevant reason only.
	 */
	/* Order is important */
	if (seen == 0) {
		log_info("smtp-out: No MX found for %s",
		    mta_connector_to_text(c));
		c->flags |= CONNECTOR_ERROR_MX;
	}
	else if (limit_route) {
		log_debug("debug: mta: hit route limit");
		*limits |= CONNECTOR_LIMIT_ROUTE;
	}
	else if (limit_host) {
		log_debug("debug: mta: hit host limit");
		*limits |= CONNECTOR_LIMIT_HOST;
	}
	else if (tm) {
		if (tm > *nextconn)
			*nextconn = tm;
	}
	else if (family_mismatch) {
		log_info("smtp-out: Address family mismatch on %s",
		    mta_connector_to_text(c));
		c->flags |= CONNECTOR_ERROR_FAMILY;
	}
	else if (suspended_route) {
		log_info("smtp-out: No valid route for %s",
		    mta_connector_to_text(c));
		if (suspended_route & ROUTE_DISABLED_NET)
			c->flags |= CONNECTOR_ERROR_ROUTE_NET;
		if (suspended_route & ROUTE_DISABLED_SMTP)
			c->flags |= CONNECTOR_ERROR_ROUTE_SMTP;
	}

	return (NULL);
}

/*
 * Emit the standard one-line delivery log entry for an envelope.
 *
 * evp    - the envelope being reported.
 * prefix - logged as the "result" field.
 * source - source address text, or NULL to log "-".
 * relay  - relay text.
 * status - logged as the "stat" field.
 */
static void
mta_log(const struct mta_envelope *evp, const char *prefix, const char *source,
    const char *relay, const char *status)
{
	const char	*rcpt_text, *source_text;

	/* Missing optional fields are shown as a dash. */
	rcpt_text = evp->rcpt;
	if (rcpt_text == NULL)
		rcpt_text = "-";
	source_text = source;
	if (source_text == NULL)
		source_text = "-";

	log_info("%016"PRIx64" mta delivery evpid=%016"PRIx64" "
	    "from=<%s> to=<%s> rcpt=<%s> source=\"%s\" "
	    "relay=\"%s\" delay=%s result=\"%s\" stat=\"%s\"",
	    evp->session,
	    evp->id,
	    evp->task->sender,
	    evp->dest,
	    rcpt_text,
	    source_text,
	    relay,
	    duration_to_text(time(NULL) - evp->creation),
	    prefix,
	    status);
}

/*
 * Look up, or create, the relay matching an envelope and a relay host.
 *
 * e      - the envelope; supplies the dispatcher name, the destination
 *          domain and the default backup name.
 * relayh - the relay host; supplies hostname, tls mode, flags, port and
 *          auth label.
 *
 * A lookup key is built from the dispatcher's remote settings and the
 * relay host, then searched in the relays tree.  If no relay matches, a
 * new one is allocated with private copies of the key's strings and
 * inserted.  The returned relay has its refcount incremented.
 */
static struct mta_relay *
mta_relay(struct envelope *e, struct relayhost *relayh)
{
	struct dispatcher	*dispatcher;
	struct mta_relay	 key, *r;

	dispatcher = dict_xget(env->sc_dispatchers, e->dispatcher);

	memset(&key, 0, sizeof key);

	key.pki_name = dispatcher->u.remote.pki;
	key.ca_name = dispatcher->u.remote.ca;
	key.authtable = dispatcher->u.remote.auth;
	key.sourcetable = dispatcher->u.remote.source;
	key.helotable = dispatcher->u.remote.helo_source;
	key.heloname = dispatcher->u.remote.helo;
	key.srs = dispatcher->u.remote.srs;

	/*
	 * With an explicit relay hostname the domain is looked up as a host
	 * (second argument 1); otherwise the envelope's destination domain
	 * is used and backup-MX settings apply.
	 */
	if (relayh->hostname[0]) {
		key.domain = mta_domain(relayh->hostname, 1);
	}
	else {
		key.domain = mta_domain(e->dest.domain, 0);
		if (dispatcher->u.remote.backup) {
			key.backupname = dispatcher->u.remote.backupmx;
			if (key.backupname == NULL)
				key.backupname = e->smtpname;
		}
	}

	key.tls = relayh->tls;
	key.flags |= relayh->flags;
	key.port = relayh->port;
	/* An empty auth label is treated as no label at all. */
	key.authlabel = relayh->authlabel;
	if (!key.authlabel[0])
		key.authlabel = NULL;

	if ((r = SPLAY_FIND(mta_relay_tree, &relays, &key)) == NULL) {
		r = xcalloc(1, sizeof *r);
		TAILQ_INIT(&r->tasks);
		r->id = generate_uid();
		r->dispatcher = dispatcher;
		r->tls = key.tls;
		r->flags = key.flags;
		/* The new relay keeps the domain reference taken above. */
		r->domain = key.domain;
		r->backupname = key.backupname ?
		    xstrdup(key.backupname) : NULL;
		/* -1: preference not resolved yet (see mta_drain()). */
		r->backuppref = -1;
		r->port = key.port;
		r->pki_name = key.pki_name ? xstrdup(key.pki_name) : NULL;
		r->ca_name = key.ca_name ? xstrdup(key.ca_name) : NULL;
		if (key.authtable)
			r->authtable = xstrdup(key.authtable);
		if (key.authlabel)
			r->authlabel = xstrdup(key.authlabel);
		if (key.sourcetable)
			r->sourcetable = xstrdup(key.sourcetable);
		if (key.helotable)
			r->helotable = xstrdup(key.helotable);
		if (key.heloname)
			r->heloname = xstrdup(key.heloname);
		r->srs = key.srs;
		SPLAY_INSERT(mta_relay_tree, &relays, r);
		stat_increment("mta.relay", 1);
	} else {
		/* Existing relay already holds the domain: drop our reference. */
		mta_domain_unref(key.domain); /* from here */
	}

	r->refcount++;
	return (r);
}

/*
 * Take one more reference on a relay.
 */
static void
mta_relay_ref(struct mta_relay *r)
{
	r->refcount += 1;
}

/*
 * Drop one reference on a relay and destroy it when none is left.
 *
 * Destruction releases any envelopes held in the queue for this relay,
 * removes the relay from the relays tree, frees its connectors and
 * strings, and drops its domain reference.
 */
static void
mta_relay_unref(struct mta_relay *relay)
{
	struct mta_connector	*conn;

	relay->refcount -= 1;
	if (relay->refcount != 0)
		return;

	/* Make sure there are no envelopes held for this relay */
	if (relay->state & RELAY_HOLDQ) {
		m_create(p_queue, IMSG_MTA_HOLDQ_RELEASE, 0, 0, -1);
		m_add_id(p_queue, relay->id);
		m_add_int(p_queue, 0);
		m_close(p_queue);
	}

	log_debug("debug: mta: freeing %s", mta_relay_to_text(relay));
	SPLAY_REMOVE(mta_relay_tree, &relays, relay);

	/* Tear down every connector still attached to the relay. */
	while (tree_poproot(&relay->connectors, NULL, (void **)&conn))
		mta_connector_free(conn);

	/* Owned strings. */
	free(relay->secret);
	free(relay->authtable);
	free(relay->authlabel);
	free(relay->sourcetable);
	free(relay->helotable);
	free(relay->heloname);
	free(relay->pki_name);
	free(relay->ca_name);
	free(relay->backupname);

	mta_domain_unref(relay->domain); /* from constructor */
	free(relay);
	stat_decrement("mta.relay", 1);
}

/*
 * Append ",<label><value>" to dst, truncating at dstsz like strlcat().
 */
static void
mta_relay_text_add(char *dst, size_t dstsz, const char *label,
    const char *value)
{
	(void)strlcat(dst, ",", dstsz);
	(void)strlcat(dst, label, dstsz);
	(void)strlcat(dst, value, dstsz);
}

/*
 * Render a relay as "[relay:<domain>,<field>,...]" for log messages.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.
 */
const char *
mta_relay_to_text(struct mta_relay *relay)
{
	static char	 buf[1024];
	char		 portbuf[32];
	const char	*proto;

	(void)snprintf(buf, sizeof buf, "[relay:%s", relay->domain->name);

	if (relay->port) {
		(void)snprintf(portbuf, sizeof portbuf, "port=%d",
		    (int)relay->port);
		mta_relay_text_add(buf, sizeof buf, portbuf, "");
	}

	/* Transport name derived from the TLS mode. */
	switch (relay->tls) {
	case RELAY_TLS_OPPORTUNISTIC:
		proto = "smtp";
		break;
	case RELAY_TLS_STARTTLS:
		proto = "smtp+tls";
		break;
	case RELAY_TLS_SMTPS:
		proto = "smtps";
		break;
	case RELAY_TLS_NO:
		proto = (relay->flags & RELAY_LMTP) ? "lmtp" : "smtp+notls";
		break;
	default:
		proto = "???";
		break;
	}
	mta_relay_text_add(buf, sizeof buf, proto, "");

	if (relay->flags & RELAY_AUTH) {
		mta_relay_text_add(buf, sizeof buf, "auth=", relay->authtable);
		(void)strlcat(buf, ":", sizeof buf);
		(void)strlcat(buf, relay->authlabel, sizeof buf);
	}

	if (relay->pki_name)
		mta_relay_text_add(buf, sizeof buf, "pki_name=",
		    relay->pki_name);

	if (relay->domain->as_host)
		mta_relay_text_add(buf, sizeof buf, "mx", "");

	if (relay->backupname)
		mta_relay_text_add(buf, sizeof buf, "backup=",
		    relay->backupname);

	if (relay->sourcetable)
		mta_relay_text_add(buf, sizeof buf, "sourcetable=",
		    relay->sourcetable);

	if (relay->helotable)
		mta_relay_text_add(buf, sizeof buf, "helotable=",
		    relay->helotable);

	if (relay->heloname)
		mta_relay_text_add(buf, sizeof buf, "heloname=",
		    relay->heloname);

	(void)strlcat(buf, "]", sizeof buf);

	return (buf);
}

/*
 * Send a textual description of a relay and of each of its connectors.
 *
 * r  - the relay to describe.
 * p  - process the IMSG_CTL_MTA_SHOW_RELAYS messages are composed to.
 * id - id passed through to m_compose().
 * t  - reference time used to turn timestamps into durations.
 *
 * One message is sent for the relay, then one per connector.
 */
static void
mta_relay_show(struct mta_relay *r, struct mproc *p, uint32_t id, time_t t)
{
	struct mta_connector	*c;
	void			*iter;
	char			 buf[1024], flags[1024], dur[64];
	time_t			 to;

	flags[0] = '\0';

	/* Append name n to the comma-separated list when status bit f is set. */
#define SHOWSTATUS(f, n) do {							\
		if (r->status & (f)) {						\
			if (flags[0])						\
				(void)strlcat(flags, ",", sizeof(flags));	\
			(void)strlcat(flags, (n), sizeof(flags));		\
		}								\
	} while(0)

	SHOWSTATUS(RELAY_WAIT_MX, "MX");
	SHOWSTATUS(RELAY_WAIT_PREFERENCE, "preference");
	SHOWSTATUS(RELAY_WAIT_SECRET, "secret");
	SHOWSTATUS(RELAY_WAIT_LIMITS, "limits");
	SHOWSTATUS(RELAY_WAIT_SOURCE, "source");
	SHOWSTATUS(RELAY_WAIT_CONNECTOR, "connector");
#undef SHOWSTATUS

	/*
	 * The timeout text is copied into dur before the snprintf() below,
	 * which may call duration_to_text() again for lastconn.
	 */
	if (runq_pending(runq_relay, r, &to))
		(void)snprintf(dur, sizeof(dur), "%s", duration_to_text(to - t));
	else
		(void)strlcpy(dur, "-", sizeof(dur));

	(void)snprintf(buf, sizeof(buf), "%s refcount=%d ntask=%zu nconn=%zu lastconn=%s timeout=%s wait=%s%s",
	    mta_relay_to_text(r),
	    r->refcount,
	    r->ntask,
	    r->nconn,
	    r->lastconn ? duration_to_text(t - r->lastconn) : "-",
	    dur,
	    flags,
	    (r->state & RELAY_ONHOLD) ? "ONHOLD" : "");
	m_compose(p, IMSG_CTL_MTA_SHOW_RELAYS, id, 0, -1, buf, strlen(buf) + 1);

	/* One additional line per connector of the relay. */
	iter = NULL;
	while (tree_iter(&r->connectors, &iter, NULL, (void **)&c)) {

		if (runq_pending(runq_connector, c, &to))
			(void)snprintf(dur, sizeof(dur), "%s", duration_to_text(to - t));
		else
			(void)strlcpy(dur, "-", sizeof(dur));

		flags[0] = '\0';

		/* Same as SHOWSTATUS above, but tests the connector's flags. */
#define SHOWFLAG(f, n) do {							\
		if (c->flags & (f)) {						\
			if (flags[0])						\
				(void)strlcat(flags, ",", sizeof(flags));	\
			(void)strlcat(flags, (n), sizeof(flags));		\
		}								\
	} while(0)

		SHOWFLAG(CONNECTOR_NEW,		"NEW");
		SHOWFLAG(CONNECTOR_WAIT,	"WAIT");

		SHOWFLAG(CONNECTOR_ERROR_FAMILY,	"ERROR_FAMILY");
		SHOWFLAG(CONNECTOR_ERROR_SOURCE,	"ERROR_SOURCE");
		SHOWFLAG(CONNECTOR_ERROR_MX,		"ERROR_MX");
		SHOWFLAG(CONNECTOR_ERROR_ROUTE_NET,	"ERROR_ROUTE_NET");
		SHOWFLAG(CONNECTOR_ERROR_ROUTE_SMTP,	"ERROR_ROUTE_SMTP");
		SHOWFLAG(CONNECTOR_ERROR_BLOCKED,	"ERROR_BLOCKED");

		SHOWFLAG(CONNECTOR_LIMIT_HOST,		"LIMIT_HOST");
		SHOWFLAG(CONNECTOR_LIMIT_ROUTE,		"LIMIT_ROUTE");
		SHOWFLAG(CONNECTOR_LIMIT_SOURCE,	"LIMIT_SOURCE");
		SHOWFLAG(CONNECTOR_LIMIT_RELAY,		"LIMIT_RELAY");
		SHOWFLAG(CONNECTOR_LIMIT_CONN,		"LIMIT_CONN");
		SHOWFLAG(CONNECTOR_LIMIT_DOMAIN,	"LIMIT_DOMAIN");
#undef SHOWFLAG

		(void)snprintf(buf, sizeof(buf),
		    "  connector %s refcount=%d nconn=%zu lastconn=%s timeout=%s flags=%s",
		    mta_source_to_text(c->source),
		    c->refcount,
		    c->nconn,
		    c->lastconn ? duration_to_text(t - c->lastconn) : "-",
		    dur,
		    flags);
		m_compose(p, IMSG_CTL_MTA_SHOW_RELAYS, id, 0, -1, buf,
		    strlen(buf) + 1);


	}
}

/*
 * Order two optional strings: a NULL string sorts before any non-NULL
 * one, two NULL strings are equal, otherwise strcmp() decides.
 */
static int
mta_relay_optstr_cmp(const char *x, const char *y)
{
	if (x == NULL)
		return ((y == NULL) ? 0 : -1);
	if (y == NULL)
		return (1);
	return (strcmp(x, y));
}

/*
 * Ordering function for the relay tree.
 *
 * Relays are compared field by field: domain (by pointer), tls, flags,
 * port, then the optional strings authtable, authlabel, sourcetable,
 * helotable, heloname, pki_name, ca_name, backupname, and finally srs.
 * The first differing field decides.
 */
static int
mta_relay_cmp(const struct mta_relay *a, const struct mta_relay *b)
{
	int	r;

	if (a->domain != b->domain)
		return ((a->domain < b->domain) ? -1 : 1);

	if (a->tls != b->tls)
		return ((a->tls < b->tls) ? -1 : 1);

	if (a->flags != b->flags)
		return ((a->flags < b->flags) ? -1 : 1);

	if (a->port != b->port)
		return ((a->port < b->port) ? -1 : 1);

	if ((r = mta_relay_optstr_cmp(a->authtable, b->authtable)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->authlabel, b->authlabel)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->sourcetable, b->sourcetable)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->helotable, b->helotable)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->heloname, b->heloname)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->pki_name, b->pki_name)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->ca_name, b->ca_name)) != 0)
		return (r);
	if ((r = mta_relay_optstr_cmp(a->backupname, b->backupname)) != 0)
		return (r);

	if (a->srs != b->srs)
		return ((a->srs < b->srs) ? -1 : 1);

	return (0);
}

SPLAY_GENERATE(mta_relay_tree, mta_relay, entry, mta_relay_cmp);

static struct mta_host *
mta_host(const struct sockaddr *sa)
{
	struct mta_host		key, *h;
	struct sockaddr_storage	ss;

	memmove(&ss, sa, sa->sa_len);
	key.sa = (struct sockaddr*)&ss;
	h = SPLAY_FIND(mta_host_tree, &hosts, &key);

	if (h == NULL) {
		h = xcalloc(1, sizeof(*h));
		h->sa = xmemdup(sa, sa->sa_len);
		SPLAY_INSERT(mta_host_tree, &hosts, h);
		stat_increment("mta.host", 1);
	}

	h->refcount++;
	return (h);
}

/*
 * Take one more reference on a host.
 */
static void
mta_host_ref(struct mta_host *h)
{
	h->refcount += 1;
}

/*
 * Drop one reference on a host; when it was the last one, remove the
 * host from the hosts tree and free it.
 */
static void
mta_host_unref(struct mta_host *h)
{
	h->refcount -= 1;
	if (h->refcount != 0)
		return;

	SPLAY_REMOVE(mta_host_tree, &hosts, h);
	free(h->ptrname);
	free(h->sa);
	free(h);
	stat_decrement("mta.host", 1);
}

/*
 * Render a host as "<address>" or "<address> (<ptrname>)" when a PTR
 * name is known.  The result lives in a static buffer that is
 * overwritten by the next call.
 */
const char *
mta_host_to_text(struct mta_host *h)
{
	static char buf[1024];

	if (h->ptrname == NULL) {
		(void)snprintf(buf, sizeof buf, "%s", sa_to_text(h->sa));
		return (buf);
	}

	(void)snprintf(buf, sizeof buf, "%s (%s)",
	    sa_to_text(h->sa), h->ptrname);
	return (buf);
}

static int
mta_host_cmp(const struct mta_host *a, const struct mta_host *b)
{
	if (a->sa->sa_len < b->sa->sa_len)
		return (-1);
	if (a->sa->sa_len > b->sa->sa_len)
		return (1);
	return (memcmp(a->sa, b->sa, a->sa->sa_len));
}

SPLAY_GENERATE(mta_host_tree, mta_host, entry, mta_host_cmp);

static struct mta_domain *
mta_domain(char *name, int as_host)
{
	struct mta_domain	key, *d;

	key.name = name;
	key.as_host = as_host;
	d = SPLAY_FIND(mta_domain_tree, &domains, &key);

	if (d == NULL) {
		d = xcalloc(1, sizeof(*d));
		d->name = xstrdup(name);
		d->as_host = as_host;
		TAILQ_INIT(&d->mxs);
		SPLAY_INSERT(mta_domain_tree, &domains, d);
		stat_increment("mta.domain", 1);
	}

	d->refcount++;
	return (d);
}

/*
 * Take one more reference on a domain.
 * Currently compiled out by the surrounding "#if 0".
 */
#if 0
static void
mta_domain_ref(struct mta_domain *d)
{
	d->refcount++;
}
#endif

/*
 * Drop one reference on a domain; when it was the last one, release all
 * of its MX entries (and their host references), remove it from the
 * domains tree and free it.
 */
static void
mta_domain_unref(struct mta_domain *d)
{
	struct mta_mx	*cur;

	d->refcount -= 1;
	if (d->refcount != 0)
		return;

	for (;;) {
		cur = TAILQ_FIRST(&d->mxs);
		if (cur == NULL)
			break;
		TAILQ_REMOVE(&d->mxs, cur, entry);
		mta_host_unref(cur->host); /* from IMSG_DNS_HOST */
		free(cur->mxname);
		free(cur);
	}

	SPLAY_REMOVE(mta_domain_tree, &domains, d);
	free(d->name);
	free(d);
	stat_decrement("mta.domain", 1);
}

/*
 * Ordering function for the domain tree: compare the as_host value
 * first, then the names without regard to case.
 */
static int
mta_domain_cmp(const struct mta_domain *a, const struct mta_domain *b)
{
	if (a->as_host != b->as_host)
		return ((a->as_host < b->as_host) ? -1 : 1);
	return (strcasecmp(a->name, b->name));
}

SPLAY_GENERATE(mta_domain_tree, mta_domain, entry, mta_domain_cmp);

static struct mta_source *
mta_source(const struct sockaddr *sa)
{
	struct mta_source	key, *s;
	struct sockaddr_storage	ss;

	if (sa) {
		memmove(&ss, sa, sa->sa_len);
		key.sa = (struct sockaddr*)&ss;
	} else
		key.sa = NULL;
	s = SPLAY_FIND(mta_source_tree, &sources, &key);

	if (s == NULL) {
		s = xcalloc(1, sizeof(*s));
		if (sa)
			s->sa = xmemdup(sa, sa->sa_len);
		SPLAY_INSERT(mta_source_tree, &sources, s);
		stat_increment("mta.source", 1);
	}

	s->refcount++;
	return (s);
}

/*
 * Take one more reference on a source.
 */
static void
mta_source_ref(struct mta_source *s)
{
	s->refcount += 1;
}

/*
 * Drop one reference on a source; when it was the last one, remove the
 * source from the sources tree and free it.
 */
static void
mta_source_unref(struct mta_source *s)
{
	s->refcount -= 1;
	if (s->refcount != 0)
		return;

	SPLAY_REMOVE(mta_source_tree, &sources, s);
	free(s->sa);
	free(s);
	stat_decrement("mta.source", 1);
}

/*
 * Render a source as its address text, or "[]" when it has no address.
 * A non-empty result lives in a static buffer that is overwritten by the
 * next call.
 */
static const char *
mta_source_to_text(struct mta_source *s)
{
	static char buf[1024];

	if (s->sa != NULL) {
		(void)snprintf(buf, sizeof buf, "%s", sa_to_text(s->sa));
		return (buf);
	}
	return "[]";
}

/*
 * Ordering function for the source tree.
 *
 * A source without address sorts before any source with one.  Sources
 * with an address are ordered by sa_len first, then by a byte-wise
 * comparison of the sockaddr contents over that length.
 */
static int
mta_source_cmp(const struct mta_source *a, const struct mta_source *b)
{
	/* At least one side has no address. */
	if (a->sa == NULL || b->sa == NULL) {
		if (a->sa == b->sa)
			return (0);
		return ((a->sa == NULL) ? -1 : 1);
	}

	if (a->sa->sa_len != b->sa->sa_len)
		return ((a->sa->sa_len < b->sa->sa_len) ? -1 : 1);

	return (memcmp(a->sa, b->sa, a->sa->sa_len));
}

SPLAY_GENERATE(mta_source_tree, mta_source, entry, mta_source_cmp);

/*
 * Return the connector bound to (relay, source), creating it if needed.
 *
 * Connectors are stored in the relay's connectors tree, keyed by the
 * source pointer.  A new connector is flagged CONNECTOR_NEW and takes a
 * reference on the source.  The relay is stored without taking a
 * reference here.
 */
static struct mta_connector *
mta_connector(struct mta_relay *relay, struct mta_source *source)
{
	struct mta_connector	*conn;

	conn = tree_get(&relay->connectors, (uintptr_t)(source));
	if (conn != NULL)
		return (conn);

	conn = xcalloc(1, sizeof(*conn));
	conn->relay = relay;
	conn->source = source;
	conn->flags |= CONNECTOR_NEW;
	mta_source_ref(source);
	tree_xset(&relay->connectors, (uintptr_t)(source), conn);
	stat_increment("mta.connector", 1);
	log_debug("debug: mta: new %s", mta_connector_to_text(conn));

	return (conn);
}

/*
 * Destroy a connector: cancel its pending runq job if it is flagged
 * CONNECTOR_WAIT, release the source reference taken at creation, free
 * the connector and decrement the "mta.connector" stat counter.
 *
 * The connector is not removed from its relay's connectors tree here.
 */
static void
mta_connector_free(struct mta_connector *c)
{
	log_debug("debug: mta: freeing %s",
	    mta_connector_to_text(c));

	if ((c->flags & CONNECTOR_WAIT) != 0) {
		log_debug("debug: mta: cancelling timeout for %s",
		    mta_connector_to_text(c));
		runq_cancel(runq_connector, c);
	}

	mta_source_unref(c->source); /* from constructor */
	free(c);

	stat_decrement("mta.connector", 1);
}

/*
 * Return a printable form of a connector, built from its source, its
 * relay and its flags in hexadecimal.
 *
 * The result lives in a static buffer owned by this function and is
 * overwritten by the next call.
 */
static const char *
mta_connector_to_text(struct mta_connector *c)
{
	static char	 text[1024];
	const char	*source_text;
	const char	*relay_text;

	source_text = mta_source_to_text(c->source);
	relay_text = mta_relay_to_text(c->relay);

	(void)snprintf(text, sizeof(text), "[connector:%s->%s,0x%x]",
	    source_text, relay_text, c->flags);

	return (text);
}

static struct mta_route *
mta_route(struct mta_source *src, struct mta_host *dst)
{
	struct mta_route	key, *r;
	static uint64_t		rid = 0;

	key.src = src;
	key.dst = dst;
	r = SPLAY_FIND(mta_route_tree, &routes, &key);

	if (r == NULL) {
		r = xcalloc(1, sizeof(*r));
		r->src = src;
		r->dst = dst;
		r->flags |= ROUTE_NEW;
		r->id = ++rid;
		SPLAY_INSERT(mta_route_tree, &routes, r);
		mta_source_ref(src);
		mta_host_ref(dst);
		stat_increment("mta.route", 1);
	}
	else if (r->flags & ROUTE_RUNQ) {
		log_debug("debug: mta: mta_route_ref(): cancelling runq for route %s",
		    mta_route_to_text(r));
		r->flags &= ~(ROUTE_RUNQ | ROUTE_KEEPALIVE);
		runq_cancel(runq_route, r);
		r->refcount--; /* from mta_route_unref() */
	}

	r->refcount++;
	return (r);
}

/*
 * Take an additional reference on a route.
 */
static void
mta_route_ref(struct mta_route *r)
{
	r->refcount += 1;
}

/*
 * Drop one reference on a route.
 *
 * When the last reference goes away the route is not necessarily destroyed
 * right away.  If it has a penalty, or if a connect/disconnect delay is
 * still running for it, the route is flagged ROUTE_RUNQ, scheduled on the
 * route runq at the computed deadline, and given one reference on behalf
 * of the runq so that its state (lastconn, lastdisc, penalty) survives
 * until then.
 *
 * Otherwise the route is removed from the route tree, its references on
 * the source and destination host are released, and it is freed.
 */
static void
mta_route_unref(struct mta_route *r)
{
	time_t	sched, now;
	int	delay;

	if (--r->refcount)
		return;

	/*
	 * Nothing references this route, but we might want to keep it alive
	 * for a while.
	 */
	now = time(NULL);
	sched = 0;

	if (r->penalty) {
#if DELAY_QUADRATIC
		delay = DELAY_ROUTE_BASE * r->penalty * r->penalty;
#else
		delay = 15 * 60;
#endif
		if (delay > DELAY_ROUTE_MAX)
			delay = DELAY_ROUTE_MAX;
		sched = r->lastpenalty + delay;

		/*
		 * Only announce the keep-alive when the deadline is really in
		 * the future; otherwise the route is discarded below and the
		 * unsigned difference would wrap to a bogus huge value.
		 */
		if (sched > now)
			log_debug("debug: mta: mta_route_unref(): keeping route %s alive for %llus (penalty %d)",
			    mta_route_to_text(r),
			    (unsigned long long)(sched - now), r->penalty);
	} else if (!(r->flags & ROUTE_KEEPALIVE)) {
		/*
		 * Keep the route until BOTH the connect delay and the
		 * disconnect delay have expired, i.e. until the later of the
		 * two deadlines that are still in the future.
		 */
		if (r->lastconn + max_seen_conndelay_route > now)
			sched = r->lastconn + max_seen_conndelay_route;
		if (r->lastdisc + max_seen_discdelay_route > now &&
		    r->lastdisc + max_seen_discdelay_route > sched)
			sched = r->lastdisc + max_seen_discdelay_route;

		if (sched > now)
			log_debug("debug: mta: mta_route_unref(): keeping route %s alive for %llus (imposed delay)",
			    mta_route_to_text(r),
			    (unsigned long long)(sched - now));
	}

	if (sched > now) {
		/* Park the route on the runq; the runq owns this reference. */
		r->flags |= ROUTE_RUNQ;
		runq_schedule_at(runq_route, sched, r);
		r->refcount++;
		return;
	}

	log_debug("debug: mta: mta_route_unref(): really discarding route %s",
	    mta_route_to_text(r));

	SPLAY_REMOVE(mta_route_tree, &routes, r);
	mta_source_unref(r->src); /* from constructor */
	mta_host_unref(r->dst); /* from constructor */
	free(r);
	stat_decrement("mta.route", 1);
}

/*
 * Return a printable form of a route as "<source> <-> <host>".
 *
 * The result lives in a static buffer owned by this function and is
 * overwritten by the next call.
 */
static const char *
mta_route_to_text(struct mta_route *r)
{
	static char	 text[1024];
	const char	*src_text;
	const char	*dst_text;

	src_text = mta_source_to_text(r->src);
	dst_text = mta_host_to_text(r->dst);

	(void)snprintf(text, sizeof(text), "%s <-> %s", src_text, dst_text);

	return (text);
}

/*
 * Ordering function for the route tree: routes are compared on the
 * pointer value of their source first, then on the pointer value of
 * their destination host.
 */
static int
mta_route_cmp(const struct mta_route *a, const struct mta_route *b)
{
	if (a->src != b->src)
		return ((a->src < b->src) ? -1 : 1);

	if (a->dst != b->dst)
		return ((a->dst < b->dst) ? -1 : 1);

	return (0);
}

SPLAY_GENERATE(mta_route_tree, mta_route, entry, mta_route_cmp);

/*
 * Register a block for the (source, domain) pair.  dom may be NULL.
 *
 * Nothing happens if the same pair is already blocked.  Otherwise a new
 * entry is inserted in the block tree; it owns a copy of the domain name
 * (when given) and holds a reference on the source.
 */
void
mta_block(struct mta_source *src, char *dom)
{
	struct mta_block	 key, *blk;

	key.source = src;
	key.domain = dom;

	if (SPLAY_FIND(mta_block_tree, &blocks, &key) != NULL)
		return;

	blk = xcalloc(1, sizeof(*blk));
	if (dom != NULL)
		blk->domain = xstrdup(dom);
	blk->source = src;
	mta_source_ref(src);
	SPLAY_INSERT(mta_block_tree, &blocks, blk);
}

/*
 * Remove the block registered for the (source, domain) pair, if any.
 * dom may be NULL.
 *
 * The entry is unlinked from the block tree, its reference on the source
 * is released, and its domain copy and the entry itself are freed.
 */
void
mta_unblock(struct mta_source *src, char *dom)
{
	struct mta_block	 key, *blk;

	key.source = src;
	key.domain = dom;

	blk = SPLAY_FIND(mta_block_tree, &blocks, &key);
	if (blk != NULL) {
		SPLAY_REMOVE(mta_block_tree, &blocks, blk);

		mta_source_unref(blk->source);
		free(blk->domain);
		free(blk);
	}
}

int
mta_is_blocked(struct mta_source *src, char *dom)
{
	struct mta_block key;

	key.source = src;
	key.domain = dom;

	if (SPLAY_FIND(mta_block_tree, &blocks, &key))
		return (1);

	return (0);
}

static
int
mta_block_cmp(const struct mta_block *a, const struct mta_block *b)
{
	if (a->source < b->source)
		return (-1);
	if (a->source > b->source)
		return (1);
	if (!a->domain && b->domain)
		return (-1);
	if (a->domain && !b->domain)
		return (1);
	if (a->domain == b->domain)
		return (0);
	return (strcasecmp(a->domain, b->domain));
}

SPLAY_GENERATE(mta_block_tree, mta_block, entry, mta_block_cmp);



/* hoststat errors are not critical, we do best effort */
void
mta_hoststat_update(const char *host, const char *error)
{
	struct hoststat	*hs = NULL;
	char		 buf[HOST_NAME_MAX+1];

	if (!lowercase(buf, host, sizeof buf))
		return;

	hs = dict_get(&hoststat, buf);
	if (hs == NULL) {
		if ((hs = calloc(1, sizeof *hs)) == NULL)
			return;
		tree_init(&hs->deferred);
		runq_schedule(runq_hoststat, HOSTSTAT_EXPIRE_DELAY, hs);
	}
	(void)strlcpy(hs->name, buf, sizeof hs->name);
	(void)strlcpy(hs->error, error, sizeof hs->error);
	hs->tm = time(NULL);
	dict_set(&hoststat, buf, hs);

	runq_cancel(runq_hoststat, hs);
	runq_schedule(runq_hoststat, HOSTSTAT_EXPIRE_DELAY, hs);
}

/*
 * Remember envelope evpid as deferred for a host.
 *
 * Nothing is recorded if the host name cannot be lowercased, if the host
 * has no hoststat entry, or if the entry already tracks at least
 * env->sc_mta_max_deferred envelopes.
 */
void
mta_hoststat_cache(const char *host, uint64_t evpid)
{
	char		 lname[HOST_NAME_MAX+1];
	struct hoststat	*entry;

	if (!lowercase(lname, host, sizeof(lname)))
		return;

	entry = dict_get(&hoststat, lname);
	if (entry != NULL &&
	    tree_count(&entry->deferred) < env->sc_mta_max_deferred)
		tree_set(&entry->deferred, evpid, NULL);
}

/*
 * Forget envelope evpid from the deferred set of a host.
 *
 * Nothing happens if the host name cannot be lowercased or if the host
 * has no hoststat entry.
 */
void
mta_hoststat_uncache(const char *host, uint64_t evpid)
{
	char		 lname[HOST_NAME_MAX+1];
	struct hoststat	*entry;

	if (!lowercase(lname, host, sizeof(lname)))
		return;

	entry = dict_get(&hoststat, lname);
	if (entry != NULL)
		tree_pop(&entry->deferred, evpid);
}

/*
 * Ask the queue process to schedule every envelope deferred for a host.
 *
 * Each envelope id is popped from the host's deferred set and sent to
 * p_queue in an IMSG_MTA_SCHEDULE message, leaving the set empty.
 * Nothing happens if the host name cannot be lowercased or if the host
 * has no hoststat entry.
 */
void
mta_hoststat_reschedule(const char *host)
{
	char		 lname[HOST_NAME_MAX+1];
	struct hoststat	*entry;
	uint64_t	 id;

	if (!lowercase(lname, host, sizeof(lname)))
		return;

	entry = dict_get(&hoststat, lname);
	if (entry == NULL)
		return;

	for (;;) {
		if (!tree_poproot(&entry->deferred, &id, NULL))
			break;
		m_compose(p_queue, IMSG_MTA_SCHEDULE, 0, 0, -1,
		    &id, sizeof(id));
	}
}

/*
 * Detach a hoststat entry: empty its deferred set, remove it from the
 * hoststat dictionary and cancel its job on the hoststat runq.
 *
 * The entry itself is not freed here.
 */
static void
mta_hoststat_remove_entry(struct hoststat *hs)
{
	/* Discard every deferred envelope id still tracked. */
	while (tree_poproot(&hs->deferred, NULL, NULL))
		continue;

	dict_pop(&hoststat, hs->name);
	runq_cancel(runq_hoststat, hs);
}