[BACK]Return to bpfdesc.h CVS log [TXT][DIR] Up to [local] / src / sys / net

File: [local] / src / sys / net / bpfdesc.h (download)

Revision 1.48, Thu Mar 9 05:56:58 2023 UTC (15 months ago) by dlg
Branch: MAIN
CVS Tags: OPENBSD_7_5_BASE, OPENBSD_7_5, OPENBSD_7_4_BASE, OPENBSD_7_4, OPENBSD_7_3_BASE, OPENBSD_7_3, HEAD
Changes since 1.47: +5 -4 lines

add a timeout between capturing a packet and making the buffer readable.

before this, there were three reasons that a bpf read will finish.

the first is the obvious one: the bpf packet buffer in the kernel
fills up. by default this is about 32k, so if you're only capturing
a small packet packet every few seconds, it can take a long time
for the buffer to fill up before you can read them.

the second is if bpf has been configured to enable immediate mode with
ioctl(BIOCIMMEDIATE). this means that when any packet is written into
the bpf buffer, the buffer is immediately readable. this is fine
if the packet rate is low, but if the packet rate is high you don't
get the benefit of buffering many packets that bpf is supposed to
provide.

the third mechanism is if bpf has been configured with the BIOCSRTIMEOUT
ioctl, which sets a maximum wait time on a bpf read. BIOCSRTIMEOUT
means than a clock starts ticking down when a program (eg pflogd)
reads from bpf. when the clock reaches zero then the read returns
with whatever is in the bpf packet buffer. however, there could be
nothing in the buffer, and the read will still complete.

deraadt@ noticed this behaviour with pflogd. it wants packets logged
by pf to end up on disk in a timely fashion, but it's fine with
tolerating a bit of delay so it can take advantatage of buffering
to amortise the cost of the reads per packet. it currently does
this with BIOCSRTIMEOUT set to half a second, which means it's
always waking up every half second even if there's nothing to log.

this diff adds BIOCSWTIMEOUT, which specifies a timeout from when
bpf first puts a packet in the capture buffer, and when the buffer
becomes readable.

by default this wait timeout is infinite, meaning the buffer has
to be filled before it becomes readable. BIOCSWTIMEOUT can be set
to enable the new functionality. BIOCIMMEDIATE is turned into a
variation of BIOCSWTIMEOUT with the wait time set to 0, ie, wait 0
seconds between when a packet is written to the buffer and when the
buffer becomes readable. combining BIOCSWTIMEOUT and
BIOCIMMEDIATE simplifies the code a lot.

for pflogd, this means if there are no packets to capture, pflogd
won't wake up every half second to do nothing.  however, when a
packet is logged by pf, bpf will wait another half second to see
if any more packets arrive (or the buffer fills up) before the read
fires.

discussed a lot with deraadt@ and sashan@
ok sashan@

/*	$OpenBSD: bpfdesc.h,v 1.48 2023/03/09 05:56:58 dlg Exp $	*/
/*	$NetBSD: bpfdesc.h,v 1.11 1995/09/27 18:30:42 thorpej Exp $	*/

/*
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpfdesc.h	8.1 (Berkeley) 6/10/93
 */

#ifndef _NET_BPFDESC_H_
#define _NET_BPFDESC_H_

#ifdef _KERNEL

/*
 * Locks used to protect struct members in this file:
 *
 *	m	the per-descriptor mutex (bpf_d.bd_mtx)
 */

struct bpf_program_smr {
	struct bpf_program	bps_bf;
	struct smr_entry	bps_smr;
};

/*
 * Descriptor associated with each open bpf file.
 */
struct bpf_d {
	SMR_SLIST_ENTRY(bpf_d) bd_next;	/* Linked list of descriptors */
	/*
	 * Buffer slots: two mbuf clusters buffer the incoming packets.
	 *   The model has three slots.  Sbuf is always occupied.
	 *   sbuf (store) - Receive interrupt puts packets here.
	 *   hbuf (hold) - When sbuf is full, put cluster here and
	 *                 wakeup read (replace sbuf with fbuf).
	 *   fbuf (free) - When read is done, put cluster here.
	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
	 */
	struct mutex	bd_mtx;		/* protect buffer slots below */
	caddr_t		bd_sbuf;	/* store slot */
	caddr_t		bd_hbuf;	/* hold slot */
	caddr_t		bd_fbuf;	/* free slot */
	int		bd_slen;	/* current length of store buffer */
	int		bd_hlen;	/* current length of hold buffer */
	int		bd_bufsize;	/* absolute length of buffers */

	int		bd_in_uiomove;	/* for debugging purpose */

	struct bpf_if  *bd_bif;		/* interface descriptor */
	uint64_t	bd_rtout;	/* [m] Read timeout in nanoseconds */
	uint64_t	bd_wtout;	/* [m] Wait time in nanoseconds */
	u_long		bd_nreaders;	/* [m] # threads asleep in bpfread() */
	struct bpf_program_smr
		       *bd_rfilter;	/* read filter code */
	struct bpf_program_smr
		       *bd_wfilter;	/* write filter code */
	u_long		bd_rcount;	/* number of packets received */
	u_long		bd_dcount;	/* number of packets dropped */

	u_char		bd_promisc;	/* true if listening promiscuously */
	u_char		bd_state;	/* [m] idle, waiting, or timed out */
	u_char		bd_locked;	/* true if descriptor is locked */
	u_char		bd_fildrop;	/* true if filtered packets will be dropped */
	u_char		bd_dirfilt;	/* direction filter */
	int		bd_hdrcmplt;	/* false to fill in src lladdr automatically */
	int		bd_async;	/* non-zero if packet reception should generate signal */
	int		bd_sig;		/* signal to send upon packet reception */
	struct sigio_ref
			bd_sigio;	/* async I/O registration */
	struct refcnt	bd_refcnt;	/* reference count */
	struct klist	bd_klist;	/* list of knotes */
	int		bd_unit;	/* logical unit number */
	LIST_ENTRY(bpf_d) bd_list;	/* descriptor list */

	struct task	bd_wake_task;	/* defer pgsigio() and selwakeup() */
	struct timeout	bd_wait_tmo;	/* delay wakeup after catching pkt */

	struct smr_entry
			bd_smr;
};

/*
 * Descriptor associated with each attached hardware interface.
 */
struct bpf_if {
	struct bpf_if *bif_next;	/* list of all interfaces */
	SMR_SLIST_HEAD(, bpf_d) bif_dlist;		/* descriptor list */
	struct bpf_if **bif_driverp;	/* pointer into softc */
	u_int bif_dlt;			/* link layer type */
	u_int bif_hdrlen;		/* length of header (with padding) */
	const char *bif_name;		/* name of "subsystem" */
	struct ifnet *bif_ifp;		/* corresponding interface */
};

int	 bpf_setf(struct bpf_d *, struct bpf_program *, int);
#endif /* _KERNEL */
#endif /* _NET_BPFDESC_H_ */