tcp: Add a sysctl to modify listening socket FIB inheritance

Introduce the net.inet.tcp.bind_all_fibs tunable, set to 1 by default
for compatibility with current behaviour.  When set to 0, all TCP
listening sockets are private to their FIB.  Inbound connection requests
will only succeed if a matching inpcb is bound to the same FIB as the
request.

No functional change intended, as the new behaviour is not enabled by
default.

Reviewed by:	glebius
MFC after:	2 weeks
Sponsored by:	Klara, Inc.
Sponsored by:	Stormshield
Differential Revision:	https://reviews.freebsd.org/D48663

(cherry picked from commit 5dc99e9bb985dce58e8fc85c09ef4e49bf051971)
This commit is contained in:
Mark Johnston 2025-02-06 14:14:49 +00:00 committed by Franco Fichtner
parent 1c2b0605ef
commit 195b10c937
4 changed files with 48 additions and 6 deletions

View File

@ -33,7 +33,7 @@
.\"
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\"
.Dd July 28, 2024
.Dd January 10, 2025
.Dt TCP 4
.Os
.Sh NAME
@ -202,6 +202,35 @@ The alternate TCP stack must already be loaded in the kernel.
To list the available TCP stacks, see
.Va functions_available
in the
.Ss FIB support
TCP sockets are FIB-aware.
They inherit the FIB of the process which created the socket, or that of the
listening socket for sockets created by
.Xr accept 2 .
In particular, the FIB is not inherited from that of the interface where the
initiating SYN packet was received.
When an incoming connection request arrives at a listening socket, the initial
handshake also occurs in the FIB of the listening socket, not that of the
received packet.
.Pp
By default, a TCP listening socket can accept connections originating from any
FIB.
If the
.Va net.inet.tcp.bind_all_fibs
tunable is set to 0, a listening socket will only accept connections
originating
from the listening socket's FIB.
Connection requests from other FIBs will be treated as though there is no
listening socket for the destination address and port.
In this mode, multiple listening sockets owned by the same user can listen on
the same address and port so long as they belong to different FIBs, similar to
the behavior of the
.Dv SO_REUSEPORT
socket option.
If the tunable is set to 0, all sockets added to a load-balancing group created
with the
.Dv SO_REUSEPORT_LB
socket option must belong to the same FIB.
.Sx MIB (sysctl) Variables
section further down.
To list the default TCP stack, see
@ -1048,6 +1077,7 @@ when trying to use a TCP function block that is not available;
.El
.Sh SEE ALSO
.Xr getsockopt 2 ,
.Xr setfib 2 ,
.Xr socket 2 ,
.Xr stats 3 ,
.Xr sysctl 3 ,

View File

@ -137,6 +137,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_log_in_vain), 0,
"Log all incoming TCP segments to closed ports");
VNET_DEFINE(int, tcp_bind_all_fibs) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, bind_all_fibs, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(tcp_bind_all_fibs), 0,
"Bound sockets receive traffic from all FIBs");
VNET_DEFINE(int, blackhole) = 0;
#define V_blackhole VNET(blackhole)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
@ -817,7 +822,8 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
*/
lookupflag = INPLOOKUP_WILDCARD |
((thflags & (TH_ACK|TH_SYN)) == TH_SYN ?
INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB);
INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB) |
(V_tcp_bind_all_fibs ? 0 : INPLOOKUP_FIB);
findpcb:
tp = NULL;
#ifdef INET6

View File

@ -264,7 +264,8 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
}
INP_HASH_WLOCK(&V_tcbinfo);
error = in_pcbbind(inp, sinp, 0, td->td_ucred);
error = in_pcbbind(inp, sinp, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB,
td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
tcp_bblog_pru(tp, PRU_BIND, error);
@ -338,7 +339,8 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
}
}
#endif
error = in6_pcbbind(inp, sin6, 0, td->td_ucred);
error = in6_pcbbind(inp, sin6, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB,
td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
if (error != 0)
@ -378,7 +380,8 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
}
if (inp->inp_lport == 0) {
INP_HASH_WLOCK(&V_tcbinfo);
error = in_pcbbind(inp, NULL, 0, td->td_ucred);
error = in_pcbbind(inp, NULL,
V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
}
if (error == 0) {
@ -435,7 +438,8 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, NULL, 0, td->td_ucred);
error = in6_pcbbind(inp, NULL,
V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
}
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {

View File

@ -1283,6 +1283,7 @@ VNET_DECLARE(uint32_t, tcp_ack_war_time_window);
VNET_DECLARE(int, tcp_autorcvbuf_max);
VNET_DECLARE(int, tcp_autosndbuf_inc);
VNET_DECLARE(int, tcp_autosndbuf_max);
VNET_DECLARE(int, tcp_bind_all_fibs);
VNET_DECLARE(int, tcp_delack_enabled);
VNET_DECLARE(int, tcp_do_autorcvbuf);
VNET_DECLARE(int, tcp_do_autosndbuf);
@ -1335,6 +1336,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
#define V_tcp_bind_all_fibs VNET(tcp_bind_all_fibs)
#define V_tcp_delack_enabled VNET(tcp_delack_enabled)
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)