mirror of
https://github.com/opnsense/src.git
synced 2026-02-04 03:00:53 -06:00
inpcb: Close some SO_REUSEPORT_LB races
For a long time, the inpcb lookup path has been lockless in the common case: we use net_epoch to synchronize lookups. However, the routines which update lbgroups were not careful to synchronize with unlocked lookups. I believe that in the worst case this can result in spurious connection aborts (I have a regression test case to exercise this), but it's hard to be certain. Modify in_pcblbgroup* routines to synchronize with unlocked lookup: - When removing inpcbs from an lbgroup, do not shrink the array. The maximum number of lbgroup entries is INPCBLBGROUP_SIZMAX (256), and it doesn't seem worth the complexity to shrink the array when a socket is removed. - When resizing an lbgroup, do not insert it into the hash table until it is fully initialized; otherwise lookups may observe a partially constructed lbgroup. - When adding an inpcb to the group, increment the counter after adding the array entry, using a release store. Otherwise it's possible for lookups to observe a null array slot. - When looking up an entry, use a corresponding acquire load. Reviewed by: ae, glebius MFC after: 1 month Sponsored by: Klara, Inc. Sponsored by: Stormshield Differential Revision: https://reviews.freebsd.org/D48020 (cherry picked from commit a600aabe9b04f0906069a8fb1f8d696ad186080f)
This commit is contained in:
parent
74986b481c
commit
707000f30f
@ -255,9 +255,8 @@ static void in_pcbremhash(struct inpcb *);
|
||||
*/
|
||||
|
||||
static struct inpcblbgroup *
|
||||
in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, struct ucred *cred,
|
||||
u_char vflag, uint16_t port, const union in_dependaddr *addr, int size,
|
||||
uint8_t numa_domain)
|
||||
in_pcblbgroup_alloc(struct ucred *cred, u_char vflag, uint16_t port,
|
||||
const union in_dependaddr *addr, int size, uint8_t numa_domain)
|
||||
{
|
||||
struct inpcblbgroup *grp;
|
||||
size_t bytes;
|
||||
@ -272,7 +271,6 @@ in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, struct ucred *cred,
|
||||
grp->il_numa_domain = numa_domain;
|
||||
grp->il_dependladdr = *addr;
|
||||
grp->il_inpsiz = size;
|
||||
CK_LIST_INSERT_HEAD(hdr, grp, il_list);
|
||||
return (grp);
|
||||
}
|
||||
|
||||
@ -294,6 +292,24 @@ in_pcblbgroup_free(struct inpcblbgroup *grp)
|
||||
NET_EPOCH_CALL(in_pcblbgroup_free_deferred, &grp->il_epoch_ctx);
|
||||
}
|
||||
|
||||
static void
|
||||
in_pcblbgroup_insert(struct inpcblbgroup *grp, struct inpcb *inp)
|
||||
{
|
||||
KASSERT(grp->il_inpcnt < grp->il_inpsiz,
|
||||
("invalid local group size %d and count %d", grp->il_inpsiz,
|
||||
grp->il_inpcnt));
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
|
||||
inp->inp_flags |= INP_INLBGROUP;
|
||||
grp->il_inp[grp->il_inpcnt] = inp;
|
||||
|
||||
/*
|
||||
* Synchronize with in_pcblookup_lbgroup(): make sure that we don't
|
||||
* expose a null slot to the lookup path.
|
||||
*/
|
||||
atomic_store_rel_int(&grp->il_inpcnt, grp->il_inpcnt + 1);
|
||||
}
|
||||
|
||||
static struct inpcblbgroup *
|
||||
in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
|
||||
struct inpcblbgroup *old_grp, int size)
|
||||
@ -301,7 +317,7 @@ in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
|
||||
struct inpcblbgroup *grp;
|
||||
int i;
|
||||
|
||||
grp = in_pcblbgroup_alloc(hdr, old_grp->il_cred, old_grp->il_vflag,
|
||||
grp = in_pcblbgroup_alloc(old_grp->il_cred, old_grp->il_vflag,
|
||||
old_grp->il_lport, &old_grp->il_dependladdr, size,
|
||||
old_grp->il_numa_domain);
|
||||
if (grp == NULL)
|
||||
@ -314,34 +330,11 @@ in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
|
||||
for (i = 0; i < old_grp->il_inpcnt; ++i)
|
||||
grp->il_inp[i] = old_grp->il_inp[i];
|
||||
grp->il_inpcnt = old_grp->il_inpcnt;
|
||||
CK_LIST_INSERT_HEAD(hdr, grp, il_list);
|
||||
in_pcblbgroup_free(old_grp);
|
||||
return (grp);
|
||||
}
|
||||
|
||||
/*
|
||||
* PCB at index 'i' is removed from the group. Pull up the ones below il_inp[i]
|
||||
* and shrink group if possible.
|
||||
*/
|
||||
static void
|
||||
in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp,
|
||||
int i)
|
||||
{
|
||||
struct inpcblbgroup *grp, *new_grp;
|
||||
|
||||
grp = *grpp;
|
||||
for (; i + 1 < grp->il_inpcnt; ++i)
|
||||
grp->il_inp[i] = grp->il_inp[i + 1];
|
||||
grp->il_inpcnt--;
|
||||
|
||||
if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN &&
|
||||
grp->il_inpcnt <= grp->il_inpsiz / 4) {
|
||||
/* Shrink this group. */
|
||||
new_grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2);
|
||||
if (new_grp != NULL)
|
||||
*grpp = new_grp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Add PCB to load balance group for SO_REUSEPORT_LB option.
|
||||
*/
|
||||
@ -386,11 +379,13 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain)
|
||||
}
|
||||
if (grp == NULL) {
|
||||
/* Create new load balance group. */
|
||||
grp = in_pcblbgroup_alloc(hdr, inp->inp_cred, inp->inp_vflag,
|
||||
grp = in_pcblbgroup_alloc(inp->inp_cred, inp->inp_vflag,
|
||||
inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
|
||||
INPCBLBGROUP_SIZMIN, numa_domain);
|
||||
if (grp == NULL)
|
||||
return (ENOBUFS);
|
||||
in_pcblbgroup_insert(grp, inp);
|
||||
CK_LIST_INSERT_HEAD(hdr, grp, il_list);
|
||||
} else if (grp->il_inpcnt == grp->il_inpsiz) {
|
||||
if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
|
||||
if (ratecheck(&lastprint, &interval))
|
||||
@ -403,15 +398,10 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain)
|
||||
grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2);
|
||||
if (grp == NULL)
|
||||
return (ENOBUFS);
|
||||
in_pcblbgroup_insert(grp, inp);
|
||||
} else {
|
||||
in_pcblbgroup_insert(grp, inp);
|
||||
}
|
||||
|
||||
KASSERT(grp->il_inpcnt < grp->il_inpsiz,
|
||||
("invalid local group size %d and count %d", grp->il_inpsiz,
|
||||
grp->il_inpcnt));
|
||||
|
||||
grp->il_inp[grp->il_inpcnt] = inp;
|
||||
grp->il_inpcnt++;
|
||||
inp->inp_flags |= INP_INLBGROUP;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -443,8 +433,17 @@ in_pcbremlbgrouphash(struct inpcb *inp)
|
||||
/* We are the last, free this local group. */
|
||||
in_pcblbgroup_free(grp);
|
||||
} else {
|
||||
/* Pull up inpcbs, shrink group if possible. */
|
||||
in_pcblbgroup_reorder(hdr, &grp, i);
|
||||
KASSERT(grp->il_inpcnt >= 2,
|
||||
("invalid local group count %d",
|
||||
grp->il_inpcnt));
|
||||
grp->il_inp[i] =
|
||||
grp->il_inp[grp->il_inpcnt - 1];
|
||||
|
||||
/*
|
||||
* Synchronize with in_pcblookup_lbgroup().
|
||||
*/
|
||||
atomic_store_rel_int(&grp->il_inpcnt,
|
||||
grp->il_inpcnt - 1);
|
||||
}
|
||||
inp->inp_flags &= ~INP_INLBGROUP;
|
||||
return;
|
||||
@ -2112,8 +2111,11 @@ in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||
const struct inpcblbgrouphead *hdr;
|
||||
struct inpcblbgroup *grp;
|
||||
struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild;
|
||||
struct inpcb *inp;
|
||||
u_int count;
|
||||
|
||||
INP_HASH_LOCK_ASSERT(pcbinfo);
|
||||
NET_EPOCH_ASSERT();
|
||||
|
||||
hdr = &pcbinfo->ipi_lbgrouphashbase[
|
||||
INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
|
||||
@ -2172,9 +2174,17 @@ in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||
grp = local_wild;
|
||||
if (grp == NULL)
|
||||
return (NULL);
|
||||
|
||||
out:
|
||||
return (grp->il_inp[INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) %
|
||||
grp->il_inpcnt]);
|
||||
/*
|
||||
* Synchronize with in_pcblbgroup_insert().
|
||||
*/
|
||||
count = atomic_load_acq_int(&grp->il_inpcnt);
|
||||
if (count == 0)
|
||||
return (NULL);
|
||||
inp = grp->il_inp[INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) % count];
|
||||
KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
|
||||
return (inp);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
||||
@ -925,6 +925,8 @@ in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||
const struct inpcblbgrouphead *hdr;
|
||||
struct inpcblbgroup *grp;
|
||||
struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild;
|
||||
struct inpcb *inp;
|
||||
u_int count;
|
||||
|
||||
INP_HASH_LOCK_ASSERT(pcbinfo);
|
||||
|
||||
@ -986,8 +988,15 @@ in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
|
||||
if (grp == NULL)
|
||||
return (NULL);
|
||||
out:
|
||||
return (grp->il_inp[INP6_PCBLBGROUP_PKTHASH(faddr, lport, fport) %
|
||||
grp->il_inpcnt]);
|
||||
/*
|
||||
* Synchronize with in_pcblbgroup_insert().
|
||||
*/
|
||||
count = atomic_load_acq_int(&grp->il_inpcnt);
|
||||
if (count == 0)
|
||||
return (NULL);
|
||||
inp = grp->il_inp[INP6_PCBLBGROUP_PKTHASH(faddr, lport, fport) % count];
|
||||
KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
|
||||
return (inp);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user