Skip to content

Commit 62e7151

Browse files
Florian Westphal authored and ummakynes committed
netfilter: bridge: confirm multicast packets before passing them up the stack
conntrack nf_confirm logic cannot handle cloned skbs referencing the same nf_conn entry, which will happen for multicast (broadcast) frames on bridges.

Example:
    macvlan0
       |
      br0
     /  \
  ethX    ethY

ethX (or Y) receives an L2 multicast or broadcast packet containing an IP packet, flow is not yet in conntrack table.

1. skb passes through bridge and fake-ip (br_netfilter) Prerouting.
   -> skb->_nfct now references an unconfirmed entry
2. skb is broad/mcast packet. bridge now passes clones out on each bridge interface.
3. skb gets passed up the stack.
4. In macvlan case, macvlan driver retains clone(s) of the mcast skb and schedules a work queue to send them out on the lower devices.
   The clone skb->_nfct is not a copy, it is the same entry as the original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.

The Macvlan broadcast worker and normal confirm path will race.

This race will not happen if step 2 already confirmed a clone. In that case later steps perform skb_clone() with skb->_nfct already confirmed (in hash table). This works fine.

But such confirmation won't happen when eb/ip/nftables rules dropped the packets before they reached the nf_confirm step in postrouting.

Pablo points out that nf_conntrack_bridge doesn't allow use of stateful nat, so we can safely discard the nf_conn entry and let inet call conntrack again.

This doesn't work for bridge netfilter: skb could have a nat transformation. Also bridge nf prevents re-invocation of inet prerouting via 'sabotage_in' hook.

Work around this problem by explicit confirmation of the entry at LOCAL_IN time, before upper layer has a chance to clone the unconfirmed entry.

The downside is that this disables NAT and conntrack helpers.
Alternative fix would be to add locking to all code parts that deal with unconfirmed packets, but even if that could be done in a sane way this opens up other problems, for example:

-m physdev --physdev-out eth0 -j SNAT --snat-to 1.2.3.4
-m physdev --physdev-out eth1 -j SNAT --snat-to 1.2.3.5

For multicast case, only one of such conflicting mappings will be created, conntrack only handles 1:1 NAT mappings.

Users should create a setup that explicitly marks such traffic NOTRACK (conntrack bypass) to avoid this, but we cannot auto-bypass them, ruleset might have accept rules for untracked traffic already, so user-visible behaviour would change.

Suggested-by: Pablo Neira Ayuso <[email protected]>
Fixes: 1da177e ("Linux-2.6.12-rc2")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217777
Signed-off-by: Florian Westphal <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 7e0f122 commit 62e7151

File tree

4 files changed

+128
-0
lines changed

4 files changed

+128
-0
lines changed

include/linux/netfilter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ struct nf_ct_hook {
474474
const struct sk_buff *);
475475
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
476476
void (*set_closing)(struct nf_conntrack *nfct);
477+
int (*confirm)(struct sk_buff *skb);
477478
};
478479
extern const struct nf_ct_hook __rcu *nf_ct_hook;
479480

net/bridge/br_netfilter_hooks.c

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@
4343
#include <linux/sysctl.h>
4444
#endif
4545

46+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
47+
#include <net/netfilter/nf_conntrack_core.h>
48+
#endif
49+
4650
static unsigned int brnf_net_id __read_mostly;
4751

4852
struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
553557
return NF_STOLEN;
554558
}
555559

560+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
561+
/* conntrack's nf_confirm logic cannot handle cloned skbs referencing
562+
* the same nf_conn entry, which will happen for multicast (broadcast)
563+
* frames on bridges.
564+
*
565+
* Example (macvlan0 stacked on bridge br0, which has ports ethX and ethY):
566+
* macvlan0
567+
* br0
568+
* ethX ethY
569+
*
570+
* ethX (or Y) receives multicast or broadcast packet containing
571+
* an IP packet, not yet in conntrack table.
572+
*
573+
* 1. skb passes through bridge and fake-ip (br_netfilter) Prerouting.
574+
* -> skb->_nfct now references an unconfirmed entry
575+
* 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
576+
* interface.
577+
* 3. skb gets passed up the stack.
578+
* 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
579+
* and schedules a work queue to send them out on the lower devices.
580+
*
581+
* The clone skb->_nfct is not a copy, it is the same entry as the
582+
* original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
583+
* 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
584+
*
585+
* The Macvlan broadcast worker and normal confirm path will race.
586+
*
587+
* This race will not happen if step 2 already confirmed a clone. In that
588+
* case later steps perform skb_clone() with skb->_nfct already confirmed (in
589+
* hash table). This works fine.
590+
*
591+
* But such confirmation won't happen when eb/ip/nftables rules dropped the
592+
* packets before they reached the nf_confirm step in postrouting.
593+
*
594+
* Work around this problem by explicit confirmation of the entry at
595+
* LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
596+
* entry.
597+
*
* The priv and state arguments are not used by this hook.
*
* Return: NF_ACCEPT when there is nothing to confirm (no conntrack entry
* attached to the skb, PACKET_HOST frame, entry already confirmed, or no
* nf_ct_hook registered); NF_STOLEN when the confirm callback reports the
* skb as stolen; otherwise the verdict returned by the confirm callback.
598+
*/
599+
static unsigned int br_nf_local_in(void *priv,
600+
struct sk_buff *skb,
601+
const struct nf_hook_state *state)
602+
{
603+
struct nf_conntrack *nfct = skb_nfct(skb);
604+
const struct nf_ct_hook *ct_hook;
605+
struct nf_conn *ct;
606+
int ret;
607+
608+
/* Untracked skbs and frames addressed to this host are left alone;
* the clone problem described above concerns multicast/broadcast frames.
*/
if (!nfct || skb->pkt_type == PACKET_HOST)
609+
return NF_ACCEPT;
610+
611+
ct = container_of(nfct, struct nf_conn, ct_general);
/* Expected common case: the entry is already confirmed, nothing to do. */
612+
if (likely(nf_ct_is_confirmed(ct)))
613+
return NF_ACCEPT;
614+
615+
/* An unconfirmed entry is expected to be referenced by this skb only:
* warn once if the skb is shared or the entry's use count is not 1.
*/
WARN_ON_ONCE(skb_shared(skb));
616+
WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
617+
618+
/* We can't call nf_confirm here, it would create a dependency
619+
* on nf_conntrack module.
620+
*/
621+
ct_hook = rcu_dereference(nf_ct_hook);
/* No hook table registered: detach the entry from the skb, release it
* via nf_conntrack_put() and let the packet continue.
*/
622+
if (!ct_hook) {
623+
skb->_nfct = 0ul;
624+
nf_conntrack_put(nfct);
625+
return NF_ACCEPT;
626+
}
627+
628+
/* The encapsulation header is pulled around the confirm call and pushed
* back afterwards unless the skb was stolen.
* NOTE(review): presumably confirm expects skb->data to point at the
* network header - confirm.
*/
nf_bridge_pull_encap_header(skb);
629+
ret = ct_hook->confirm(skb);
630+
switch (ret & NF_VERDICT_MASK) {
631+
case NF_STOLEN:
632+
return NF_STOLEN;
633+
default:
634+
nf_bridge_push_encap_header(skb);
635+
break;
636+
}
637+
638+
/* Any verdict other than NF_STOLEN is expected to leave the entry
* confirmed; warn once if it is not.
*/
ct = container_of(nfct, struct nf_conn, ct_general);
639+
WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
640+
641+
return ret;
642+
}
643+
#endif
556644

557645
/* PF_BRIDGE/FORWARD *************************************************/
558646
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
9641052
.hooknum = NF_BR_PRE_ROUTING,
9651053
.priority = NF_BR_PRI_BRNF,
9661054
},
1055+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1056+
{
1057+
.hook = br_nf_local_in,
1058+
.pf = NFPROTO_BRIDGE,
1059+
.hooknum = NF_BR_LOCAL_IN,
1060+
.priority = NF_BR_PRI_LAST,
1061+
},
1062+
#endif
9671063
{
9681064
.hook = br_nf_forward,
9691065
.pf = NFPROTO_BRIDGE,

net/bridge/netfilter/nf_conntrack_bridge.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
291291
return nf_conntrack_in(skb, &bridge_state);
292292
}
293293

294+
/* Bridge LOCAL_IN hook: detach unconfirmed conntrack entries from
* frames that are not PACKET_HOST before they are passed up the stack.
*
* The entry is removed from the skb and released with nf_ct_put() rather
* than confirmed; the skb then continues without a conntrack entry so a
* later conntrack hook can track it again.
*
* The priv and state arguments are not used by this hook.
*
* Return: always NF_ACCEPT.
*/
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
295+
const struct nf_hook_state *state)
296+
{
297+
enum ip_conntrack_info ctinfo;
298+
struct nf_conn *ct;
299+
300+
/* Frames addressed to this host are not affected, leave them alone. */
if (skb->pkt_type == PACKET_HOST)
301+
return NF_ACCEPT;
302+
303+
/* nf_conntrack_confirm() cannot handle concurrent clones,
304+
* this happens for broad/multicast frames with e.g. macvlan on top
305+
* of the bridge device.
306+
*/
307+
ct = nf_ct_get(skb, &ctinfo);
/* Nothing to do without an entry, or for confirmed/template entries. */
308+
if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
309+
return NF_ACCEPT;
310+
311+
/* let inet prerouting call conntrack again */
312+
skb->_nfct = 0;
313+
nf_ct_put(ct);
314+
315+
return NF_ACCEPT;
316+
}
317+
294318
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
295319
struct nf_bridge_frag_data *data)
296320
{
@@ -385,6 +409,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
385409
.hooknum = NF_BR_PRE_ROUTING,
386410
.priority = NF_IP_PRI_CONNTRACK,
387411
},
412+
{
413+
.hook = nf_ct_bridge_in,
414+
.pf = NFPROTO_BRIDGE,
415+
.hooknum = NF_BR_LOCAL_IN,
416+
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
417+
},
388418
{
389419
.hook = nf_ct_bridge_post,
390420
.pf = NFPROTO_BRIDGE,

net/netfilter/nf_conntrack_core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
27562756
.get_tuple_skb = nf_conntrack_get_tuple_skb,
27572757
.attach = nf_conntrack_attach,
27582758
.set_closing = nf_conntrack_set_closing,
2759+
.confirm = __nf_conntrack_confirm,
27592760
};
27602761

27612762
void nf_conntrack_init_end(void)

0 commit comments

Comments
 (0)