Skip to content

Commit e53376b

Browse files
committed
netfilter: nf_conntrack: don't release a conntrack with non-zero refcnt
With this patch, the conntrack refcount is initially set to zero and it is bumped once it is added to any of the list, so we fulfill Eric's golden rule which is that all released objects always have a refcount that equals zero. Andrey Vagin reports that nf_conntrack_free can't be called for a conntrack with non-zero ref-counter, because it can race with nf_conntrack_find_get(). A conntrack slab is created with SLAB_DESTROY_BY_RCU. Non-zero ref-counter says that this conntrack is used. So when we release a conntrack with non-zero counter, we break this assumption. CPU1 CPU2 ____nf_conntrack_find() nf_ct_put() destroy_conntrack() ... init_conntrack __nf_conntrack_alloc (set use = 1) atomic_inc_not_zero(&ct->use) (use = 2) if (!l4proto->new(ct, skb, dataoff, timeouts)) nf_conntrack_free(ct); (use = 2 !!!) ... __nf_conntrack_alloc (set use = 1) if (!nf_ct_key_equal(h, tuple, zone)) nf_ct_put(ct); (use = 0) destroy_conntrack() /* continue to work with CT */ After applying the path "[PATCH] netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get" another bug was triggered in destroy_conntrack(): <4>[67096.759334] ------------[ cut here ]------------ <2>[67096.759353] kernel BUG at net/netfilter/nf_conntrack_core.c:211! ... <4>[67096.759837] Pid: 498649, comm: atdd veid: 666 Tainted: G C --------------- 2.6.32-042stab084.18 #1 042stab084_18 /DQ45CB <4>[67096.759932] RIP: 0010:[<ffffffffa03d99ac>] [<ffffffffa03d99ac>] destroy_conntrack+0x15c/0x190 [nf_conntrack] <4>[67096.760255] Call Trace: <4>[67096.760255] [<ffffffff814844a7>] nf_conntrack_destroy+0x17/0x30 <4>[67096.760255] [<ffffffffa03d9bb5>] nf_conntrack_find_get+0x85/0x130 [nf_conntrack] <4>[67096.760255] [<ffffffffa03d9fb2>] nf_conntrack_in+0x352/0xb60 [nf_conntrack] <4>[67096.760255] [<ffffffffa048c771>] ipv4_conntrack_local+0x51/0x60 [nf_conntrack_ipv4] <4>[67096.760255] [<ffffffff81484419>] nf_iterate+0x69/0xb0 <4>[67096.760255] [<ffffffff814b5b00>] ? dst_output+0x0/0x20 <4>[67096.760255] [<ffffffff814845d4>] nf_hook_slow+0x74/0x110 <4>[67096.760255] [<ffffffff814b5b00>] ? dst_output+0x0/0x20 <4>[67096.760255] [<ffffffff814b66d5>] raw_sendmsg+0x775/0x910 <4>[67096.760255] [<ffffffff8104c5a8>] ? flush_tlb_others_ipi+0x128/0x130 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff814c136a>] inet_sendmsg+0x4a/0xb0 <4>[67096.760255] [<ffffffff81444e93>] ? sock_sendmsg+0x13/0x140 <4>[67096.760255] [<ffffffff81444f97>] sock_sendmsg+0x117/0x140 <4>[67096.760255] [<ffffffff8102e299>] ? native_smp_send_reschedule+0x49/0x60 <4>[67096.760255] [<ffffffff81519beb>] ? _spin_unlock_bh+0x1b/0x20 <4>[67096.760255] [<ffffffff8109d930>] ? autoremove_wake_function+0x0/0x40 <4>[67096.760255] [<ffffffff814960f0>] ? do_ip_setsockopt+0x90/0xd80 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff814457c9>] sys_sendto+0x139/0x190 <4>[67096.760255] [<ffffffff810efa77>] ? audit_syscall_entry+0x1d7/0x200 <4>[67096.760255] [<ffffffff810ef7c5>] ? __audit_syscall_exit+0x265/0x290 <4>[67096.760255] [<ffffffff81474daf>] compat_sys_socketcall+0x13f/0x210 <4>[67096.760255] [<ffffffff8104dea3>] ia32_sysret+0x0/0x5 I have reused the original title for the RFC patch that Andrey posted and most of the original patch description. Cc: Eric Dumazet <[email protected]> Cc: Andrew Vagin <[email protected]> Cc: Florian Westphal <[email protected]> Reported-by: Andrew Vagin <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]> Reviewed-by: Eric Dumazet <[email protected]> Acked-by: Andrew Vagin <[email protected]>
1 parent 829d931 commit e53376b

File tree

4 files changed

+34
-14
lines changed

4 files changed

+34
-14
lines changed

include/net/netfilter/nf_conntrack.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ extern unsigned int nf_conntrack_max;
284284
extern unsigned int nf_conntrack_hash_rnd;
285285
void init_nf_conntrack_hash_rnd(void);
286286

287+
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl);
288+
287289
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
288290
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
289291

net/netfilter/nf_conntrack_core.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
448448
goto out;
449449

450450
add_timer(&ct->timeout);
451-
nf_conntrack_get(&ct->ct_general);
451+
smp_wmb();
452+
/* The caller holds a reference to this object */
453+
atomic_set(&ct->ct_general.use, 2);
452454
__nf_conntrack_hash_insert(ct, hash, repl_hash);
453455
NF_CT_STAT_INC(net, insert);
454456
spin_unlock_bh(&nf_conntrack_lock);
@@ -462,6 +464,21 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
462464
}
463465
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
464466

467+
/* deletion from this larval template list happens via nf_ct_put() */
468+
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
469+
{
470+
__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
471+
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
472+
nf_conntrack_get(&tmpl->ct_general);
473+
474+
spin_lock_bh(&nf_conntrack_lock);
475+
/* Overload tuple linked list to put us in template list. */
476+
hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
477+
&net->ct.tmpl);
478+
spin_unlock_bh(&nf_conntrack_lock);
479+
}
480+
EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
481+
465482
/* Confirm a connection given skb; places it in hash table */
466483
int
467484
__nf_conntrack_confirm(struct sk_buff *skb)
@@ -733,11 +750,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
733750
nf_ct_zone->id = zone;
734751
}
735752
#endif
736-
/*
737-
* changes to lookup keys must be done before setting refcnt to 1
753+
/* Because we use RCU lookups, we set ct_general.use to zero before
754+
* this is inserted in any list.
738755
*/
739-
smp_wmb();
740-
atomic_set(&ct->ct_general.use, 1);
756+
atomic_set(&ct->ct_general.use, 0);
741757
return ct;
742758

743759
#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -761,6 +777,11 @@ void nf_conntrack_free(struct nf_conn *ct)
761777
{
762778
struct net *net = nf_ct_net(ct);
763779

780+
/* A freed object has refcnt == 0, that's
781+
* the golden rule for SLAB_DESTROY_BY_RCU
782+
*/
783+
NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
784+
764785
nf_ct_ext_destroy(ct);
765786
nf_ct_ext_free(ct);
766787
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
@@ -856,6 +877,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
856877
NF_CT_STAT_INC(net, new);
857878
}
858879

880+
/* Now it is inserted into the unconfirmed list, bump refcount */
881+
nf_conntrack_get(&ct->ct_general);
882+
859883
/* Overload tuple linked list to put us in unconfirmed list. */
860884
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
861885
&net->ct.unconfirmed);

net/netfilter/nf_synproxy_core.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
363363
goto err2;
364364
if (!nfct_synproxy_ext_add(ct))
365365
goto err2;
366-
__set_bit(IPS_TEMPLATE_BIT, &ct->status);
367-
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
368366

367+
nf_conntrack_tmpl_insert(net, ct);
369368
snet->tmpl = ct;
370369

371370
snet->stats = alloc_percpu(struct synproxy_stats);
@@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net)
390389
{
391390
struct synproxy_net *snet = synproxy_pernet(net);
392391

393-
nf_conntrack_free(snet->tmpl);
392+
nf_ct_put(snet->tmpl);
394393
synproxy_proc_exit(net);
395394
free_percpu(snet->stats);
396395
}

net/netfilter/xt_CT.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
228228
goto err3;
229229
}
230230

231-
__set_bit(IPS_TEMPLATE_BIT, &ct->status);
232-
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
233-
234-
/* Overload tuple linked list to put us in template list. */
235-
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
236-
&par->net->ct.tmpl);
231+
nf_conntrack_tmpl_insert(par->net, ct);
237232
out:
238233
info->ct = ct;
239234
return 0;

0 commit comments

Comments
 (0)