Skip to content

Commit c353e89

Browse files
Paolo Abeni, kuba-moo
Paolo Abeni
authored and committed
net: introduce per netns packet chains
Currently, network taps not bound to any interface are linked in the global ptype_all list, affecting performance in all network namespaces.

Add per-netns ptype chains, so that in the mentioned case only the netns owning the packet socket(s) is affected.

While at it, drop the global ptype_all list: no in-kernel user registers a tap on "any" type without specifying either the target device or the target namespace (and IMHO doing that would not make any sense).

Note that this adds a conditional in the fast path (to check for the per-netns ptype_specific list) and increases the dataset size by a cacheline (owing to the per-netns lists).

Reviewed-by: Sabrina Dubroca <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
Reviewed-by: Eric Dumazet <[email protected]>
Link: https://patch.msgid.link/ae405f98875ee87f8150c460ad162de7e466f8a7.1742494826.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 29abdf6 commit c353e89

File tree

7 files changed

+78
-22
lines changed

7 files changed

+78
-22
lines changed

include/linux/netdevice.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4278,7 +4278,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
42784278
return 0;
42794279
}
42804280

4281-
bool dev_nit_active(struct net_device *dev);
4281+
bool dev_nit_active_rcu(const struct net_device *dev);
4282+
static inline bool dev_nit_active(const struct net_device *dev)
4283+
{
4284+
bool ret;
4285+
4286+
rcu_read_lock();
4287+
ret = dev_nit_active_rcu(dev);
4288+
rcu_read_unlock();
4289+
return ret;
4290+
}
4291+
42824292
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
42834293

42844294
static inline void __dev_put(struct net_device *dev)

include/net/hotdata.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ struct net_hotdata {
2323
struct net_offload udpv6_offload;
2424
#endif
2525
struct list_head offload_base;
26-
struct list_head ptype_all;
2726
struct kmem_cache *skbuff_cache;
2827
struct kmem_cache *skbuff_fclone_cache;
2928
struct kmem_cache *skb_small_head_cache;

include/net/net_namespace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ struct net {
8383
struct llist_node defer_free_list;
8484
struct llist_node cleanup_list; /* namespaces on death row */
8585

86+
struct list_head ptype_all;
87+
struct list_head ptype_specific;
88+
8689
#ifdef CONFIG_KEYS
8790
struct key_tag *key_domain; /* Key domain of operation tag */
8891
#endif

net/core/dev.c

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -572,10 +572,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
572572

573573
static inline struct list_head *ptype_head(const struct packet_type *pt)
574574
{
575-
if (pt->type == htons(ETH_P_ALL))
576-
return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
577-
else
578-
return pt->dev ? &pt->dev->ptype_specific :
575+
if (pt->type == htons(ETH_P_ALL)) {
576+
if (!pt->af_packet_net && !pt->dev)
577+
return NULL;
578+
579+
return pt->dev ? &pt->dev->ptype_all :
580+
&pt->af_packet_net->ptype_all;
581+
}
582+
583+
if (pt->dev)
584+
return &pt->dev->ptype_specific;
585+
586+
return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
579587
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
580588
}
581589

@@ -596,6 +604,9 @@ void dev_add_pack(struct packet_type *pt)
596604
{
597605
struct list_head *head = ptype_head(pt);
598606

607+
if (WARN_ON_ONCE(!head))
608+
return;
609+
599610
spin_lock(&ptype_lock);
600611
list_add_rcu(&pt->list, head);
601612
spin_unlock(&ptype_lock);
@@ -620,6 +631,9 @@ void __dev_remove_pack(struct packet_type *pt)
620631
struct list_head *head = ptype_head(pt);
621632
struct packet_type *pt1;
622633

634+
if (!head)
635+
return;
636+
623637
spin_lock(&ptype_lock);
624638

625639
list_for_each_entry(pt1, head, list) {
@@ -2441,16 +2455,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
24412455
}
24422456

24432457
/**
2444-
* dev_nit_active - return true if any network interface taps are in use
2458+
* dev_nit_active_rcu - return true if any network interface taps are in use
2459+
*
2460+
* The caller must hold the RCU lock
24452461
*
24462462
* @dev: network device to check for the presence of taps
24472463
*/
2448-
bool dev_nit_active(struct net_device *dev)
2464+
bool dev_nit_active_rcu(const struct net_device *dev)
24492465
{
2450-
return !list_empty(&net_hotdata.ptype_all) ||
2466+
/* Callers may hold either RCU or RCU BH lock */
2467+
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
2468+
2469+
return !list_empty(&dev_net(dev)->ptype_all) ||
24512470
!list_empty(&dev->ptype_all);
24522471
}
2453-
EXPORT_SYMBOL_GPL(dev_nit_active);
2472+
EXPORT_SYMBOL_GPL(dev_nit_active_rcu);
24542473

24552474
/*
24562475
* Support routine. Sends outgoing frames to any network
@@ -2459,11 +2478,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
24592478

24602479
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
24612480
{
2462-
struct list_head *ptype_list = &net_hotdata.ptype_all;
24632481
struct packet_type *ptype, *pt_prev = NULL;
2482+
struct list_head *ptype_list;
24642483
struct sk_buff *skb2 = NULL;
24652484

24662485
rcu_read_lock();
2486+
ptype_list = &dev_net_rcu(dev)->ptype_all;
24672487
again:
24682488
list_for_each_entry_rcu(ptype, ptype_list, list) {
24692489
if (READ_ONCE(ptype->ignore_outgoing))
@@ -2507,7 +2527,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
25072527
pt_prev = ptype;
25082528
}
25092529

2510-
if (ptype_list == &net_hotdata.ptype_all) {
2530+
if (ptype_list != &dev->ptype_all) {
25112531
ptype_list = &dev->ptype_all;
25122532
goto again;
25132533
}
@@ -3752,7 +3772,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
37523772
unsigned int len;
37533773
int rc;
37543774

3755-
if (dev_nit_active(dev))
3775+
if (dev_nit_active_rcu(dev))
37563776
dev_queue_xmit_nit(skb, dev);
37573777

37583778
len = skb->len;
@@ -5696,7 +5716,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
56965716
if (pfmemalloc)
56975717
goto skip_taps;
56985718

5699-
list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
5719+
list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all,
5720+
list) {
57005721
if (pt_prev)
57015722
ret = deliver_skb(skb, pt_prev, orig_dev);
57025723
pt_prev = ptype;
@@ -5808,6 +5829,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
58085829
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
58095830
&ptype_base[ntohs(type) &
58105831
PTYPE_HASH_MASK]);
5832+
5833+
/* orig_dev and skb->dev could belong to different netns;
5834+
* Even in such case we need to traverse only the list
5835+
* coming from skb->dev, as the ptype owner (packet socket)
5836+
* will use dev_net(skb->dev) to do namespace filtering.
5837+
*/
5838+
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5839+
&dev_net_rcu(skb->dev)->ptype_specific);
58115840
}
58125841

58135842
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,

net/core/hotdata.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
struct net_hotdata net_hotdata __cacheline_aligned = {
99
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
10-
.ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
1110
.gro_normal_batch = 8,
1211

1312
.netdev_budget = 300,

net/core/net-procfs.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,13 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
185185
}
186186
}
187187

188-
list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) {
188+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
189+
if (i == pos)
190+
return pt;
191+
++i;
192+
}
193+
194+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) {
189195
if (i == pos)
190196
return pt;
191197
++i;
@@ -210,6 +216,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
210216

211217
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
212218
{
219+
struct net *net = seq_file_net(seq);
213220
struct net_device *dev;
214221
struct packet_type *pt;
215222
struct list_head *nxt;
@@ -232,15 +239,22 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
232239
goto found;
233240
}
234241
}
235-
236-
nxt = net_hotdata.ptype_all.next;
237-
goto ptype_all;
242+
nxt = net->ptype_all.next;
243+
goto net_ptype_all;
238244
}
239245

240-
if (pt->type == htons(ETH_P_ALL)) {
241-
ptype_all:
242-
if (nxt != &net_hotdata.ptype_all)
246+
if (pt->af_packet_net) {
247+
net_ptype_all:
248+
if (nxt != &net->ptype_all && nxt != &net->ptype_specific)
243249
goto found;
250+
251+
if (nxt == &net->ptype_all) {
252+
/* continue with ->ptype_specific if it's not empty */
253+
nxt = net->ptype_specific.next;
254+
if (nxt != &net->ptype_specific)
255+
goto found;
256+
}
257+
244258
hash = 0;
245259
nxt = ptype_base[0].next;
246260
} else

net/core/net_namespace.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
340340
lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
341341
#endif
342342

343+
INIT_LIST_HEAD(&net->ptype_all);
344+
INIT_LIST_HEAD(&net->ptype_specific);
343345
preinit_net_sysctl(net);
344346
}
345347

0 commit comments

Comments
 (0)