Skip to content

Commit fd258f2

Browse files
committed
Merge branch 'udp-pernetns-hash'
Kuniyuki Iwashima says: ==================== udp: Introduce optional per-netns hash table. This series is the UDP version of the per-netns ehash series [0], which was initially in the same patch set. [1] The notable difference with TCP is the max table size is 64K and the min size is 128. This is because the possible hash range by udp_hashfn() always fits in 64K within the same netns and because we want to keep a bitmap in udp_lib_get_port() on the stack. Also, the UDP per-netns table isolates both 1-tuple and 2-tuple tables. For details, please see the last patch. patch 1 - 4: prep for per-netns hash table patch 5: add per-netns hash table [0]: https://lore.kernel.org/netdev/[email protected]/ [1]: https://lore.kernel.org/netdev/[email protected]/ ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents e882256 + 9804985 commit fd258f2

File tree

10 files changed

+261
-58
lines changed

10 files changed

+261
-58
lines changed

Documentation/networking/ip-sysctl.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,6 +1177,33 @@ udp_rmem_min - INTEGER
11771177
udp_wmem_min - INTEGER
11781178
UDP does not have tx memory accounting and this tunable has no effect.
11791179

1180+
udp_hash_entries - INTEGER
1181+
Show the number of hash buckets for UDP sockets in the current
1182+
networking namespace.
1183+
1184+
A negative value means the networking namespace does not own its
1185+
hash buckets and shares those of the initial networking namespace.
1186+
1187+
udp_child_hash_entries - INTEGER
1188+
Control the number of hash buckets for UDP sockets in the child
1189+
networking namespace, which must be set before clone() or unshare().
1190+
1191+
If the value is not 0, the kernel uses a value rounded up to 2^n
1192+
as the actual hash bucket size. 0 is a special value, meaning
1193+
the child networking namespace will share the initial networking
1194+
namespace's hash buckets.
1195+
1196+
Note that the child will use the global one in case the kernel
1197+
fails to allocate enough memory. In addition, the global hash
1198+
buckets are spread over available NUMA nodes, but the allocation
1199+
of the child hash table depends on the current process's NUMA
1200+
policy, which could result in performance differences.
1201+
1202+
Possible values: 0, 2^n (n: 7 (128) - 16 (64K))
1203+
1204+
Default: 0
1205+
1206+
11801207
RAW variables
11811208
=============
11821209

include/linux/udp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
2323
return (struct udphdr *)skb_transport_header(skb);
2424
}
2525

26+
#define UDP_HTABLE_SIZE_MIN_PERNET 128
2627
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
28+
#define UDP_HTABLE_SIZE_MAX 65536
2729

2830
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
2931
{

include/net/netns/ipv4.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct tcp_fastopen_context;
4343

4444
struct netns_ipv4 {
4545
struct inet_timewait_death_row tcp_death_row;
46+
struct udp_table *udp_table;
4647

4748
#ifdef CONFIG_SYSCTL
4849
struct ctl_table_header *forw_hdr;
@@ -207,6 +208,8 @@ struct netns_ipv4 {
207208

208209
atomic_t dev_addr_genid;
209210

211+
unsigned int sysctl_udp_child_hash_entries;
212+
210213
#ifdef CONFIG_SYSCTL
211214
unsigned long *sysctl_local_reserved_ports;
212215
int sysctl_ip_prot_sock;

net/core/filter.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6432,7 +6432,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
64326432
else
64336433
sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
64346434
dst4, tuple->ipv4.dport,
6435-
dif, sdif, &udp_table, NULL);
6435+
dif, sdif, net->ipv4.udp_table, NULL);
64366436
#if IS_ENABLED(CONFIG_IPV6)
64376437
} else {
64386438
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
@@ -6448,7 +6448,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
64486448
src6, tuple->ipv6.sport,
64496449
dst6, tuple->ipv6.dport,
64506450
dif, sdif,
6451-
&udp_table, NULL);
6451+
net->ipv4.udp_table, NULL);
64526452
#endif
64536453
}
64546454

net/ipv4/sysctl_net_ipv4.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ static int one_day_secs = 24 * 3600;
4040
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
4141
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
4242
static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
43+
static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX;
4344
static int tcp_plb_max_rounds = 31;
4445
static int tcp_plb_max_cong_thresh = 256;
4546

@@ -402,12 +403,36 @@ static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
402403
if (!net_eq(net, &init_net) && !hinfo->pernet)
403404
tcp_ehash_entries *= -1;
404405

406+
memset(&tbl, 0, sizeof(tbl));
405407
tbl.data = &tcp_ehash_entries;
406408
tbl.maxlen = sizeof(int);
407409

408410
return proc_dointvec(&tbl, write, buffer, lenp, ppos);
409411
}
410412

413+
static int proc_udp_hash_entries(struct ctl_table *table, int write,
414+
void *buffer, size_t *lenp, loff_t *ppos)
415+
{
416+
struct net *net = container_of(table->data, struct net,
417+
ipv4.sysctl_udp_child_hash_entries);
418+
int udp_hash_entries;
419+
struct ctl_table tbl;
420+
421+
udp_hash_entries = net->ipv4.udp_table->mask + 1;
422+
423+
/* A negative number indicates that the child netns
424+
* shares the global udp_table.
425+
*/
426+
if (!net_eq(net, &init_net) && net->ipv4.udp_table == &udp_table)
427+
udp_hash_entries *= -1;
428+
429+
memset(&tbl, 0, sizeof(tbl));
430+
tbl.data = &udp_hash_entries;
431+
tbl.maxlen = sizeof(int);
432+
433+
return proc_dointvec(&tbl, write, buffer, lenp, ppos);
434+
}
435+
411436
#ifdef CONFIG_IP_ROUTE_MULTIPATH
412437
static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
413438
void *buffer, size_t *lenp,
@@ -1361,6 +1386,21 @@ static struct ctl_table ipv4_net_table[] = {
13611386
.extra1 = SYSCTL_ZERO,
13621387
.extra2 = &tcp_child_ehash_entries_max,
13631388
},
1389+
{
1390+
.procname = "udp_hash_entries",
1391+
.data = &init_net.ipv4.sysctl_udp_child_hash_entries,
1392+
.mode = 0444,
1393+
.proc_handler = proc_udp_hash_entries,
1394+
},
1395+
{
1396+
.procname = "udp_child_hash_entries",
1397+
.data = &init_net.ipv4.sysctl_udp_child_hash_entries,
1398+
.maxlen = sizeof(unsigned int),
1399+
.mode = 0644,
1400+
.proc_handler = proc_douintvec_minmax,
1401+
.extra1 = SYSCTL_ZERO,
1402+
.extra2 = &udp_child_hash_entries_max,
1403+
},
13641404
{
13651405
.procname = "udp_rmem_min",
13661406
.data = &init_net.ipv4.sysctl_udp_rmem_min,

0 commit comments

Comments
 (0)