@@ -17,6 +17,16 @@
 	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
 	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

+#define BATCH_OPS(_name)			\
+	.map_lookup_batch =			\
+	_name##_map_lookup_batch,		\
+	.map_lookup_and_delete_batch =		\
+	_name##_map_lookup_and_delete_batch,	\
+	.map_update_batch =			\
+	generic_map_update_batch,		\
+	.map_delete_batch =			\
+	generic_map_delete_batch
+
 struct bucket {
 	struct hlist_nulls_head head;
 	raw_spinlock_t lock;
@@ -1232,6 +1242,256 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }

+static int
+__htab_map_lookup_and_delete_batch(struct bpf_map *map,
+				   const union bpf_attr *attr,
+				   union bpf_attr __user *uattr,
+				   bool do_delete, bool is_lru_map,
+				   bool is_percpu)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
+	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
+	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+	u32 batch, max_count, size, bucket_size;
+	u64 elem_map_flags, map_flags;
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
+	unsigned long flags;
+	struct htab_elem *l;
+	struct bucket *b;
+	int ret = 0;
+
+	elem_map_flags = attr->batch.elem_flags;
+	if ((elem_map_flags & ~BPF_F_LOCK) ||
+	    ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	map_flags = attr->batch.flags;
+	if (map_flags)
+		return -EINVAL;
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	if (put_user(0, &uattr->batch.count))
+		return -EFAULT;
+
+	batch = 0;
+	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
+		return -EFAULT;
+
+	if (batch >= htab->n_buckets)
+		return -ENOENT;
+
+	key_size = htab->map.key_size;
+	roundup_key_size = round_up(htab->map.key_size, 8);
+	value_size = htab->map.value_size;
+	size = round_up(value_size, 8);
+	if (is_percpu)
+		value_size = size * num_possible_cpus();
+	total = 0;
+	/* while experimenting with hash tables with sizes ranging from 10 to
+	 * 1000, it was observed that a bucket can have upto 5 entries.
+	 */
+	bucket_size = 5;
+
+alloc:
+	/* We cannot do copy_from_user or copy_to_user inside
+	 * the rcu_read_lock. Allocate enough space here.
+	 */
+	keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
+	values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
+	if (!keys || !values) {
+		ret = -ENOMEM;
+		goto after_loop;
+	}
+
+again:
+	preempt_disable();
+	this_cpu_inc(bpf_prog_active);
+	rcu_read_lock();
+again_nocopy:
+	dst_key = keys;
+	dst_val = values;
+	b = &htab->buckets[batch];
+	head = &b->head;
+	raw_spin_lock_irqsave(&b->lock, flags);
+
+	bucket_cnt = 0;
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+		bucket_cnt++;
+
+	if (bucket_cnt > (max_count - total)) {
+		if (total == 0)
+			ret = -ENOSPC;
+		raw_spin_unlock_irqrestore(&b->lock, flags);
+		rcu_read_unlock();
+		this_cpu_dec(bpf_prog_active);
+		preempt_enable();
+		goto after_loop;
+	}
+
+	if (bucket_cnt > bucket_size) {
+		bucket_size = bucket_cnt;
+		raw_spin_unlock_irqrestore(&b->lock, flags);
+		rcu_read_unlock();
+		this_cpu_dec(bpf_prog_active);
+		preempt_enable();
+		kvfree(keys);
+		kvfree(values);
+		goto alloc;
+	}
+
+	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+		memcpy(dst_key, l->key, key_size);
+
+		if (is_percpu) {
+			int off = 0, cpu;
+			void __percpu *pptr;
+
+			pptr = htab_elem_get_ptr(l, map->key_size);
+			for_each_possible_cpu(cpu) {
+				bpf_long_memcpy(dst_val + off,
+						per_cpu_ptr(pptr, cpu), size);
+				off += size;
+			}
+		} else {
+			value = l->key + roundup_key_size;
+			if (elem_map_flags & BPF_F_LOCK)
+				copy_map_value_locked(map, dst_val, value,
+						      true);
+			else
+				copy_map_value(map, dst_val, value);
+			check_and_init_map_lock(map, dst_val);
+		}
+		if (do_delete) {
+			hlist_nulls_del_rcu(&l->hash_node);
+			if (is_lru_map)
+				bpf_lru_push_free(&htab->lru, &l->lru_node);
+			else
+				free_htab_elem(htab, l);
+		}
+		dst_key += key_size;
+		dst_val += value_size;
+	}
+
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+	/* If we are not copying data, we can go to next bucket and avoid
+	 * unlocking the rcu.
+	 */
+	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
+		batch++;
+		goto again_nocopy;
+	}
+
+	rcu_read_unlock();
+	this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
+	    key_size * bucket_cnt) ||
+	    copy_to_user(uvalues + total * value_size, values,
+	    value_size * bucket_cnt))) {
+		ret = -EFAULT;
+		goto after_loop;
+	}
+
+	total += bucket_cnt;
+	batch++;
+	if (batch >= htab->n_buckets) {
+		ret = -ENOENT;
+		goto after_loop;
+	}
+	goto again;
+
+after_loop:
+	if (ret == -EFAULT)
+		goto out;
+
+	/* copy # of entries and next batch */
+	ubatch = u64_to_user_ptr(attr->batch.out_batch);
+	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
+	    put_user(total, &uattr->batch.count))
+		ret = -EFAULT;
+
+out:
+	kvfree(keys);
+	kvfree(values);
+	return ret;
+}
+
+static int
+htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  false, true);
+}
+
+static int
+htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+					const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  false, true);
+}
+
+static int
+htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+		      union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  false, false);
+}
+
+static int
+htab_map_lookup_and_delete_batch(struct bpf_map *map,
+				 const union bpf_attr *attr,
+				 union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  false, false);
+}
+
+static int
+htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
+				 const union bpf_attr *attr,
+				 union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  true, true);
+}
+
+static int
+htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+					    const union bpf_attr *attr,
+					    union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  true, true);
+}
+
+static int
+htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  true, false);
+}
+
+static int
+htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
+				     const union bpf_attr *attr,
+				     union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  true, false);
+}
+
 const struct bpf_map_ops htab_map_ops = {
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
@@ -1242,6 +1502,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	BATCH_OPS(htab),
 };

 const struct bpf_map_ops htab_lru_map_ops = {
@@ -1255,6 +1516,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	BATCH_OPS(htab_lru),
 };

 /* Called from eBPF program */
@@ -1368,6 +1630,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	BATCH_OPS(htab_percpu),
 };

 const struct bpf_map_ops htab_lru_percpu_map_ops = {
@@ -1379,6 +1642,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	BATCH_OPS(htab_lru_percpu),
 };

 static int fd_htab_map_alloc_check(union bpf_attr *attr)
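For context only (not part of this diff): the ops wired up by BATCH_OPS() are reached from userspace through the BPF_MAP_*_BATCH bpf(2) commands, which populate the attr->batch fields consumed by __htab_map_lookup_and_delete_batch() above (in_batch/out_batch carry the opaque resume token, count is both the caller's limit and the kernel's result). Below is a minimal, hedged userspace sketch of draining a hash map with BPF_MAP_LOOKUP_AND_DELETE_BATCH via the raw syscall; drain_map(), CHUNK, and the 4-byte key/value layout are illustrative assumptions, and kernel/uapi headers new enough to expose the batch commands are assumed. Libbpf's batch wrappers use the same interface.

/* Hypothetical sketch: drain a BPF_MAP_TYPE_HASH map in chunks using
 * BPF_MAP_LOOKUP_AND_DELETE_BATCH. Assumes 4-byte keys and values and a
 * caller-chosen CHUNK size; error handling is abbreviated.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

#define CHUNK 64	/* max entries requested per call (assumption) */

static int bpf_sys(enum bpf_cmd cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int drain_map(int map_fd)
{
	__u32 in_batch = 0, out_batch = 0;
	__u32 keys[CHUNK], vals[CHUNK];
	union bpf_attr attr;
	bool first = true;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd = map_fd;
	attr.batch.keys = (__u64)(unsigned long)keys;
	attr.batch.values = (__u64)(unsigned long)vals;

	do {
		/* First call passes no in_batch, so the kernel starts at
		 * bucket 0; later calls resume from the returned token.
		 */
		attr.batch.in_batch = first ? 0 : (__u64)(unsigned long)&in_batch;
		attr.batch.out_batch = (__u64)(unsigned long)&out_batch;
		attr.batch.count = CHUNK;

		err = bpf_sys(BPF_MAP_LOOKUP_AND_DELETE_BATCH, &attr);

		/* attr.batch.count now holds the number of entries copied,
		 * even when the call also reports ENOENT at end of table.
		 */
		printf("copied %u entries, resume token %u\n",
		       attr.batch.count, out_batch);

		in_batch = out_batch;
		first = false;
	} while (!err);

	/* ENOENT means the whole table has been walked; anything else is
	 * a real error.
	 */
	return (errno == ENOENT) ? 0 : -errno;
}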