@@ -86,6 +86,9 @@ struct bucket {
 	};
 };
 
+#define HASHTAB_MAP_LOCK_COUNT 8
+#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
+
 struct bpf_htab {
 	struct bpf_map map;
 	struct bucket *buckets;
@@ -100,6 +103,7 @@ struct bpf_htab {
 	u32 elem_size;	/* size of each element in bytes */
 	u32 hashrnd;
 	struct lock_class_key lockdep_key;
+	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
 };
 
 /* each htab element is struct htab_elem + key + value */
@@ -152,26 +156,41 @@ static void htab_init_buckets(struct bpf_htab *htab)
 	}
 }
 
-static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
-					     struct bucket *b)
+static inline int htab_lock_bucket(const struct bpf_htab *htab,
+				   struct bucket *b, u32 hash,
+				   unsigned long *pflags)
 {
 	unsigned long flags;
 
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
+
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+		__this_cpu_dec(*(htab->map_locked[hash]));
+		migrate_enable();
+		return -EBUSY;
+	}
+
 	if (htab_use_raw_lock(htab))
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
 	else
 		spin_lock_irqsave(&b->lock, flags);
-	return flags;
+	*pflags = flags;
+
+	return 0;
 }
 
 static inline void htab_unlock_bucket(const struct bpf_htab *htab,
-				      struct bucket *b,
+				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
 	if (htab_use_raw_lock(htab))
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	else
 		spin_unlock_irqrestore(&b->lock, flags);
+	__this_cpu_dec(*(htab->map_locked[hash]));
+	migrate_enable();
 }
 
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
@@ -429,8 +448,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	struct bpf_htab *htab;
+	int err, i;
 	u64 cost;
-	int err;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
@@ -487,6 +506,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (!htab->buckets)
 		goto free_charge;
 
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
+		htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
+							 sizeof(int), GFP_USER);
+		if (!htab->map_locked[i])
+			goto free_map_locked;
+	}
+
 	if (htab->map.map_flags & BPF_F_ZERO_SEED)
 		htab->hashrnd = 0;
 	else
@@ -497,7 +523,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_buckets;
+			goto free_map_locked;
 
 		if (!percpu && !lru) {
 			/* lru itself can remove the least used element, so
@@ -513,7 +539,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 
 free_prealloc:
 	prealloc_destroy(htab);
-free_buckets:
+free_map_locked:
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
 free_charge:
 	bpf_map_charge_finish(&htab->map.memory);
@@ -694,20 +722,23 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
+	int ret;
 
 	tgt_l = container_of(node, struct htab_elem, lru_node);
 	b = __select_bucket(htab, tgt_l->hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+	if (ret)
+		return false;
 
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
 			hlist_nulls_del_rcu(&l->hash_node);
 			break;
 		}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, tgt_l->hash, flags);
 
 	return l == tgt_l;
 }
@@ -979,7 +1010,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		 */
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1020,7 +1053,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1058,7 +1091,9 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -ENOMEM;
 	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1077,7 +1112,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	ret = 0;
 
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 
 	if (ret)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -1112,7 +1147,9 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1135,7 +1172,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1175,7 +1212,9 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 			return -ENOMEM;
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1197,7 +1236,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
@@ -1225,7 +1264,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1235,17 +1274,20 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
 		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
-		ret = 0;
+	} else {
+		ret = -ENOENT;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1257,7 +1299,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1267,16 +1309,18 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
-	if (l) {
+	if (l)
 		hlist_nulls_del_rcu(&l->hash_node);
-		ret = 0;
-	}
+	else
+		ret = -ENOENT;
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l)
 		bpf_lru_push_free(&htab->lru, &l->lru_node);
 	return ret;
@@ -1302,6 +1346,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 static void htab_map_free(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	int i;
 
 	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
 	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
@@ -1320,6 +1365,8 @@ static void htab_map_free(struct bpf_map *map)
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
 	lockdep_unregister_key(&htab->lockdep_key);
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	kfree(htab);
 }
 
@@ -1423,8 +1470,11 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	b = &htab->buckets[batch];
 	head = &b->head;
 	/* do not grab the lock unless need it (bucket_cnt > 0). */
-	if (locked)
-		flags = htab_lock_bucket(htab, b);
+	if (locked) {
+		ret = htab_lock_bucket(htab, b, batch, &flags);
+		if (ret)
+			goto next_batch;
+	}
 
 	bucket_cnt = 0;
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
@@ -1441,7 +1491,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		goto after_loop;
@@ -1452,7 +1502,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		kvfree(keys);
@@ -1505,7 +1555,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		dst_val += value_size;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, batch, flags);
 	locked = false;
 
 	while (node_to_free) {
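Note (not part of the patch): the hunks above gate each bucket spinlock with a per-CPU counter in htab->map_locked[], so a re-entrant acquisition attempt on the same CPU (for example, a tracing BPF program interrupting a map operation that already holds the bucket lock) returns -EBUSY instead of deadlocking. The sketch below is a minimal userspace analogue of that idea, using a per-thread counter in place of the kernel's per-CPU counter and migrate_disable()/migrate_enable(); names such as try_lock_bucket and FAKE_LOCK_COUNT are invented for illustration and do not exist in the kernel.

/* Userspace analogue of the map_locked scheme: a per-context (here, per-thread)
 * reentrancy counter guards each lock so nested acquisition attempts from the
 * same context fail fast with -EBUSY rather than self-deadlocking.
 * Illustrative only; the real patch uses per-CPU counters.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define FAKE_LOCK_COUNT 8
#define FAKE_LOCK_MASK  (FAKE_LOCK_COUNT - 1)

static pthread_mutex_t bucket_lock[FAKE_LOCK_COUNT];
/* One counter per lock index per thread (stand-in for the per-CPU int). */
static __thread int lock_depth[FAKE_LOCK_COUNT];

static int try_lock_bucket(unsigned int hash)
{
	unsigned int idx = hash & FAKE_LOCK_MASK;

	if (++lock_depth[idx] != 1) {	/* already held in this context */
		--lock_depth[idx];
		return -EBUSY;
	}
	pthread_mutex_lock(&bucket_lock[idx]);
	return 0;
}

static void unlock_bucket(unsigned int hash)
{
	unsigned int idx = hash & FAKE_LOCK_MASK;

	pthread_mutex_unlock(&bucket_lock[idx]);
	--lock_depth[idx];
}

int main(void)
{
	int i;

	for (i = 0; i < FAKE_LOCK_COUNT; i++)
		pthread_mutex_init(&bucket_lock[i], NULL);

	if (!try_lock_bucket(42)) {
		/* A nested attempt from the same thread now fails instead of
		 * blocking forever, mirroring htab_lock_bucket()'s -EBUSY path.
		 */
		printf("nested attempt: %d\n", try_lock_bucket(42));
		unlock_bucket(42);
	}
	return 0;
}

Hashing the bucket index with HASHTAB_MAP_LOCK_MASK keeps the per-CPU state small (eight counters per CPU) at the cost of occasional false sharing, where two different buckets map to the same counter and one update is refused even though no real deadlock was possible.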