Skip to content

Commit e125fbe

Browse files
Alexander Aringteigland
Alexander Aring
authored andcommitted
fs: dlm: fix mark setting deadlock
This patch fixes an deadlock issue when dlm_lowcomms_close() is called. When dlm_lowcomms_close() is called the clusters_root.subsys.su_mutex is held to remove configfs items. At this time we flushing (e.g. cancel_work_sync()) the workers of send and recv workqueue. Due the fact that we accessing configfs items (mark values), these workers will lock clusters_root.subsys.su_mutex as well which are already hold by dlm_lowcomms_close() and ends in a deadlock situation. [67170.703046] ====================================================== [67170.703965] WARNING: possible circular locking dependency detected [67170.704758] 5.11.0-rc4+ #22 Tainted: G W [67170.705433] ------------------------------------------------------ [67170.706228] dlm_controld/280 is trying to acquire lock: [67170.706915] ffff9f2f475a6948 ((wq_completion)dlm_recv){+.+.}-{0:0}, at: __flush_work+0x203/0x4c0 [67170.708026] but task is already holding lock: [67170.708758] ffffffffa132f878 (&clusters_root.subsys.su_mutex){+.+.}-{3:3}, at: configfs_rmdir+0x29b/0x310 [67170.710016] which lock already depends on the new lock. The new behaviour adds the mark value to the node address configuration which doesn't require to held the clusters_root.subsys.su_mutex by accessing mark values in a separate datastructure. However the mark values can be set now only after a node address was set which is the case when the user is using dlm_controld. Signed-off-by: Alexander Aring <[email protected]> Signed-off-by: David Teigland <[email protected]>
1 parent 92c4895 commit e125fbe

File tree

4 files changed

+45
-35
lines changed

4 files changed

+45
-35
lines changed

fs/dlm/config.c

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -688,14 +688,23 @@ static ssize_t comm_mark_show(struct config_item *item, char *buf)
688688
static ssize_t comm_mark_store(struct config_item *item, const char *buf,
689689
size_t len)
690690
{
691+
struct dlm_comm *comm;
691692
unsigned int mark;
692693
int rc;
693694

694695
rc = kstrtouint(buf, 0, &mark);
695696
if (rc)
696697
return rc;
697698

698-
config_item_to_comm(item)->mark = mark;
699+
if (mark == 0)
700+
mark = dlm_config.ci_mark;
701+
702+
comm = config_item_to_comm(item);
703+
rc = dlm_lowcomms_nodes_set_mark(comm->nodeid, mark);
704+
if (rc)
705+
return rc;
706+
707+
comm->mark = mark;
699708
return len;
700709
}
701710

@@ -870,24 +879,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
870879
return 0;
871880
}
872881

873-
void dlm_comm_mark(int nodeid, unsigned int *mark)
874-
{
875-
struct dlm_comm *cm;
876-
877-
cm = get_comm(nodeid);
878-
if (!cm) {
879-
*mark = dlm_config.ci_mark;
880-
return;
881-
}
882-
883-
if (cm->mark)
884-
*mark = cm->mark;
885-
else
886-
*mark = dlm_config.ci_mark;
887-
888-
put_comm(cm);
889-
}
890-
891882
int dlm_our_nodeid(void)
892883
{
893884
return local_comm ? local_comm->nodeid : 0;

fs/dlm/config.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ void dlm_config_exit(void);
4848
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
4949
int *count_out);
5050
int dlm_comm_seq(int nodeid, uint32_t *seq);
51-
void dlm_comm_mark(int nodeid, unsigned int *mark);
5251
int dlm_our_nodeid(void);
5352
int dlm_our_addr(struct sockaddr_storage *addr, int num);
5453

fs/dlm/lowcomms.c

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ struct writequeue_entry {
116116
struct dlm_node_addr {
117117
struct list_head list;
118118
int nodeid;
119+
int mark;
119120
int addr_count;
120121
int curr_addr_index;
121122
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
@@ -303,7 +304,8 @@ static int addr_compare(const struct sockaddr_storage *x,
303304
}
304305

305306
static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
306-
struct sockaddr *sa_out, bool try_new_addr)
307+
struct sockaddr *sa_out, bool try_new_addr,
308+
unsigned int *mark)
307309
{
308310
struct sockaddr_storage sas;
309311
struct dlm_node_addr *na;
@@ -331,6 +333,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
331333
if (!na->addr_count)
332334
return -ENOENT;
333335

336+
*mark = na->mark;
337+
334338
if (sas_out)
335339
memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));
336340

@@ -350,7 +354,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
350354
return 0;
351355
}
352356

353-
static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
357+
static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid,
358+
unsigned int *mark)
354359
{
355360
struct dlm_node_addr *na;
356361
int rv = -EEXIST;
@@ -364,6 +369,7 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
364369
for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
365370
if (addr_compare(na->addr[addr_i], addr)) {
366371
*nodeid = na->nodeid;
372+
*mark = na->mark;
367373
rv = 0;
368374
goto unlock;
369375
}
@@ -412,6 +418,7 @@ int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
412418
new_node->nodeid = nodeid;
413419
new_node->addr[0] = new_addr;
414420
new_node->addr_count = 1;
421+
new_node->mark = dlm_config.ci_mark;
415422
list_add(&new_node->list, &dlm_node_addrs);
416423
spin_unlock(&dlm_node_addrs_spin);
417424
return 0;
@@ -519,6 +526,23 @@ int dlm_lowcomms_connect_node(int nodeid)
519526
return 0;
520527
}
521528

529+
int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
530+
{
531+
struct dlm_node_addr *na;
532+
533+
spin_lock(&dlm_node_addrs_spin);
534+
na = find_node_addr(nodeid);
535+
if (!na) {
536+
spin_unlock(&dlm_node_addrs_spin);
537+
return -ENOENT;
538+
}
539+
540+
na->mark = mark;
541+
spin_unlock(&dlm_node_addrs_spin);
542+
543+
return 0;
544+
}
545+
522546
static void lowcomms_error_report(struct sock *sk)
523547
{
524548
struct connection *con;
@@ -867,7 +891,7 @@ static int accept_from_sock(struct listen_connection *con)
867891

868892
/* Get the new node's NODEID */
869893
make_sockaddr(&peeraddr, 0, &len);
870-
if (addr_to_nodeid(&peeraddr, &nodeid)) {
894+
if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
871895
unsigned char *b=(unsigned char *)&peeraddr;
872896
log_print("connect from non cluster node");
873897
print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
@@ -876,9 +900,6 @@ static int accept_from_sock(struct listen_connection *con)
876900
return -1;
877901
}
878902

879-
dlm_comm_mark(nodeid, &mark);
880-
sock_set_mark(newsock->sk, mark);
881-
882903
log_print("got connection from %d", nodeid);
883904

884905
/* Check to see if we already have a connection to this node. This
@@ -892,6 +913,8 @@ static int accept_from_sock(struct listen_connection *con)
892913
goto accept_err;
893914
}
894915

916+
sock_set_mark(newsock->sk, mark);
917+
895918
mutex_lock(&newcon->sock_mutex);
896919
if (newcon->sock) {
897920
struct connection *othercon = newcon->othercon;
@@ -1015,8 +1038,6 @@ static void sctp_connect_to_sock(struct connection *con)
10151038
struct socket *sock;
10161039
unsigned int mark;
10171040

1018-
dlm_comm_mark(con->nodeid, &mark);
1019-
10201041
mutex_lock(&con->sock_mutex);
10211042

10221043
/* Some odd races can cause double-connects, ignore them */
@@ -1029,7 +1050,7 @@ static void sctp_connect_to_sock(struct connection *con)
10291050
}
10301051

10311052
memset(&daddr, 0, sizeof(daddr));
1032-
result = nodeid_to_addr(con->nodeid, &daddr, NULL, true);
1053+
result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
10331054
if (result < 0) {
10341055
log_print("no address for nodeid %d", con->nodeid);
10351056
goto out;
@@ -1104,13 +1125,11 @@ static void sctp_connect_to_sock(struct connection *con)
11041125
static void tcp_connect_to_sock(struct connection *con)
11051126
{
11061127
struct sockaddr_storage saddr, src_addr;
1128+
unsigned int mark;
11071129
int addr_len;
11081130
struct socket *sock = NULL;
1109-
unsigned int mark;
11101131
int result;
11111132

1112-
dlm_comm_mark(con->nodeid, &mark);
1113-
11141133
mutex_lock(&con->sock_mutex);
11151134
if (con->retries++ > MAX_CONNECT_RETRIES)
11161135
goto out;
@@ -1125,15 +1144,15 @@ static void tcp_connect_to_sock(struct connection *con)
11251144
if (result < 0)
11261145
goto out_err;
11271146

1128-
sock_set_mark(sock->sk, mark);
1129-
11301147
memset(&saddr, 0, sizeof(saddr));
1131-
result = nodeid_to_addr(con->nodeid, &saddr, NULL, false);
1148+
result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
11321149
if (result < 0) {
11331150
log_print("no address for nodeid %d", con->nodeid);
11341151
goto out_err;
11351152
}
11361153

1154+
sock_set_mark(sock->sk, mark);
1155+
11371156
add_sock(sock, con);
11381157

11391158
/* Bind to our cluster-known address connecting to avoid

fs/dlm/lowcomms.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ int dlm_lowcomms_close(int nodeid);
2121
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
2222
void dlm_lowcomms_commit_buffer(void *mh);
2323
int dlm_lowcomms_connect_node(int nodeid);
24+
int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
2425
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
2526

2627
#endif /* __LOWCOMMS_DOT_H__ */

0 commit comments

Comments
 (0)