Skip to content

Commit 7e3f295

Browse files
chrismason-xx authored and Andy Grover committed
rds: don't let RDS shut down a connection while senders are present
This is the first in a long line of patches that tries to fix races between RDS connection shutdown and RDS traffic. Here we maintain a count of active senders so that the connection doesn't go away while they are still using it.

Signed-off-by: Chris Mason <[email protected]>
1 parent 38a4e5e commit 7e3f295

File tree

5 files changed

+26
-16
lines changed

5 files changed

+26
-16
lines changed

net/rds/connection.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
148148

149149
spin_lock_init(&conn->c_send_lock);
150150
atomic_set(&conn->c_send_generation, 1);
151+
atomic_set(&conn->c_senders, 0);
151152
INIT_LIST_HEAD(&conn->c_send_queue);
152153
INIT_LIST_HEAD(&conn->c_retrans);
153154

@@ -276,6 +277,12 @@ void rds_conn_shutdown(struct rds_connection *conn)
276277
spin_lock_irq(&conn->c_send_lock);
277278
spin_unlock_irq(&conn->c_send_lock);
278279

280+
while(atomic_read(&conn->c_senders)) {
281+
schedule_timeout(1);
282+
spin_lock_irq(&conn->c_send_lock);
283+
spin_unlock_irq(&conn->c_send_lock);
284+
}
285+
279286
conn->c_trans->conn_shutdown(conn);
280287
rds_conn_reset(conn);
281288

net/rds/ib_recv.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -863,18 +863,6 @@ int rds_ib_recv(struct rds_connection *conn)
863863
int ret = 0;
864864

865865
rdsdebug("conn %p\n", conn);
866-
867-
/*
868-
* If we get a temporary posting failure in this context then
869-
* we're really low and we want the caller to back off for a bit.
870-
*/
871-
mutex_lock(&ic->i_recv_mutex);
872-
if (rds_ib_recv_refill(conn, 0))
873-
ret = -ENOMEM;
874-
else
875-
rds_ib_stats_inc(s_ib_rx_refill_from_thread);
876-
mutex_unlock(&ic->i_recv_mutex);
877-
878866
if (rds_conn_up(conn))
879867
rds_ib_attempt_ack(ic);
880868

net/rds/message.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,10 @@ static void rds_message_purge(struct rds_message *rm)
8181
void rds_message_put(struct rds_message *rm)
8282
{
8383
rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
84-
84+
if (atomic_read(&rm->m_refcount) == 0) {
85+
printk(KERN_CRIT "danger refcount zero on %p\n", rm);
86+
WARN_ON(1);
87+
}
8588
if (atomic_dec_and_test(&rm->m_refcount)) {
8689
BUG_ON(!list_empty(&rm->m_sock_item));
8790
BUG_ON(!list_empty(&rm->m_conn_item));

net/rds/rds.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ struct rds_connection {
9393

9494
spinlock_t c_send_lock; /* protect send ring */
9595
atomic_t c_send_generation;
96+
atomic_t c_senders;
9697
struct rds_message *c_xmit_rm;
9798
unsigned long c_xmit_sg;
9899
unsigned int c_xmit_hdr_off;

net/rds/send.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,23 @@ void rds_send_reset(struct rds_connection *conn)
6060
struct rds_message *rm, *tmp;
6161
unsigned long flags;
6262

63+
spin_lock_irqsave(&conn->c_send_lock, flags);
6364
if (conn->c_xmit_rm) {
65+
rm = conn->c_xmit_rm;
66+
conn->c_xmit_rm = NULL;
6467
/* Tell the user the RDMA op is no longer mapped by the
6568
* transport. This isn't entirely true (it's flushed out
6669
* independently) but as the connection is down, there's
6770
* no ongoing RDMA to/from that memory */
68-
rds_message_unmapped(conn->c_xmit_rm);
69-
rds_message_put(conn->c_xmit_rm);
70-
conn->c_xmit_rm = NULL;
71+
printk(KERN_CRIT "send reset unmapping %p\n", rm);
72+
rds_message_unmapped(rm);
73+
spin_unlock_irqrestore(&conn->c_send_lock, flags);
74+
75+
rds_message_put(rm);
76+
} else {
77+
spin_unlock_irqrestore(&conn->c_send_lock, flags);
7178
}
79+
7280
conn->c_xmit_sg = 0;
7381
conn->c_xmit_hdr_off = 0;
7482
conn->c_xmit_data_off = 0;
@@ -131,6 +139,7 @@ int rds_send_xmit(struct rds_connection *conn)
131139
ret = -ENOMEM;
132140
goto out;
133141
}
142+
atomic_inc(&conn->c_senders);
134143

135144
if (conn->c_trans->xmit_prepare)
136145
conn->c_trans->xmit_prepare(conn);
@@ -350,6 +359,8 @@ int rds_send_xmit(struct rds_connection *conn)
350359
rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
351360
}
352361

362+
atomic_dec(&conn->c_senders);
363+
353364
/*
354365
* Other senders will see we have c_send_lock and exit. We
355366
* need to recheck the send queue and race again for c_send_lock

0 commit comments

Comments
 (0)