
Commit e548e9b

ukernel authored and idryomov committed
ceph: re-send flushing caps (which are revoked) in reconnect stage
If flushing caps were revoked, we should re-send the cap flush in the client reconnect stage. This guarantees that the MDS processes the cap flush message before issuing the flushing caps to another client. Signed-off-by: Yan, Zheng <[email protected]>
1 parent a2971c8 commit e548e9b
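The change hangs off a new per-flush "kick" flag in struct ceph_cap_flush (see the super.h hunk below): during reconnect, ceph_early_kick_flushing_caps() immediately re-sends the flushes whose caps were revoked and marks every remaining pending flush with kick = true, so that a later ceph_kick_flushing_caps() pass re-sends only the marked entries. The sketch below is a simplified standalone model of the selection loop in __kick_flushing_caps(); it uses a plain array where the kernel walks the rbtree ci->i_cap_flush_tree under ci->i_ceph_lock, and next_flush() is a hypothetical name used only for illustration.

/*
 * Standalone model of the kick-flag selection logic added to
 * __kick_flushing_caps() by this commit. Illustration only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cap_flush {
        uint64_t tid;   /* flush transaction id */
        bool kick;      /* marked for re-send during reconnect */
};

/* Return the next flush at or after first_tid that should be (re)sent. */
static struct cap_flush *next_flush(struct cap_flush *cf, int n,
                                    uint64_t first_tid, bool kick_all)
{
        for (int i = 0; i < n; i++) {
                if (cf[i].tid < first_tid)
                        continue;
                if (kick_all || cf[i].kick)
                        return &cf[i];
        }
        return NULL;
}

int main(void)
{
        struct cap_flush flushes[] = {
                { .tid = 5, .kick = false },  /* not marked: skip */
                { .tid = 7, .kick = true },   /* marked by the early kick */
                { .tid = 9, .kick = true },
        };
        uint64_t first_tid = 0;
        struct cap_flush *cf;

        /* kick_all = false: only marked entries are re-sent */
        while ((cf = next_flush(flushes, 3, first_tid, false))) {
                cf->kick = false;             /* consume the mark, as the patch does */
                first_tid = cf->tid + 1;
                printf("re-send cap flush tid %llu\n",
                       (unsigned long long)cf->tid);
        }
        return 0;
}

Running this prints tids 7 and 9 but skips 5, mirroring how a kick_all = false pass re-sends only the flushes marked during the early-kick pass, while kick_all = true (used at reconnect and in kick_flushing_inode_caps) re-sends everything pending.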

File tree: fs/ceph/caps.c · fs/ceph/mds_client.c · fs/ceph/super.h

3 files changed: +61 −6 lines changed


fs/ceph/caps.c

Lines changed: 53 additions & 4 deletions
@@ -1486,6 +1486,7 @@ static int __mark_caps_flushing(struct inode *inode,
 
         cf = kmalloc(sizeof(*cf), GFP_ATOMIC);
         cf->caps = flushing;
+        cf->kick = false;
 
         spin_lock(&mdsc->cap_dirty_lock);
         list_del_init(&ci->i_dirty_item);
@@ -2101,7 +2102,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 
 static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
                                 struct ceph_mds_session *session,
-                                struct ceph_inode_info *ci)
+                                struct ceph_inode_info *ci,
+                                bool kick_all)
 {
         struct inode *inode = &ci->vfs_inode;
         struct ceph_cap *cap;
@@ -2127,7 +2129,9 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
 
                 for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
                         cf = rb_entry(n, struct ceph_cap_flush, i_node);
-                        if (cf->tid >= first_tid)
+                        if (cf->tid < first_tid)
+                                continue;
+                        if (kick_all || cf->kick)
                                 break;
                 }
                 if (!n) {
@@ -2136,6 +2140,8 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
                 }
 
                 cf = rb_entry(n, struct ceph_cap_flush, i_node);
+                cf->kick = false;
+
                 first_tid = cf->tid + 1;
 
                 dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode,
@@ -2149,6 +2155,49 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
         return delayed;
 }
 
+void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
+                                   struct ceph_mds_session *session)
+{
+        struct ceph_inode_info *ci;
+        struct ceph_cap *cap;
+        struct ceph_cap_flush *cf;
+        struct rb_node *n;
+
+        dout("early_kick_flushing_caps mds%d\n", session->s_mds);
+        list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
+                spin_lock(&ci->i_ceph_lock);
+                cap = ci->i_auth_cap;
+                if (!(cap && cap->session == session)) {
+                        pr_err("%p auth cap %p not mds%d ???\n",
+                                &ci->vfs_inode, cap, session->s_mds);
+                        spin_unlock(&ci->i_ceph_lock);
+                        continue;
+                }
+
+
+                /*
+                 * if flushing caps were revoked, we re-send the cap flush
+                 * in client reconnect stage. This guarantees MDS processes
+                 * the cap flush message before issuing the flushing caps to
+                 * other client.
+                 */
+                if ((cap->issued & ci->i_flushing_caps) !=
+                    ci->i_flushing_caps) {
+                        spin_unlock(&ci->i_ceph_lock);
+                        if (!__kick_flushing_caps(mdsc, session, ci, true))
+                                continue;
+                        spin_lock(&ci->i_ceph_lock);
+                }
+
+                for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
+                        cf = rb_entry(n, struct ceph_cap_flush, i_node);
+                        cf->kick = true;
+                }
+
+                spin_unlock(&ci->i_ceph_lock);
+        }
+}
+
 void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                              struct ceph_mds_session *session)
 {
@@ -2158,7 +2207,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
 
         dout("kick_flushing_caps mds%d\n", session->s_mds);
         list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
-                int delayed = __kick_flushing_caps(mdsc, session, ci);
+                int delayed = __kick_flushing_caps(mdsc, session, ci, false);
                 if (delayed) {
                         spin_lock(&ci->i_ceph_lock);
                         __cap_delay_requeue(mdsc, ci);
@@ -2191,7 +2240,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
 
         spin_unlock(&ci->i_ceph_lock);
 
-        delayed = __kick_flushing_caps(mdsc, session, ci);
+        delayed = __kick_flushing_caps(mdsc, session, ci, true);
         if (delayed) {
                 spin_lock(&ci->i_ceph_lock);
                 __cap_delay_requeue(mdsc, ci);
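The early-kick trigger is the bitmask test in the new function above: (cap->issued & ci->i_flushing_caps) != ci->i_flushing_caps holds when some cap bit still being flushed is no longer issued, i.e. has been revoked by the MDS. A minimal standalone illustration of that check follows; the cap bits here are placeholder values, not the real CEPH_CAP_* encoding.

#include <stdbool.h>
#include <stdio.h>

/* Placeholder cap bits for illustration only. */
#define CAP_FILE_WR     (1 << 0)
#define CAP_FILE_BUFFER (1 << 1)

/* True if any cap we are still flushing is no longer issued to us. */
static bool flushing_caps_revoked(int issued, int flushing)
{
        return (issued & flushing) != flushing;
}

int main(void)
{
        int flushing = CAP_FILE_WR | CAP_FILE_BUFFER;

        /* nothing revoked: prints 0 */
        printf("%d\n", flushing_caps_revoked(CAP_FILE_WR | CAP_FILE_BUFFER,
                                             flushing));
        /* BUFFER dropped out of the issued set: prints 1 */
        printf("%d\n", flushing_caps_revoked(CAP_FILE_WR, flushing));
        return 0;
}

The second call returns true because a cap being flushed was revoked, which is exactly the case where the patch re-sends all pending flushes immediately with kick_all = true.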

fs/ceph/mds_client.c

Lines changed: 3 additions & 0 deletions
@@ -2982,6 +2982,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
         reply->hdr.data_len = cpu_to_le32(pagelist->length);
         ceph_msg_data_add_pagelist(reply, pagelist);
+
+        ceph_early_kick_flushing_caps(mdsc, session);
+
         ceph_con_send(&session->s_con, reply);
 
         mutex_unlock(&session->s_mutex);

fs/ceph/super.h

Lines changed: 5 additions & 2 deletions
@@ -189,9 +189,10 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
 struct ceph_cap_flush {
         u64 tid;
         int caps;
-        struct rb_node g_node;
+        bool kick;
+        struct rb_node g_node; // global
         union {
-                struct rb_node i_node;
+                struct rb_node i_node; // inode
                 struct list_head list;
         };
 };
@@ -868,6 +869,8 @@ extern void ceph_queue_caps_release(struct inode *inode);
 extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
 extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
                       int datasync);
+extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
+                                          struct ceph_mds_session *session);
 extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                     struct ceph_mds_session *session);
 extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
