diff mbox series

ceph: defer stopping the mdsc delayed_work

Message ID 20230629033533.270535-1-xiubli@redhat.com
State New
Headers show
Series ceph: defer stopping the mdsc delayed_work | expand

Commit Message

Xiubo Li June 29, 2023, 3:35 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Flushing the dirty buffer may take a long time if the Rados is
overloaded or if there is a network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.

Cc: stable@vger.kernel.org
Cc: Venky Shankar <vshankar@redhat.com>
URL: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c | 2 +-
 fs/ceph/mds_client.h | 3 ++-
 fs/ceph/super.c      | 7 ++++---
 3 files changed, 7 insertions(+), 5 deletions(-)

Comments

Milind Changire July 21, 2023, 3:19 a.m. UTC | #1
Looks good to me.

nit: typo for perioudically in commit message

Reviewed-by: Milind Changire <mchangir@redhat.com>


On Thu, Jun 29, 2023 at 9:07 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> Flushing the dirty buffer may take a long time if the Rados is
> overloaded or if there is network issue. So we should ping the
> MDSs perioudically to keep alive, else the MDS will blocklist
> the kclient.
>
> Cc: stable@vger.kernel.org
> Cc: Venky Shankar <vshankar@redhat.com>
> URL: https://tracker.ceph.com/issues/61843
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c | 2 +-
>  fs/ceph/mds_client.h | 3 ++-
>  fs/ceph/super.c      | 7 ++++---
>  3 files changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 65230ebefd51..70987b3c198a 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -5192,7 +5192,7 @@ static void delayed_work(struct work_struct *work)
>
>         doutc(mdsc->fsc->client, "mdsc delayed_work\n");
>
> -       if (mdsc->stopping)
> +       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
>                 return;
>
>         mutex_lock(&mdsc->mutex);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 5d02c8c582fd..befbd384428e 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -400,7 +400,8 @@ struct cap_wait {
>
>  enum {
>         CEPH_MDSC_STOPPING_BEGIN = 1,
> -       CEPH_MDSC_STOPPING_FLUSHED = 2,
> +       CEPH_MDSC_STOPPING_FLUSHING = 2,
> +       CEPH_MDSC_STOPPING_FLUSHED = 3,
>  };
>
>  /*
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 8e1e517a45db..fb694ba72955 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -1488,7 +1488,7 @@ static int ceph_init_fs_context(struct fs_context *fc)
>  static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
>  {
>         spin_lock(&mdsc->stopping_lock);
> -       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) {
> +       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
>                 spin_unlock(&mdsc->stopping_lock);
>                 return false;
>         }
> @@ -1501,7 +1501,7 @@ static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
>  {
>         spin_lock(&mdsc->stopping_lock);
>         if (!atomic_dec_return(&mdsc->stopping_blockers) &&
> -           mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
> +           mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
>                 complete_all(&mdsc->stopping_waiter);
>         spin_unlock(&mdsc->stopping_lock);
>  }
> @@ -1562,7 +1562,7 @@ static void ceph_kill_sb(struct super_block *s)
>         sync_filesystem(s);
>
>         spin_lock(&mdsc->stopping_lock);
> -       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
> +       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
>         wait = !!atomic_read(&mdsc->stopping_blockers);
>         spin_unlock(&mdsc->stopping_lock);
>
> @@ -1576,6 +1576,7 @@ static void ceph_kill_sb(struct super_block *s)
>                         pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
>         }
>
> +       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
>         kill_anon_super(s);
>
>         fsc->client->extra_mon_dispatch = NULL;
> --
> 2.40.1
>
Xiubo Li July 21, 2023, 4:21 a.m. UTC | #2
On 7/21/23 11:19, Milind Changire wrote:
> Looks good to me.
>
> nit: typo for perioudically in commit message

Good catch and thanks Milind, I will fix it.

-- Xiubo


> Reviewed-by: Milind Changire <mchangir@redhat.com>
>
>
> On Thu, Jun 29, 2023 at 9:07 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Flushing the dirty buffer may take a long time if the Rados is
>> overloaded or if there is network issue. So we should ping the
>> MDSs perioudically to keep alive, else the MDS will blocklist
>> the kclient.
>>
>> Cc: stable@vger.kernel.org
>> Cc: Venky Shankar <vshankar@redhat.com>
>> URL: https://tracker.ceph.com/issues/61843
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c | 2 +-
>>   fs/ceph/mds_client.h | 3 ++-
>>   fs/ceph/super.c      | 7 ++++---
>>   3 files changed, 7 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 65230ebefd51..70987b3c198a 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -5192,7 +5192,7 @@ static void delayed_work(struct work_struct *work)
>>
>>          doutc(mdsc->fsc->client, "mdsc delayed_work\n");
>>
>> -       if (mdsc->stopping)
>> +       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
>>                  return;
>>
>>          mutex_lock(&mdsc->mutex);
>> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
>> index 5d02c8c582fd..befbd384428e 100644
>> --- a/fs/ceph/mds_client.h
>> +++ b/fs/ceph/mds_client.h
>> @@ -400,7 +400,8 @@ struct cap_wait {
>>
>>   enum {
>>          CEPH_MDSC_STOPPING_BEGIN = 1,
>> -       CEPH_MDSC_STOPPING_FLUSHED = 2,
>> +       CEPH_MDSC_STOPPING_FLUSHING = 2,
>> +       CEPH_MDSC_STOPPING_FLUSHED = 3,
>>   };
>>
>>   /*
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 8e1e517a45db..fb694ba72955 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -1488,7 +1488,7 @@ static int ceph_init_fs_context(struct fs_context *fc)
>>   static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
>>   {
>>          spin_lock(&mdsc->stopping_lock);
>> -       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) {
>> +       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
>>                  spin_unlock(&mdsc->stopping_lock);
>>                  return false;
>>          }
>> @@ -1501,7 +1501,7 @@ static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
>>   {
>>          spin_lock(&mdsc->stopping_lock);
>>          if (!atomic_dec_return(&mdsc->stopping_blockers) &&
>> -           mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
>> +           mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
>>                  complete_all(&mdsc->stopping_waiter);
>>          spin_unlock(&mdsc->stopping_lock);
>>   }
>> @@ -1562,7 +1562,7 @@ static void ceph_kill_sb(struct super_block *s)
>>          sync_filesystem(s);
>>
>>          spin_lock(&mdsc->stopping_lock);
>> -       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
>> +       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
>>          wait = !!atomic_read(&mdsc->stopping_blockers);
>>          spin_unlock(&mdsc->stopping_lock);
>>
>> @@ -1576,6 +1576,7 @@ static void ceph_kill_sb(struct super_block *s)
>>                          pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
>>          }
>>
>> +       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
>>          kill_anon_super(s);
>>
>>          fsc->client->extra_mon_dispatch = NULL;
>> --
>> 2.40.1
>>
>
diff mbox series

Patch

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 65230ebefd51..70987b3c198a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5192,7 +5192,7 @@  static void delayed_work(struct work_struct *work)
 
 	doutc(mdsc->fsc->client, "mdsc delayed_work\n");
 
-	if (mdsc->stopping)
+	if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
 		return;
 
 	mutex_lock(&mdsc->mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 5d02c8c582fd..befbd384428e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -400,7 +400,8 @@  struct cap_wait {
 
 enum {
 	CEPH_MDSC_STOPPING_BEGIN = 1,
-	CEPH_MDSC_STOPPING_FLUSHED = 2,
+	CEPH_MDSC_STOPPING_FLUSHING = 2,
+	CEPH_MDSC_STOPPING_FLUSHED = 3,
 };
 
 /*
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8e1e517a45db..fb694ba72955 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1488,7 +1488,7 @@  static int ceph_init_fs_context(struct fs_context *fc)
 static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
 {
 	spin_lock(&mdsc->stopping_lock);
-	if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) {
+	if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
 		spin_unlock(&mdsc->stopping_lock);
 		return false;
 	}
@@ -1501,7 +1501,7 @@  static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
 {
 	spin_lock(&mdsc->stopping_lock);
 	if (!atomic_dec_return(&mdsc->stopping_blockers) &&
-	    mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
+	    mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
 		complete_all(&mdsc->stopping_waiter);
 	spin_unlock(&mdsc->stopping_lock);
 }
@@ -1562,7 +1562,7 @@  static void ceph_kill_sb(struct super_block *s)
 	sync_filesystem(s);
 
 	spin_lock(&mdsc->stopping_lock);
-	mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+	mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
 	wait = !!atomic_read(&mdsc->stopping_blockers);
 	spin_unlock(&mdsc->stopping_lock);
 
@@ -1576,6 +1576,7 @@  static void ceph_kill_sb(struct super_block *s)
 			pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
 	}
 
+	mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
 	kill_anon_super(s);
 
 	fsc->client->extra_mon_dispatch = NULL;