diff mbox series

[01/15] ceph: Convert ceph_writepages_start() to use folios a little more

Message ID 20230825201225.348148-2-willy@infradead.org
State New
Headers show
Series Many folio conversions for ceph | expand

Commit Message

Matthew Wilcox Aug. 25, 2023, 8:12 p.m. UTC
After we iterate through the locked folios using filemap_get_folios_tag(),
we currently convert back to a page (and then in some circumstances back
to a folio again!).  Just use a folio throughout and avoid various hidden
calls to compound_head().  Ceph still uses a page array to interact with
the OSD which should be cleaned up in a subsequent patch.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ceph/addr.c | 100 ++++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 51 deletions(-)

Comments

Xiubo Li Aug. 28, 2023, 1:18 a.m. UTC | #1
On 8/26/23 04:12, Matthew Wilcox (Oracle) wrote:
> After we iterate through the locked folios using filemap_get_folios_tag(),
> we currently convert back to a page (and then in some circumstances back
> to a folio again!).  Just use a folio throughout and avoid various hidden
> calls to compound_head().  Ceph still uses a page array to interact with
> the OSD which should be cleaned up in a subsequent patch.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   fs/ceph/addr.c | 100 ++++++++++++++++++++++++-------------------------
>   1 file changed, 49 insertions(+), 51 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index f4863078f7fe..9a0a79833eb0 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1018,7 +1018,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>   		int num_ops = 0, op_idx;
>   		unsigned i, nr_folios, max_pages, locked_pages = 0;
>   		struct page **pages = NULL, **data_pages;
> -		struct page *page;
> +		struct folio *folio;
>   		pgoff_t strip_unit_end = 0;
>   		u64 offset = 0, len = 0;
>   		bool from_pool = false;
> @@ -1032,22 +1032,22 @@ static int ceph_writepages_start(struct address_space *mapping,
>   		if (!nr_folios && !locked_pages)
>   			break;
>   		for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
> -			page = &fbatch.folios[i]->page;
> -			dout("? %p idx %lu\n", page, page->index);
> +			folio = fbatch.folios[i];
> +			dout("? %p idx %lu\n", folio, folio->index);
>   			if (locked_pages == 0)
> -				lock_page(page);  /* first page */
> -			else if (!trylock_page(page))
> +				folio_lock(folio);  /* first folio */
> +			else if (!folio_trylock(folio))
>   				break;
>   
>   			/* only dirty pages, or our accounting breaks */
> -			if (unlikely(!PageDirty(page)) ||
> -			    unlikely(page->mapping != mapping)) {
> -				dout("!dirty or !mapping %p\n", page);
> -				unlock_page(page);
> +			if (unlikely(!folio_test_dirty(folio)) ||
> +			    unlikely(folio->mapping != mapping)) {
> +				dout("!dirty or !mapping %p\n", folio);
> +				folio_unlock(folio);
>   				continue;
>   			}
>   			/* only if matching snap context */
> -			pgsnapc = page_snap_context(page);
> +			pgsnapc = folio->private;
>   			if (pgsnapc != snapc) {
>   				dout("page snapc %p %lld != oldest %p %lld\n",
>   				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
> @@ -1055,12 +1055,10 @@ static int ceph_writepages_start(struct address_space *mapping,
>   				    !ceph_wbc.head_snapc &&
>   				    wbc->sync_mode != WB_SYNC_NONE)
>   					should_loop = true;
> -				unlock_page(page);
> +				folio_unlock(folio);
>   				continue;
>   			}
> -			if (page_offset(page) >= ceph_wbc.i_size) {
> -				struct folio *folio = page_folio(page);
> -
> +			if (folio_pos(folio) >= ceph_wbc.i_size) {
>   				dout("folio at %lu beyond eof %llu\n",
>   				     folio->index, ceph_wbc.i_size);
>   				if ((ceph_wbc.size_stable ||
> @@ -1071,31 +1069,32 @@ static int ceph_writepages_start(struct address_space *mapping,
>   				folio_unlock(folio);
>   				continue;
>   			}
> -			if (strip_unit_end && (page->index > strip_unit_end)) {
> -				dout("end of strip unit %p\n", page);
> -				unlock_page(page);
> +			if (strip_unit_end && (folio->index > strip_unit_end)) {
> +				dout("end of strip unit %p\n", folio);
> +				folio_unlock(folio);
>   				break;
>   			}
> -			if (PageWriteback(page) || PageFsCache(page)) {
> +			if (folio_test_writeback(folio) ||
> +			    folio_test_fscache(folio)) {
>   				if (wbc->sync_mode == WB_SYNC_NONE) {
> -					dout("%p under writeback\n", page);
> -					unlock_page(page);
> +					dout("%p under writeback\n", folio);
> +					folio_unlock(folio);
>   					continue;
>   				}
> -				dout("waiting on writeback %p\n", page);
> -				wait_on_page_writeback(page);
> -				wait_on_page_fscache(page);
> +				dout("waiting on writeback %p\n", folio);
> +				folio_wait_writeback(folio);
> +				folio_wait_fscache(folio);
>   			}
>   
> -			if (!clear_page_dirty_for_io(page)) {
> -				dout("%p !clear_page_dirty_for_io\n", page);
> -				unlock_page(page);
> +			if (!folio_clear_dirty_for_io(folio)) {
> +				dout("%p !folio_clear_dirty_for_io\n", folio);
> +				folio_unlock(folio);
>   				continue;
>   			}
>   
>   			/*
>   			 * We have something to write.  If this is
> -			 * the first locked page this time through,
> +			 * the first locked folio this time through,
>   			 * calculate max possinle write size and
>   			 * allocate a page array
>   			 */
> @@ -1105,7 +1104,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>   				u32 xlen;
>   
>   				/* prepare async write request */
> -				offset = (u64)page_offset(page);
> +				offset = folio_pos(folio);
>   				ceph_calc_file_object_mapping(&ci->i_layout,
>   							      offset, wsize,
>   							      &objnum, &objoff,
> @@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>   				len = xlen;
>   
>   				num_ops = 1;
> -				strip_unit_end = page->index +
> +				strip_unit_end = folio->index +
>   					((len - 1) >> PAGE_SHIFT);
>   
>   				BUG_ON(pages);
> @@ -1128,23 +1127,23 @@ static int ceph_writepages_start(struct address_space *mapping,
>   				}
>   
>   				len = 0;
> -			} else if (page->index !=
> +			} else if (folio->index !=
>   				   (offset + len) >> PAGE_SHIFT) {
>   				if (num_ops >= (from_pool ?  CEPH_OSD_SLAB_OPS :
>   							     CEPH_OSD_MAX_OPS)) {
> -					redirty_page_for_writepage(wbc, page);
> -					unlock_page(page);
> +					folio_redirty_for_writepage(wbc, folio);
> +					folio_unlock(folio);
>   					break;
>   				}
>   
>   				num_ops++;
> -				offset = (u64)page_offset(page);
> +				offset = (u64)folio_pos(folio);
>   				len = 0;
>   			}
>   
>   			/* note position of first page in fbatch */
> -			dout("%p will write page %p idx %lu\n",
> -			     inode, page, page->index);
> +			dout("%p will write folio %p idx %lu\n",
> +			     inode, folio, folio->index);
>   
>   			if (atomic_long_inc_return(&fsc->writeback_count) >
>   			    CONGESTION_ON_THRESH(
> @@ -1153,7 +1152,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>   
>   			if (IS_ENCRYPTED(inode)) {
>   				pages[locked_pages] =
> -					fscrypt_encrypt_pagecache_blocks(page,
> +					fscrypt_encrypt_pagecache_blocks(&folio->page,
>   						PAGE_SIZE, 0,
>   						locked_pages ? GFP_NOWAIT : GFP_NOFS);
>   				if (IS_ERR(pages[locked_pages])) {
> @@ -1163,17 +1162,17 @@ static int ceph_writepages_start(struct address_space *mapping,
>   					/* better not fail on first page! */
>   					BUG_ON(locked_pages == 0);
>   					pages[locked_pages] = NULL;
> -					redirty_page_for_writepage(wbc, page);
> -					unlock_page(page);
> +					folio_redirty_for_writepage(wbc, folio);
> +					folio_unlock(folio);
>   					break;
>   				}
>   				++locked_pages;
>   			} else {
> -				pages[locked_pages++] = page;
> +				pages[locked_pages++] = &folio->page;
>   			}
>   
>   			fbatch.folios[i] = NULL;
> -			len += thp_size(page);
> +			len += folio_size(folio);
>   		}
>   
>   		/* did we get anything? */
> @@ -1222,7 +1221,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>   			BUG_ON(IS_ERR(req));
>   		}
>   		BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
> -			     thp_size(pages[locked_pages - 1]) - offset);
> +			     folio_size(folio) - offset);
>   
>   		if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
>   			rc = -EIO;
> @@ -1236,9 +1235,9 @@ static int ceph_writepages_start(struct address_space *mapping,
>   		data_pages = pages;
>   		op_idx = 0;
>   		for (i = 0; i < locked_pages; i++) {
> -			struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
> +			struct folio *folio = page_folio(ceph_fscrypt_pagecache_page(pages[i]));
>   
> -			u64 cur_offset = page_offset(page);
> +			u64 cur_offset = folio_pos(folio);
>   			/*
>   			 * Discontinuity in page range? Ceph can handle that by just passing
>   			 * multiple extents in the write op.
> @@ -1267,10 +1266,10 @@ static int ceph_writepages_start(struct address_space *mapping,
>   				op_idx++;
>   			}
>   
> -			set_page_writeback(page);
> +			folio_start_writeback(folio);
>   			if (caching)
> -				ceph_set_page_fscache(page);
> -			len += thp_size(page);
> +				ceph_set_page_fscache(pages[i]);
> +			len += folio_size(folio);
>   		}
>   		ceph_fscache_write_to_cache(inode, offset, len, caching);
>   
> @@ -1280,7 +1279,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>   			/* writepages_finish() clears writeback pages
>   			 * according to the data length, so make sure
>   			 * data length covers all locked pages */
> -			u64 min_len = len + 1 - thp_size(page);
> +			u64 min_len = len + 1 - folio_size(folio);
>   			len = get_writepages_data_length(inode, pages[i - 1],
>   							 offset);
>   			len = max(len, min_len);
> @@ -1360,7 +1359,6 @@ static int ceph_writepages_start(struct address_space *mapping,
>   		if (wbc->sync_mode != WB_SYNC_NONE &&
>   		    start_index == 0 && /* all dirty pages were checked */
>   		    !ceph_wbc.head_snapc) {
> -			struct page *page;
>   			unsigned i, nr;
>   			index = 0;
>   			while ((index <= end) &&
> @@ -1369,10 +1367,10 @@ static int ceph_writepages_start(struct address_space *mapping,
>   						PAGECACHE_TAG_WRITEBACK,
>   						&fbatch))) {
>   				for (i = 0; i < nr; i++) {
> -					page = &fbatch.folios[i]->page;
> -					if (page_snap_context(page) != snapc)
> +					struct folio *folio = fbatch.folios[i];
> +					if (folio->private != snapc)

Here IMO we should rename 'page_snap_context()' to 
'folio_snap_context()' and reuse it, instead of reading 'folio->private' 
directly. As far as I remember, `page->private` could still be non-NULL 
in some cases even if the dirty bit is not set?

Thanks

- Xiubo


>   						continue;
> -					wait_on_page_writeback(page);
> +					folio_wait_writeback(folio);
>   				}
>   				folio_batch_release(&fbatch);
>   				cond_resched();
Xiubo Li Nov. 20, 2023, 12:30 a.m. UTC | #2
On 8/28/23 09:18, Xiubo Li wrote:
>
> On 8/26/23 04:12, Matthew Wilcox (Oracle) wrote:
>> After we iterate through the locked folios using 
>> filemap_get_folios_tag(),
>> we currently convert back to a page (and then in some circumstances back
>> to a folio again!).  Just use a folio throughout and avoid various 
>> hidden
>> calls to compound_head().  Ceph still uses a page array to interact with
>> the OSD which should be cleaned up in a subsequent patch.
>>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>   fs/ceph/addr.c | 100 ++++++++++++++++++++++++-------------------------
>>   1 file changed, 49 insertions(+), 51 deletions(-)
>>
>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>> index f4863078f7fe..9a0a79833eb0 100644
>> --- a/fs/ceph/addr.c
>> +++ b/fs/ceph/addr.c
>> @@ -1018,7 +1018,7 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>           int num_ops = 0, op_idx;
>>           unsigned i, nr_folios, max_pages, locked_pages = 0;
>>           struct page **pages = NULL, **data_pages;
>> -        struct page *page;
>> +        struct folio *folio;
>>           pgoff_t strip_unit_end = 0;
>>           u64 offset = 0, len = 0;
>>           bool from_pool = false;
>> @@ -1032,22 +1032,22 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>           if (!nr_folios && !locked_pages)
>>               break;
>>           for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
>> -            page = &fbatch.folios[i]->page;
>> -            dout("? %p idx %lu\n", page, page->index);
>> +            folio = fbatch.folios[i];
>> +            dout("? %p idx %lu\n", folio, folio->index);
>>               if (locked_pages == 0)
>> -                lock_page(page);  /* first page */
>> -            else if (!trylock_page(page))
>> +                folio_lock(folio);  /* first folio */
>> +            else if (!folio_trylock(folio))
>>                   break;
>>                 /* only dirty pages, or our accounting breaks */
>> -            if (unlikely(!PageDirty(page)) ||
>> -                unlikely(page->mapping != mapping)) {
>> -                dout("!dirty or !mapping %p\n", page);
>> -                unlock_page(page);
>> +            if (unlikely(!folio_test_dirty(folio)) ||
>> +                unlikely(folio->mapping != mapping)) {
>> +                dout("!dirty or !mapping %p\n", folio);
>> +                folio_unlock(folio);
>>                   continue;
>>               }
>>               /* only if matching snap context */
>> -            pgsnapc = page_snap_context(page);
>> +            pgsnapc = folio->private;
>>               if (pgsnapc != snapc) {
>>                   dout("page snapc %p %lld != oldest %p %lld\n",
>>                        pgsnapc, pgsnapc->seq, snapc, snapc->seq);
>> @@ -1055,12 +1055,10 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                       !ceph_wbc.head_snapc &&
>>                       wbc->sync_mode != WB_SYNC_NONE)
>>                       should_loop = true;
>> -                unlock_page(page);
>> +                folio_unlock(folio);
>>                   continue;
>>               }
>> -            if (page_offset(page) >= ceph_wbc.i_size) {
>> -                struct folio *folio = page_folio(page);
>> -
>> +            if (folio_pos(folio) >= ceph_wbc.i_size) {
>>                   dout("folio at %lu beyond eof %llu\n",
>>                        folio->index, ceph_wbc.i_size);
>>                   if ((ceph_wbc.size_stable ||
>> @@ -1071,31 +1069,32 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                   folio_unlock(folio);
>>                   continue;
>>               }
>> -            if (strip_unit_end && (page->index > strip_unit_end)) {
>> -                dout("end of strip unit %p\n", page);
>> -                unlock_page(page);
>> +            if (strip_unit_end && (folio->index > strip_unit_end)) {
>> +                dout("end of strip unit %p\n", folio);
>> +                folio_unlock(folio);
>>                   break;
>>               }
>> -            if (PageWriteback(page) || PageFsCache(page)) {
>> +            if (folio_test_writeback(folio) ||
>> +                folio_test_fscache(folio)) {
>>                   if (wbc->sync_mode == WB_SYNC_NONE) {
>> -                    dout("%p under writeback\n", page);
>> -                    unlock_page(page);
>> +                    dout("%p under writeback\n", folio);
>> +                    folio_unlock(folio);
>>                       continue;
>>                   }
>> -                dout("waiting on writeback %p\n", page);
>> -                wait_on_page_writeback(page);
>> -                wait_on_page_fscache(page);
>> +                dout("waiting on writeback %p\n", folio);
>> +                folio_wait_writeback(folio);
>> +                folio_wait_fscache(folio);
>>               }
>>   -            if (!clear_page_dirty_for_io(page)) {
>> -                dout("%p !clear_page_dirty_for_io\n", page);
>> -                unlock_page(page);
>> +            if (!folio_clear_dirty_for_io(folio)) {
>> +                dout("%p !folio_clear_dirty_for_io\n", folio);
>> +                folio_unlock(folio);
>>                   continue;
>>               }
>>                 /*
>>                * We have something to write.  If this is
>> -             * the first locked page this time through,
>> +             * the first locked folio this time through,
>>                * calculate max possinle write size and
>>                * allocate a page array
>>                */
>> @@ -1105,7 +1104,7 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                   u32 xlen;
>>                     /* prepare async write request */
>> -                offset = (u64)page_offset(page);
>> +                offset = folio_pos(folio);
>> ceph_calc_file_object_mapping(&ci->i_layout,
>>                                     offset, wsize,
>>                                     &objnum, &objoff,
>> @@ -1113,7 +1112,7 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                   len = xlen;
>>                     num_ops = 1;
>> -                strip_unit_end = page->index +
>> +                strip_unit_end = folio->index +
>>                       ((len - 1) >> PAGE_SHIFT);
>>                     BUG_ON(pages);
>> @@ -1128,23 +1127,23 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                   }
>>                     len = 0;
>> -            } else if (page->index !=
>> +            } else if (folio->index !=
>>                      (offset + len) >> PAGE_SHIFT) {
>>                   if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS :
>>                                    CEPH_OSD_MAX_OPS)) {
>> -                    redirty_page_for_writepage(wbc, page);
>> -                    unlock_page(page);
>> +                    folio_redirty_for_writepage(wbc, folio);
>> +                    folio_unlock(folio);
>>                       break;
>>                   }
>>                     num_ops++;
>> -                offset = (u64)page_offset(page);
>> +                offset = (u64)folio_pos(folio);
>>                   len = 0;
>>               }
>>                 /* note position of first page in fbatch */
>> -            dout("%p will write page %p idx %lu\n",
>> -                 inode, page, page->index);
>> +            dout("%p will write folio %p idx %lu\n",
>> +                 inode, folio, folio->index);
>>                 if (atomic_long_inc_return(&fsc->writeback_count) >
>>                   CONGESTION_ON_THRESH(
>> @@ -1153,7 +1152,7 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                 if (IS_ENCRYPTED(inode)) {
>>                   pages[locked_pages] =
>> -                    fscrypt_encrypt_pagecache_blocks(page,
>> + fscrypt_encrypt_pagecache_blocks(&folio->page,
>>                           PAGE_SIZE, 0,
>>                           locked_pages ? GFP_NOWAIT : GFP_NOFS);
>>                   if (IS_ERR(pages[locked_pages])) {
>> @@ -1163,17 +1162,17 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                       /* better not fail on first page! */
>>                       BUG_ON(locked_pages == 0);
>>                       pages[locked_pages] = NULL;
>> -                    redirty_page_for_writepage(wbc, page);
>> -                    unlock_page(page);
>> +                    folio_redirty_for_writepage(wbc, folio);
>> +                    folio_unlock(folio);
>>                       break;
>>                   }
>>                   ++locked_pages;
>>               } else {
>> -                pages[locked_pages++] = page;
>> +                pages[locked_pages++] = &folio->page;
>>               }
>>                 fbatch.folios[i] = NULL;
>> -            len += thp_size(page);
>> +            len += folio_size(folio);
>>           }
>>             /* did we get anything? */
>> @@ -1222,7 +1221,7 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>               BUG_ON(IS_ERR(req));
>>           }
>>           BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 
>> 1]) +
>> -                 thp_size(pages[locked_pages - 1]) - offset);
>> +                 folio_size(folio) - offset);
>>             if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
>>               rc = -EIO;
>> @@ -1236,9 +1235,9 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>           data_pages = pages;
>>           op_idx = 0;
>>           for (i = 0; i < locked_pages; i++) {
>> -            struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
>> +            struct folio *folio = 
>> page_folio(ceph_fscrypt_pagecache_page(pages[i]));
>>   -            u64 cur_offset = page_offset(page);
>> +            u64 cur_offset = folio_pos(folio);
>>               /*
>>                * Discontinuity in page range? Ceph can handle that by 
>> just passing
>>                * multiple extents in the write op.
>> @@ -1267,10 +1266,10 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                   op_idx++;
>>               }
>>   -            set_page_writeback(page);
>> +            folio_start_writeback(folio);
>>               if (caching)
>> -                ceph_set_page_fscache(page);
>> -            len += thp_size(page);
>> +                ceph_set_page_fscache(pages[i]);
>> +            len += folio_size(folio);
>>           }
>>           ceph_fscache_write_to_cache(inode, offset, len, caching);
>>   @@ -1280,7 +1279,7 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>               /* writepages_finish() clears writeback pages
>>                * according to the data length, so make sure
>>                * data length covers all locked pages */
>> -            u64 min_len = len + 1 - thp_size(page);
>> +            u64 min_len = len + 1 - folio_size(folio);
>>               len = get_writepages_data_length(inode, pages[i - 1],
>>                                offset);
>>               len = max(len, min_len);
>> @@ -1360,7 +1359,6 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>           if (wbc->sync_mode != WB_SYNC_NONE &&
>>               start_index == 0 && /* all dirty pages were checked */
>>               !ceph_wbc.head_snapc) {
>> -            struct page *page;
>>               unsigned i, nr;
>>               index = 0;
>>               while ((index <= end) &&
>> @@ -1369,10 +1367,10 @@ static int ceph_writepages_start(struct 
>> address_space *mapping,
>>                           PAGECACHE_TAG_WRITEBACK,
>>                           &fbatch))) {
>>                   for (i = 0; i < nr; i++) {
>> -                    page = &fbatch.folios[i]->page;
>> -                    if (page_snap_context(page) != snapc)
>> +                    struct folio *folio = fbatch.folios[i];
>> +                    if (folio->private != snapc)
>
> Here IMO we should reuse and rename 'page_snap_context()' --> 
> 'folio_snap_context()' instead of 'folio->private' directly. As I 
> remembered if the dirty bit is not set the `page->private` still could 
> be non-NULL in some cases ?
>
Hi Willy,

Could you check the above comment? As far as I remember, there was a bug 
related to this that we tried to fix with Jeff last year or earlier.

Thanks

- Xiubo



> Thanks
>
> - Xiubo
>
>
>>                           continue;
>> -                    wait_on_page_writeback(page);
>> +                    folio_wait_writeback(folio);
>>                   }
>>                   folio_batch_release(&fbatch);
>>                   cond_resched();
diff mbox series

Patch

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index f4863078f7fe..9a0a79833eb0 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1018,7 +1018,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 		int num_ops = 0, op_idx;
 		unsigned i, nr_folios, max_pages, locked_pages = 0;
 		struct page **pages = NULL, **data_pages;
-		struct page *page;
+		struct folio *folio;
 		pgoff_t strip_unit_end = 0;
 		u64 offset = 0, len = 0;
 		bool from_pool = false;
@@ -1032,22 +1032,22 @@  static int ceph_writepages_start(struct address_space *mapping,
 		if (!nr_folios && !locked_pages)
 			break;
 		for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
-			page = &fbatch.folios[i]->page;
-			dout("? %p idx %lu\n", page, page->index);
+			folio = fbatch.folios[i];
+			dout("? %p idx %lu\n", folio, folio->index);
 			if (locked_pages == 0)
-				lock_page(page);  /* first page */
-			else if (!trylock_page(page))
+				folio_lock(folio);  /* first folio */
+			else if (!folio_trylock(folio))
 				break;
 
 			/* only dirty pages, or our accounting breaks */
-			if (unlikely(!PageDirty(page)) ||
-			    unlikely(page->mapping != mapping)) {
-				dout("!dirty or !mapping %p\n", page);
-				unlock_page(page);
+			if (unlikely(!folio_test_dirty(folio)) ||
+			    unlikely(folio->mapping != mapping)) {
+				dout("!dirty or !mapping %p\n", folio);
+				folio_unlock(folio);
 				continue;
 			}
 			/* only if matching snap context */
-			pgsnapc = page_snap_context(page);
+			pgsnapc = folio->private;
 			if (pgsnapc != snapc) {
 				dout("page snapc %p %lld != oldest %p %lld\n",
 				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -1055,12 +1055,10 @@  static int ceph_writepages_start(struct address_space *mapping,
 				    !ceph_wbc.head_snapc &&
 				    wbc->sync_mode != WB_SYNC_NONE)
 					should_loop = true;
-				unlock_page(page);
+				folio_unlock(folio);
 				continue;
 			}
-			if (page_offset(page) >= ceph_wbc.i_size) {
-				struct folio *folio = page_folio(page);
-
+			if (folio_pos(folio) >= ceph_wbc.i_size) {
 				dout("folio at %lu beyond eof %llu\n",
 				     folio->index, ceph_wbc.i_size);
 				if ((ceph_wbc.size_stable ||
@@ -1071,31 +1069,32 @@  static int ceph_writepages_start(struct address_space *mapping,
 				folio_unlock(folio);
 				continue;
 			}
-			if (strip_unit_end && (page->index > strip_unit_end)) {
-				dout("end of strip unit %p\n", page);
-				unlock_page(page);
+			if (strip_unit_end && (folio->index > strip_unit_end)) {
+				dout("end of strip unit %p\n", folio);
+				folio_unlock(folio);
 				break;
 			}
-			if (PageWriteback(page) || PageFsCache(page)) {
+			if (folio_test_writeback(folio) ||
+			    folio_test_fscache(folio)) {
 				if (wbc->sync_mode == WB_SYNC_NONE) {
-					dout("%p under writeback\n", page);
-					unlock_page(page);
+					dout("%p under writeback\n", folio);
+					folio_unlock(folio);
 					continue;
 				}
-				dout("waiting on writeback %p\n", page);
-				wait_on_page_writeback(page);
-				wait_on_page_fscache(page);
+				dout("waiting on writeback %p\n", folio);
+				folio_wait_writeback(folio);
+				folio_wait_fscache(folio);
 			}
 
-			if (!clear_page_dirty_for_io(page)) {
-				dout("%p !clear_page_dirty_for_io\n", page);
-				unlock_page(page);
+			if (!folio_clear_dirty_for_io(folio)) {
+				dout("%p !folio_clear_dirty_for_io\n", folio);
+				folio_unlock(folio);
 				continue;
 			}
 
 			/*
 			 * We have something to write.  If this is
-			 * the first locked page this time through,
+			 * the first locked folio this time through,
 			 * calculate max possinle write size and
 			 * allocate a page array
 			 */
@@ -1105,7 +1104,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 				u32 xlen;
 
 				/* prepare async write request */
-				offset = (u64)page_offset(page);
+				offset = folio_pos(folio);
 				ceph_calc_file_object_mapping(&ci->i_layout,
 							      offset, wsize,
 							      &objnum, &objoff,
@@ -1113,7 +1112,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 				len = xlen;
 
 				num_ops = 1;
-				strip_unit_end = page->index +
+				strip_unit_end = folio->index +
 					((len - 1) >> PAGE_SHIFT);
 
 				BUG_ON(pages);
@@ -1128,23 +1127,23 @@  static int ceph_writepages_start(struct address_space *mapping,
 				}
 
 				len = 0;
-			} else if (page->index !=
+			} else if (folio->index !=
 				   (offset + len) >> PAGE_SHIFT) {
 				if (num_ops >= (from_pool ?  CEPH_OSD_SLAB_OPS :
 							     CEPH_OSD_MAX_OPS)) {
-					redirty_page_for_writepage(wbc, page);
-					unlock_page(page);
+					folio_redirty_for_writepage(wbc, folio);
+					folio_unlock(folio);
 					break;
 				}
 
 				num_ops++;
-				offset = (u64)page_offset(page);
+				offset = (u64)folio_pos(folio);
 				len = 0;
 			}
 
 			/* note position of first page in fbatch */
-			dout("%p will write page %p idx %lu\n",
-			     inode, page, page->index);
+			dout("%p will write folio %p idx %lu\n",
+			     inode, folio, folio->index);
 
 			if (atomic_long_inc_return(&fsc->writeback_count) >
 			    CONGESTION_ON_THRESH(
@@ -1153,7 +1152,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 
 			if (IS_ENCRYPTED(inode)) {
 				pages[locked_pages] =
-					fscrypt_encrypt_pagecache_blocks(page,
+					fscrypt_encrypt_pagecache_blocks(&folio->page,
 						PAGE_SIZE, 0,
 						locked_pages ? GFP_NOWAIT : GFP_NOFS);
 				if (IS_ERR(pages[locked_pages])) {
@@ -1163,17 +1162,17 @@  static int ceph_writepages_start(struct address_space *mapping,
 					/* better not fail on first page! */
 					BUG_ON(locked_pages == 0);
 					pages[locked_pages] = NULL;
-					redirty_page_for_writepage(wbc, page);
-					unlock_page(page);
+					folio_redirty_for_writepage(wbc, folio);
+					folio_unlock(folio);
 					break;
 				}
 				++locked_pages;
 			} else {
-				pages[locked_pages++] = page;
+				pages[locked_pages++] = &folio->page;
 			}
 
 			fbatch.folios[i] = NULL;
-			len += thp_size(page);
+			len += folio_size(folio);
 		}
 
 		/* did we get anything? */
@@ -1222,7 +1221,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 			BUG_ON(IS_ERR(req));
 		}
 		BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
-			     thp_size(pages[locked_pages - 1]) - offset);
+			     folio_size(folio) - offset);
 
 		if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
 			rc = -EIO;
@@ -1236,9 +1235,9 @@  static int ceph_writepages_start(struct address_space *mapping,
 		data_pages = pages;
 		op_idx = 0;
 		for (i = 0; i < locked_pages; i++) {
-			struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
+			struct folio *folio = page_folio(ceph_fscrypt_pagecache_page(pages[i]));
 
-			u64 cur_offset = page_offset(page);
+			u64 cur_offset = folio_pos(folio);
 			/*
 			 * Discontinuity in page range? Ceph can handle that by just passing
 			 * multiple extents in the write op.
@@ -1267,10 +1266,10 @@  static int ceph_writepages_start(struct address_space *mapping,
 				op_idx++;
 			}
 
-			set_page_writeback(page);
+			folio_start_writeback(folio);
 			if (caching)
-				ceph_set_page_fscache(page);
-			len += thp_size(page);
+				ceph_set_page_fscache(pages[i]);
+			len += folio_size(folio);
 		}
 		ceph_fscache_write_to_cache(inode, offset, len, caching);
 
@@ -1280,7 +1279,7 @@  static int ceph_writepages_start(struct address_space *mapping,
 			/* writepages_finish() clears writeback pages
 			 * according to the data length, so make sure
 			 * data length covers all locked pages */
-			u64 min_len = len + 1 - thp_size(page);
+			u64 min_len = len + 1 - folio_size(folio);
 			len = get_writepages_data_length(inode, pages[i - 1],
 							 offset);
 			len = max(len, min_len);
@@ -1360,7 +1359,6 @@  static int ceph_writepages_start(struct address_space *mapping,
 		if (wbc->sync_mode != WB_SYNC_NONE &&
 		    start_index == 0 && /* all dirty pages were checked */
 		    !ceph_wbc.head_snapc) {
-			struct page *page;
 			unsigned i, nr;
 			index = 0;
 			while ((index <= end) &&
@@ -1369,10 +1367,10 @@  static int ceph_writepages_start(struct address_space *mapping,
 						PAGECACHE_TAG_WRITEBACK,
 						&fbatch))) {
 				for (i = 0; i < nr; i++) {
-					page = &fbatch.folios[i]->page;
-					if (page_snap_context(page) != snapc)
+					struct folio *folio = fbatch.folios[i];
+					if (folio->private != snapc)
 						continue;
-					wait_on_page_writeback(page);
+					folio_wait_writeback(folio);
 				}
 				folio_batch_release(&fbatch);
 				cond_resched();