diff mbox series

[01/21] mm/page_isolation: protect cma from isolate_single_pageblock

Message ID 20220913195508.3511038-2-opendmb@gmail.com
State New
Headers show
Series [01/21] mm/page_isolation: protect cma from isolate_single_pageblock | expand

Commit Message

Doug Berger Sept. 13, 2022, 7:54 p.m. UTC
The function set_migratetype_isolate() has special handling for
pageblocks of MIGRATE_CMA type that protects them from being
isolated for MIGRATE_MOVABLE requests.

Since isolate_single_pageblock() doesn't receive the migratetype
argument of start_isolate_page_range() it used the migratetype
of the pageblock instead of the requested migratetype which
defeats this MIGRATE_CMA check.

This allows an attempt to create a gigantic page within a CMA
region to change the migratetype of the first and last pageblocks
from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after
failure, which corrupts the CMA region.

The calls to (un)set_migratetype_isolate() for the first and last
pageblocks of the start_isolate_page_range() are moved back into
that function to allow access to its migratetype argument and make
it easier to see how all of the pageblocks in the range are
isolated.

Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity")
Signed-off-by: Doug Berger <opendmb@gmail.com>
---
 mm/page_isolation.c | 75 +++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 40 deletions(-)

Comments

Zi Yan Sept. 14, 2022, 12:02 a.m. UTC | #1
On 13 Sep 2022, at 15:54, Doug Berger wrote:

> The function set_migratetype_isolate() has special handling for
> pageblocks of MIGRATE_CMA type that protects them from being
> isolated for MIGRATE_MOVABLE requests.
>
> Since isolate_single_pageblock() doesn't receive the migratetype
> argument of start_isolate_page_range() it used the migratetype
> of the pageblock instead of the requested migratetype which
> defeats this MIGRATE_CMA check.
>
> This allows an attempt to create a gigantic page within a CMA
> region to change the migratetype of the first and last pageblocks
> from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after
> failure, which corrupts the CMA region.
>
> The calls to (un)set_migratetype_isolate() for the first and last
> pageblocks of the start_isolate_page_range() are moved back into
> that function to allow access to its migratetype argument and make
> it easier to see how all of the pageblocks in the range are
> isolated.
>
> Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity")
> Signed-off-by: Doug Berger <opendmb@gmail.com>
> ---
>  mm/page_isolation.c | 75 +++++++++++++++++++++------------------------
>  1 file changed, 35 insertions(+), 40 deletions(-)

Thanks for the fix.

Why not just pass migratetype into isolate_single_pageblock() and use
it when set_migratetype_isolate() is used? That would have much
fewer changes. What is the reason of pulling skip isolation logic out?

Ultimately, I would like to make MIGRATE_ISOLATE a separate bit,
so that migratetype will not be overwritten during page isolation.
Then, set_migratetype_isolate() and start_isolate_page_range()
will not have migratetype to set in error recovery any more.
That is on my TODO.

>
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 9d73dc38e3d7..8e16aa22cb61 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>   * @flags:			isolation flags
>   * @gfp_flags:			GFP flags used for migrating pages
>   * @isolate_before:	isolate the pageblock before the boundary_pfn
> - * @skip_isolation:	the flag to skip the pageblock isolation in second
> - *			isolate_single_pageblock()
>   *
>   * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
>   * pageblock. When not all pageblocks within a page are isolated at the same
> @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>   * the in-use page then splitting the free page.
>   */
>  static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
> -			gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
> +			gfp_t gfp_flags, bool isolate_before)
>  {
> -	unsigned char saved_mt;
>  	unsigned long start_pfn;
>  	unsigned long isolate_pageblock;
>  	unsigned long pfn;
> @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>  	start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
>  				      zone->zone_start_pfn);
>
> -	saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
> -
> -	if (skip_isolation)
> -		VM_BUG_ON(!is_migrate_isolate(saved_mt));
> -	else {
> -		ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
> -				isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
> -
> -		if (ret)
> -			return ret;
> -	}
> -
>  	/*
>  	 * Bail out early when the to-be-isolated pageblock does not form
>  	 * a free or in-use page across boundary_pfn:
> @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>  					ret = set_migratetype_isolate(page, page_mt,
>  						flags, head_pfn, head_pfn + nr_pages);
>  					if (ret)
> -						goto failed;
> +						return ret;
>  				}
>
>  				ret = __alloc_contig_migrate_range(&cc, head_pfn,
> @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>  					unset_migratetype_isolate(page, page_mt);
>
>  				if (ret)
> -					goto failed;
> +					return -EBUSY;
>  				/*
>  				 * reset pfn to the head of the free page, so
>  				 * that the free page handling code above can split
> @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>  				while (!PageBuddy(pfn_to_page(outer_pfn))) {
>  					/* stop if we cannot find the free page */
>  					if (++order >= MAX_ORDER)
> -						goto failed;
> +						return -EBUSY;
>  					outer_pfn &= ~0UL << order;
>  				}
>  				pfn = outer_pfn;
>  				continue;
>  			} else
>  #endif
> -				goto failed;
> +				return -EBUSY;
>  		}
>
>  		pfn++;
>  	}
>  	return 0;
> -failed:
> -	/* restore the original migratetype */
> -	if (!skip_isolation)
> -		unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
> -	return -EBUSY;
>  }
>
>  /**
> @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>  	unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
>  	unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
>  	int ret;
> -	bool skip_isolation = false;
>
>  	/* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
> -	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
> +	ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype,
> +			flags, isolate_start, isolate_start + pageblock_nr_pages);
>  	if (ret)
>  		return ret;
> -
> -	if (isolate_start == isolate_end - pageblock_nr_pages)
> -		skip_isolation = true;
> +	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false);
> +	if (ret)
> +		goto unset_start_block;
>
>  	/* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
> -	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
> +	pfn = isolate_end - pageblock_nr_pages;
> +	if (isolate_start != pfn) {
> +		ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype,
> +				flags, pfn, pfn + pageblock_nr_pages);
> +		if (ret)
> +			goto unset_start_block;
> +	}
> +	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true);
>  	if (ret) {
> -		unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> -		return ret;
> +		if (isolate_start != pfn)
> +			goto unset_end_block;
> +		else
> +			goto unset_start_block;
>  	}
>
>  	/* skip isolated pageblocks at the beginning and end */
> @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>  	     pfn += pageblock_nr_pages) {
>  		page = __first_valid_page(pfn, pageblock_nr_pages);
>  		if (page && set_migratetype_isolate(page, migratetype, flags,
> -					start_pfn, end_pfn)) {
> -			undo_isolate_page_range(isolate_start, pfn, migratetype);
> -			unset_migratetype_isolate(
> -				pfn_to_page(isolate_end - pageblock_nr_pages),
> -				migratetype);
> -			return -EBUSY;
> -		}
> +					start_pfn, end_pfn))
> +			goto unset_isolated_blocks;
>  	}
>  	return 0;
> +
> +unset_isolated_blocks:
> +	ret = -EBUSY;
> +	undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn,
> +				migratetype);
> +unset_end_block:
> +	unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages),
> +				  migratetype);
> +unset_start_block:
> +	unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> +	return ret;
>  }
>
>  /*
> -- 
> 2.25.1


--
Best Regards,
Yan, Zi
Doug Berger Sept. 14, 2022, 1:47 a.m. UTC | #2
On 9/13/2022 6:09 PM, Zi Yan wrote:
> On 13 Sep 2022, at 20:59, Doug Berger wrote:
> 
>> On 9/13/2022 5:02 PM, Zi Yan wrote:
>>> On 13 Sep 2022, at 15:54, Doug Berger wrote:
>>>
>>>> The function set_migratetype_isolate() has special handling for
>>>> pageblocks of MIGRATE_CMA type that protects them from being
>>>> isolated for MIGRATE_MOVABLE requests.
>>>>
>>>> Since isolate_single_pageblock() doesn't receive the migratetype
>>>> argument of start_isolate_page_range() it used the migratetype
>>>> of the pageblock instead of the requested migratetype which
>>>> defeats this MIGRATE_CMA check.
>>>>
>>>> This allows an attempt to create a gigantic page within a CMA
>>>> region to change the migratetype of the first and last pageblocks
>>>> from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after
>>>> failure, which corrupts the CMA region.
>>>>
>>>> The calls to (un)set_migratetype_isolate() for the first and last
>>>> pageblocks of the start_isolate_page_range() are moved back into
>>>> that function to allow access to its migratetype argument and make
>>>> it easier to see how all of the pageblocks in the range are
>>>> isolated.
>>>>
>>>> Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity")
>>>> Signed-off-by: Doug Berger <opendmb@gmail.com>
>>>> ---
>>>>    mm/page_isolation.c | 75 +++++++++++++++++++++------------------------
>>>>    1 file changed, 35 insertions(+), 40 deletions(-)
>>>
>>> Thanks for the fix.
>> Thanks for the review.
>>
>>>
>>> Why not just pass migratetype into isolate_single_pageblock() and use
>>> it when set_migratetype_isolate() is used? That would have much
>>> fewer changes. What is the reason of pulling skip isolation logic out?
>> I found the skip_isolation logic confusing and thought that setting and restoring the migratetype within the same function and consolidating the error recovery paths also within that function was easier to understand and less prone to accidental breakage.
>>
>> In particular, setting MIGRATE_ISOLATE in isolate_single_pageblock() and having to remember to unset it in start_isolate_page_range() differently on different error paths was troublesome for me.
> 
> Wouldn't this work as well?
> 
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index c1307d1bea81..a312cabd0d95 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -288,6 +288,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>    * @isolate_before:    isolate the pageblock before the boundary_pfn
>    * @skip_isolation:    the flag to skip the pageblock isolation in second
>    *                     isolate_single_pageblock()
> + * @migratetype:       Migrate type to set in error recovery.
>    *
>    * Free and in-use pages can be as big as MAX_ORDER and contain more than one
>    * pageblock. When not all pageblocks within a page are isolated at the same
> @@ -302,9 +303,9 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>    * the in-use page then splitting the free page.
>    */
>   static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
> -                       gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
> +                       gfp_t gfp_flags, bool isolate_before, bool skip_isolation,
> +                       int migratetype)
>   {
> -       unsigned char saved_mt;
>          unsigned long start_pfn;
>          unsigned long isolate_pageblock;
>          unsigned long pfn;
> @@ -328,12 +329,10 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>          start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
>                                        zone->zone_start_pfn);
> 
> -       saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
> -
>          if (skip_isolation)
> -               VM_BUG_ON(!is_migrate_isolate(saved_mt));
> +               VM_BUG_ON(!is_migrate_isolate(get_pageblock_migratetype(pfn_to_page(isolate_pageblock))));
>          else {
> -               ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
> +               ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype, flags,
>                                  isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
> 
>                  if (ret)
> @@ -475,7 +474,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>   failed:
>          /* restore the original migratetype */
>          if (!skip_isolation)
> -               unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
> +               unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
>          return -EBUSY;
>   }
> 
> @@ -537,7 +536,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>          bool skip_isolation = false;
> 
>          /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
> -       ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
> +       ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false,
> +                               skip_isolation, migratetype);
>          if (ret)
>                  return ret;
> 
> @@ -545,7 +545,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>                  skip_isolation = true;
> 
>          /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
> -       ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
> +       ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true,
> +                               skip_isolation, migratetype);
>          if (ret) {
>                  unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>                  return ret;
> 
I would expect this to work as well, but it is not my preference.

>>
>> It could certainly be done differently, but this was my preference.
> 
> A smaller patch can make review easier, right?
It certainly can. Especially when it is for code that you are familiar 
with ;).

I am happy to have you submit a patch to fix this issue and submit it to 
stable for backporting. Fixing the issue is what's important to me.

> 
>>>
>>> Ultimately, I would like to make MIGRATE_ISOLATE a separate bit,
>>> so that migratetype will not be overwritten during page isolation.
>>> Then, set_migratetype_isolate() and start_isolate_page_range()
>>> will not have migratetype to set in error recovery any more.
>>> That is on my TODO.
>>>
>>>>
>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>>>> index 9d73dc38e3d7..8e16aa22cb61 100644
>>>> --- a/mm/page_isolation.c
>>>> +++ b/mm/page_isolation.c
>>>> @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>>>>     * @flags:			isolation flags
>>>>     * @gfp_flags:			GFP flags used for migrating pages
>>>>     * @isolate_before:	isolate the pageblock before the boundary_pfn
>>>> - * @skip_isolation:	the flag to skip the pageblock isolation in second
>>>> - *			isolate_single_pageblock()
>>>>     *
>>>>     * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
>>>>     * pageblock. When not all pageblocks within a page are isolated at the same
>>>> @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>>>>     * the in-use page then splitting the free page.
>>>>     */
>>>>    static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>> -			gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
>>>> +			gfp_t gfp_flags, bool isolate_before)
>>>>    {
>>>> -	unsigned char saved_mt;
>>>>    	unsigned long start_pfn;
>>>>    	unsigned long isolate_pageblock;
>>>>    	unsigned long pfn;
>>>> @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>    	start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
>>>>    				      zone->zone_start_pfn);
>>>>
>>>> -	saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
>>>> -
>>>> -	if (skip_isolation)
>>>> -		VM_BUG_ON(!is_migrate_isolate(saved_mt));
>>>> -	else {
>>>> -		ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
>>>> -				isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
>>>> -
>>>> -		if (ret)
>>>> -			return ret;
>>>> -	}
>>>> -
>>>>    	/*
>>>>    	 * Bail out early when the to-be-isolated pageblock does not form
>>>>    	 * a free or in-use page across boundary_pfn:
>>>> @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>    					ret = set_migratetype_isolate(page, page_mt,
>>>>    						flags, head_pfn, head_pfn + nr_pages);
>>>>    					if (ret)
>>>> -						goto failed;
>>>> +						return ret;
>>>>    				}
>>>>
>>>>    				ret = __alloc_contig_migrate_range(&cc, head_pfn,
>>>> @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>    					unset_migratetype_isolate(page, page_mt);
>>>>
>>>>    				if (ret)
>>>> -					goto failed;
>>>> +					return -EBUSY;
>>>>    				/*
>>>>    				 * reset pfn to the head of the free page, so
>>>>    				 * that the free page handling code above can split
>>>> @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>    				while (!PageBuddy(pfn_to_page(outer_pfn))) {
>>>>    					/* stop if we cannot find the free page */
>>>>    					if (++order >= MAX_ORDER)
>>>> -						goto failed;
>>>> +						return -EBUSY;
>>>>    					outer_pfn &= ~0UL << order;
>>>>    				}
>>>>    				pfn = outer_pfn;
>>>>    				continue;
>>>>    			} else
>>>>    #endif
>>>> -				goto failed;
>>>> +				return -EBUSY;
>>>>    		}
>>>>
>>>>    		pfn++;
>>>>    	}
>>>>    	return 0;
>>>> -failed:
>>>> -	/* restore the original migratetype */
>>>> -	if (!skip_isolation)
>>>> -		unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
>>>> -	return -EBUSY;
>>>>    }
>>>>
>>>>    /**
>>>> @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>>    	unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
>>>>    	unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
>>>>    	int ret;
>>>> -	bool skip_isolation = false;
>>>>
>>>>    	/* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
>>>> -	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
>>>> +	ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype,
>>>> +			flags, isolate_start, isolate_start + pageblock_nr_pages);
>>>>    	if (ret)
>>>>    		return ret;
>>>> -
>>>> -	if (isolate_start == isolate_end - pageblock_nr_pages)
>>>> -		skip_isolation = true;
>>>> +	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false);
>>>> +	if (ret)
>>>> +		goto unset_start_block;
>>>>
>>>>    	/* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
>>>> -	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
>>>> +	pfn = isolate_end - pageblock_nr_pages;
>>>> +	if (isolate_start != pfn) {
>>>> +		ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype,
>>>> +				flags, pfn, pfn + pageblock_nr_pages);
>>>> +		if (ret)
>>>> +			goto unset_start_block;
>>>> +	}
>>>> +	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true);
>>>>    	if (ret) {
>>>> -		unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>>>> -		return ret;
>>>> +		if (isolate_start != pfn)
>>>> +			goto unset_end_block;
>>>> +		else
>>>> +			goto unset_start_block;
>>>>    	}
>>>>
>>>>    	/* skip isolated pageblocks at the beginning and end */
>>>> @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>>    	     pfn += pageblock_nr_pages) {
>>>>    		page = __first_valid_page(pfn, pageblock_nr_pages);
>>>>    		if (page && set_migratetype_isolate(page, migratetype, flags,
>>>> -					start_pfn, end_pfn)) {
>>>> -			undo_isolate_page_range(isolate_start, pfn, migratetype);
>>>> -			unset_migratetype_isolate(
>>>> -				pfn_to_page(isolate_end - pageblock_nr_pages),
>>>> -				migratetype);
>>>> -			return -EBUSY;
>>>> -		}
>>>> +					start_pfn, end_pfn))
>>>> +			goto unset_isolated_blocks;
>>>>    	}
>>>>    	return 0;
>>>> +
>>>> +unset_isolated_blocks:
>>>> +	ret = -EBUSY;
>>>> +	undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn,
>>>> +				migratetype);
>>>> +unset_end_block:
>>>> +	unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages),
>>>> +				  migratetype);
>>>> +unset_start_block:
>>>> +	unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>>>> +	return ret;
>>>>    }
>>>>
>>>>    /*
>>>> -- 
>>>> 2.25.1
>>>
>>>
>>> --
>>> Best Regards,
>>> Yan, Zi
> 
> 
> --
> Best Regards,
> Yan, Zi
Thanks for your efforts to get alloc_contig_range to work at pageblock 
granularity!
-Doug
Doug Berger Sept. 14, 2022, 5:27 p.m. UTC | #3
On 9/13/2022 6:53 PM, Zi Yan wrote:
> On 13 Sep 2022, at 21:47, Doug Berger wrote:
> 
>> On 9/13/2022 6:09 PM, Zi Yan wrote:
>>> On 13 Sep 2022, at 20:59, Doug Berger wrote:
>>>
>>>> On 9/13/2022 5:02 PM, Zi Yan wrote:
>>>>> On 13 Sep 2022, at 15:54, Doug Berger wrote:
>>>>>
>>>>>> The function set_migratetype_isolate() has special handling for
>>>>>> pageblocks of MIGRATE_CMA type that protects them from being
>>>>>> isolated for MIGRATE_MOVABLE requests.
>>>>>>
>>>>>> Since isolate_single_pageblock() doesn't receive the migratetype
>>>>>> argument of start_isolate_page_range() it used the migratetype
>>>>>> of the pageblock instead of the requested migratetype which
>>>>>> defeats this MIGRATE_CMA check.
>>>>>>
>>>>>> This allows an attempt to create a gigantic page within a CMA
>>>>>> region to change the migratetype of the first and last pageblocks
>>>>>> from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after
>>>>>> failure, which corrupts the CMA region.
>>>>>>
>>>>>> The calls to (un)set_migratetype_isolate() for the first and last
>>>>>> pageblocks of the start_isolate_page_range() are moved back into
>>>>>> that function to allow access to its migratetype argument and make
>>>>>> it easier to see how all of the pageblocks in the range are
>>>>>> isolated.
>>>>>>
>>>>>> Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity")
>>>>>> Signed-off-by: Doug Berger <opendmb@gmail.com>
>>>>>> ---
>>>>>>     mm/page_isolation.c | 75 +++++++++++++++++++++------------------------
>>>>>>     1 file changed, 35 insertions(+), 40 deletions(-)
>>>>>
>>>>> Thanks for the fix.
>>>> Thanks for the review.
>>>>
>>>>>
>>>>> Why not just pass migratetype into isolate_single_pageblock() and use
>>>>> it when set_migratetype_isolate() is used? That would have much
>>>>> fewer changes. What is the reason of pulling skip isolation logic out?
>>>> I found the skip_isolation logic confusing and thought that setting and restoring the migratetype within the same function and consolidating the error recovery paths also within that function was easier to understand and less prone to accidental breakage.
>>>>
>>>> In particular, setting MIGRATE_ISOLATE in isolate_single_pageblock() and having to remember to unset it in start_isolate_page_range() differently on different error paths was troublesome for me.
>>>
>>> Wouldn't this work as well?
>>>
>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>>> index c1307d1bea81..a312cabd0d95 100644
>>> --- a/mm/page_isolation.c
>>> +++ b/mm/page_isolation.c
>>> @@ -288,6 +288,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>>>     * @isolate_before:    isolate the pageblock before the boundary_pfn
>>>     * @skip_isolation:    the flag to skip the pageblock isolation in second
>>>     *                     isolate_single_pageblock()
>>> + * @migratetype:       Migrate type to set in error recovery.
>>>     *
>>>     * Free and in-use pages can be as big as MAX_ORDER and contain more than one
>>>     * pageblock. When not all pageblocks within a page are isolated at the same
>>> @@ -302,9 +303,9 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>>>     * the in-use page then splitting the free page.
>>>     */
>>>    static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>> -                       gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
>>> +                       gfp_t gfp_flags, bool isolate_before, bool skip_isolation,
>>> +                       int migratetype)
>>>    {
>>> -       unsigned char saved_mt;
>>>           unsigned long start_pfn;
>>>           unsigned long isolate_pageblock;
>>>           unsigned long pfn;
>>> @@ -328,12 +329,10 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>           start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
>>>                                         zone->zone_start_pfn);
>>>
>>> -       saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
>>> -
>>>           if (skip_isolation)
>>> -               VM_BUG_ON(!is_migrate_isolate(saved_mt));
>>> +               VM_BUG_ON(!is_migrate_isolate(get_pageblock_migratetype(pfn_to_page(isolate_pageblock))));
>>>           else {
>>> -               ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
>>> +               ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype, flags,
>>>                                   isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
>>>
>>>                   if (ret)
>>> @@ -475,7 +474,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>    failed:
>>>           /* restore the original migratetype */
>>>           if (!skip_isolation)
>>> -               unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
>>> +               unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
>>>           return -EBUSY;
>>>    }
>>>
>>> @@ -537,7 +536,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>           bool skip_isolation = false;
>>>
>>>           /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
>>> -       ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
>>> +       ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false,
>>> +                               skip_isolation, migratetype);
>>>           if (ret)
>>>                   return ret;
>>>
>>> @@ -545,7 +545,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>                   skip_isolation = true;
>>>
>>>           /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
>>> -       ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
>>> +       ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true,
>>> +                               skip_isolation, migratetype);
>>>           if (ret) {
>>>                   unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>>>                   return ret;
>>>
>> I would expect this to work as well, but it is not my preference.
>>
>>>>
>>>> It could certainly be done differently, but this was my preference.
>>>
>>> A smaller patch can make review easier, right?
>> It certainly can. Especially when it is for code that you are familiar with ;).
>>
>> I am happy to have you submit a patch to fix this issue and submit it to stable for backporting. Fixing the issue is what's important to me.
>>
> 
> I can submit the above as a patch. Is there a visible userspace issue, so that we need to
> backport it? Thanks.
I did not observe symptoms of the issue, but I did observe the issue 
when allocating gigantic huge pages as part of the hugetlbfs on a system 
with CMA regions.

My best guess is that it probably does not create a "functional" problem 
since the error would likely be cancelled out by subsequent CMA 
allocations restoring the pageblock migratetype. However, in the 
meantime the page allocator would handle free pages in those pageblocks 
without the MIGRATE_CMA qualifications which might impact driver 
performance. There might be other problems of which I am unaware.

The issue currently only exists in the wild in v5.19, so it would be 
nice to get it backported there to nip it in the bud.

> 
>>>
>>>>>
>>>>> Ultimately, I would like to make MIGRATE_ISOLATE a separate bit,
>>>>> so that migratetype will not be overwritten during page isolation.
>>>>> Then, set_migratetype_isolate() and start_isolate_page_range()
>>>>> will not have migratetype to set in error recovery any more.
>>>>> That is on my TODO.
>>>>>
>>>>>>
>>>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>>>>>> index 9d73dc38e3d7..8e16aa22cb61 100644
>>>>>> --- a/mm/page_isolation.c
>>>>>> +++ b/mm/page_isolation.c
>>>>>> @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>>>>>>      * @flags:			isolation flags
>>>>>>      * @gfp_flags:			GFP flags used for migrating pages
>>>>>>      * @isolate_before:	isolate the pageblock before the boundary_pfn
>>>>>> - * @skip_isolation:	the flag to skip the pageblock isolation in second
>>>>>> - *			isolate_single_pageblock()
>>>>>>      *
>>>>>>      * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
>>>>>>      * pageblock. When not all pageblocks within a page are isolated at the same
>>>>>> @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
>>>>>>      * the in-use page then splitting the free page.
>>>>>>      */
>>>>>>     static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>>> -			gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
>>>>>> +			gfp_t gfp_flags, bool isolate_before)
>>>>>>     {
>>>>>> -	unsigned char saved_mt;
>>>>>>     	unsigned long start_pfn;
>>>>>>     	unsigned long isolate_pageblock;
>>>>>>     	unsigned long pfn;
>>>>>> @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>>>     	start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
>>>>>>     				      zone->zone_start_pfn);
>>>>>>
>>>>>> -	saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
>>>>>> -
>>>>>> -	if (skip_isolation)
>>>>>> -		VM_BUG_ON(!is_migrate_isolate(saved_mt));
>>>>>> -	else {
>>>>>> -		ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
>>>>>> -				isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
>>>>>> -
>>>>>> -		if (ret)
>>>>>> -			return ret;
>>>>>> -	}
>>>>>> -
>>>>>>     	/*
>>>>>>     	 * Bail out early when the to-be-isolated pageblock does not form
>>>>>>     	 * a free or in-use page across boundary_pfn:
>>>>>> @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>>>     					ret = set_migratetype_isolate(page, page_mt,
>>>>>>     						flags, head_pfn, head_pfn + nr_pages);
>>>>>>     					if (ret)
>>>>>> -						goto failed;
>>>>>> +						return ret;
>>>>>>     				}
>>>>>>
>>>>>>     				ret = __alloc_contig_migrate_range(&cc, head_pfn,
>>>>>> @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>>>     					unset_migratetype_isolate(page, page_mt);
>>>>>>
>>>>>>     				if (ret)
>>>>>> -					goto failed;
>>>>>> +					return -EBUSY;
>>>>>>     				/*
>>>>>>     				 * reset pfn to the head of the free page, so
>>>>>>     				 * that the free page handling code above can split
>>>>>> @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>>>>     				while (!PageBuddy(pfn_to_page(outer_pfn))) {
>>>>>>     					/* stop if we cannot find the free page */
>>>>>>     					if (++order >= MAX_ORDER)
>>>>>> -						goto failed;
>>>>>> +						return -EBUSY;
>>>>>>     					outer_pfn &= ~0UL << order;
>>>>>>     				}
>>>>>>     				pfn = outer_pfn;
>>>>>>     				continue;
>>>>>>     			} else
>>>>>>     #endif
>>>>>> -				goto failed;
>>>>>> +				return -EBUSY;
>>>>>>     		}
>>>>>>
>>>>>>     		pfn++;
>>>>>>     	}
>>>>>>     	return 0;
>>>>>> -failed:
>>>>>> -	/* restore the original migratetype */
>>>>>> -	if (!skip_isolation)
>>>>>> -		unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
>>>>>> -	return -EBUSY;
>>>>>>     }
>>>>>>
>>>>>>     /**
>>>>>> @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>>>>     	unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
>>>>>>     	unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
>>>>>>     	int ret;
>>>>>> -	bool skip_isolation = false;
>>>>>>
>>>>>>     	/* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
>>>>>> -	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
>>>>>> +	ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype,
>>>>>> +			flags, isolate_start, isolate_start + pageblock_nr_pages);
>>>>>>     	if (ret)
>>>>>>     		return ret;
>>>>>> -
>>>>>> -	if (isolate_start == isolate_end - pageblock_nr_pages)
>>>>>> -		skip_isolation = true;
>>>>>> +	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false);
>>>>>> +	if (ret)
>>>>>> +		goto unset_start_block;
>>>>>>
>>>>>>     	/* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
>>>>>> -	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
>>>>>> +	pfn = isolate_end - pageblock_nr_pages;
>>>>>> +	if (isolate_start != pfn) {
>>>>>> +		ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype,
>>>>>> +				flags, pfn, pfn + pageblock_nr_pages);
>>>>>> +		if (ret)
>>>>>> +			goto unset_start_block;
>>>>>> +	}
>>>>>> +	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true);
>>>>>>     	if (ret) {
>>>>>> -		unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>>>>>> -		return ret;
>>>>>> +		if (isolate_start != pfn)
>>>>>> +			goto unset_end_block;
>>>>>> +		else
>>>>>> +			goto unset_start_block;
>>>>>>     	}
>>>>>>
>>>>>>     	/* skip isolated pageblocks at the beginning and end */
>>>>>> @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>>>>     	     pfn += pageblock_nr_pages) {
>>>>>>     		page = __first_valid_page(pfn, pageblock_nr_pages);
>>>>>>     		if (page && set_migratetype_isolate(page, migratetype, flags,
>>>>>> -					start_pfn, end_pfn)) {
>>>>>> -			undo_isolate_page_range(isolate_start, pfn, migratetype);
>>>>>> -			unset_migratetype_isolate(
>>>>>> -				pfn_to_page(isolate_end - pageblock_nr_pages),
>>>>>> -				migratetype);
>>>>>> -			return -EBUSY;
>>>>>> -		}
>>>>>> +					start_pfn, end_pfn))
>>>>>> +			goto unset_isolated_blocks;
>>>>>>     	}
>>>>>>     	return 0;
>>>>>> +
>>>>>> +unset_isolated_blocks:
>>>>>> +	ret = -EBUSY;
>>>>>> +	undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn,
>>>>>> +				migratetype);
>>>>>> +unset_end_block:
>>>>>> +	unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages),
>>>>>> +				  migratetype);
>>>>>> +unset_start_block:
>>>>>> +	unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>>>>>> +	return ret;
>>>>>>     }
>>>>>>
>>>>>>     /*
>>>>>> -- 
>>>>>> 2.25.1
>>>>>
>>>>>
>>>>> --
>>>>> Best Regards,
>>>>> Yan, Zi
>>>
>>>
>>> --
>>> Best Regards,
>>> Yan, Zi
>> Thanks for your efforts to get alloc_contig_range to work at pageblock granularity!
>> -Doug
> 
> 
> --
> Best Regards,
> Yan, Zi
Thanks,
-Doug
kernel test robot Sept. 16, 2022, 3:40 a.m. UTC | #4
Hi Doug,

I love your patch! Perhaps something to improve:

[auto build test WARNING on robh/for-next]
[also build test WARNING on linus/master v6.0-rc5]
[cannot apply to akpm-mm/mm-everything next-20220915]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Doug-Berger/mm-introduce-Designated-Movable-Blocks/20220914-040216
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
config: x86_64-randconfig-a012 (https://download.01.org/0day-ci/archive/20220916/202209161112.0TpDtDXi-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/10d000298e8a6b50a40ccc90d0d638105255f6e2
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Doug-Berger/mm-introduce-Designated-Movable-Blocks/20220914-040216
        git checkout 10d000298e8a6b50a40ccc90d0d638105255f6e2
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> mm/page_isolation.c:309:6: warning: unused variable 'ret' [-Wunused-variable]
           int ret;
               ^
   1 warning generated.


vim +/ret +309 mm/page_isolation.c

a5d76b54a3f3a4 KAMEZAWA Hiroyuki 2007-10-16  281  
b2c9e2fbba3253 Zi Yan            2022-05-12  282  /**
b2c9e2fbba3253 Zi Yan            2022-05-12  283   * isolate_single_pageblock() -- tries to isolate a pageblock that might be
b2c9e2fbba3253 Zi Yan            2022-05-12  284   * within a free or in-use page.
b2c9e2fbba3253 Zi Yan            2022-05-12  285   * @boundary_pfn:		pageblock-aligned pfn that a page might cross
88ee134320b831 Zi Yan            2022-05-24  286   * @flags:			isolation flags
b2c9e2fbba3253 Zi Yan            2022-05-12  287   * @gfp_flags:			GFP flags used for migrating pages
b2c9e2fbba3253 Zi Yan            2022-05-12  288   * @isolate_before:	isolate the pageblock before the boundary_pfn
b2c9e2fbba3253 Zi Yan            2022-05-12  289   *
b2c9e2fbba3253 Zi Yan            2022-05-12  290   * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
b2c9e2fbba3253 Zi Yan            2022-05-12  291   * pageblock. When not all pageblocks within a page are isolated at the same
b2c9e2fbba3253 Zi Yan            2022-05-12  292   * time, free page accounting can go wrong. For example, in the case of
b2c9e2fbba3253 Zi Yan            2022-05-12  293   * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
b2c9e2fbba3253 Zi Yan            2022-05-12  294   * [         MAX_ORDER-1         ]
b2c9e2fbba3253 Zi Yan            2022-05-12  295   * [  pageblock0  |  pageblock1  ]
b2c9e2fbba3253 Zi Yan            2022-05-12  296   * When either pageblock is isolated, if it is a free page, the page is not
b2c9e2fbba3253 Zi Yan            2022-05-12  297   * split into separate migratetype lists, which is supposed to; if it is an
b2c9e2fbba3253 Zi Yan            2022-05-12  298   * in-use page and freed later, __free_one_page() does not split the free page
b2c9e2fbba3253 Zi Yan            2022-05-12  299   * either. The function handles this by splitting the free page or migrating
b2c9e2fbba3253 Zi Yan            2022-05-12  300   * the in-use page then splitting the free page.
b2c9e2fbba3253 Zi Yan            2022-05-12  301   */
88ee134320b831 Zi Yan            2022-05-24  302  static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
10d000298e8a6b Doug Berger       2022-09-13  303  			gfp_t gfp_flags, bool isolate_before)
b2c9e2fbba3253 Zi Yan            2022-05-12  304  {
b2c9e2fbba3253 Zi Yan            2022-05-12  305  	unsigned long start_pfn;
b2c9e2fbba3253 Zi Yan            2022-05-12  306  	unsigned long isolate_pageblock;
b2c9e2fbba3253 Zi Yan            2022-05-12  307  	unsigned long pfn;
b2c9e2fbba3253 Zi Yan            2022-05-12  308  	struct zone *zone;
88ee134320b831 Zi Yan            2022-05-24 @309  	int ret;
b2c9e2fbba3253 Zi Yan            2022-05-12  310  
b2c9e2fbba3253 Zi Yan            2022-05-12  311  	VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
b2c9e2fbba3253 Zi Yan            2022-05-12  312  
b2c9e2fbba3253 Zi Yan            2022-05-12  313  	if (isolate_before)
b2c9e2fbba3253 Zi Yan            2022-05-12  314  		isolate_pageblock = boundary_pfn - pageblock_nr_pages;
b2c9e2fbba3253 Zi Yan            2022-05-12  315  	else
b2c9e2fbba3253 Zi Yan            2022-05-12  316  		isolate_pageblock = boundary_pfn;
b2c9e2fbba3253 Zi Yan            2022-05-12  317  
b2c9e2fbba3253 Zi Yan            2022-05-12  318  	/*
b2c9e2fbba3253 Zi Yan            2022-05-12  319  	 * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
b2c9e2fbba3253 Zi Yan            2022-05-12  320  	 * only isolating a subset of pageblocks from a bigger than pageblock
b2c9e2fbba3253 Zi Yan            2022-05-12  321  	 * free or in-use page. Also make sure all to-be-isolated pageblocks
b2c9e2fbba3253 Zi Yan            2022-05-12  322  	 * are within the same zone.
b2c9e2fbba3253 Zi Yan            2022-05-12  323  	 */
b2c9e2fbba3253 Zi Yan            2022-05-12  324  	zone  = page_zone(pfn_to_page(isolate_pageblock));
b2c9e2fbba3253 Zi Yan            2022-05-12  325  	start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
b2c9e2fbba3253 Zi Yan            2022-05-12  326  				      zone->zone_start_pfn);
b2c9e2fbba3253 Zi Yan            2022-05-12  327  
b2c9e2fbba3253 Zi Yan            2022-05-12  328  	/*
b2c9e2fbba3253 Zi Yan            2022-05-12  329  	 * Bail out early when the to-be-isolated pageblock does not form
b2c9e2fbba3253 Zi Yan            2022-05-12  330  	 * a free or in-use page across boundary_pfn:
b2c9e2fbba3253 Zi Yan            2022-05-12  331  	 *
b2c9e2fbba3253 Zi Yan            2022-05-12  332  	 * 1. isolate before boundary_pfn: the page after is not online
b2c9e2fbba3253 Zi Yan            2022-05-12  333  	 * 2. isolate after boundary_pfn: the page before is not online
b2c9e2fbba3253 Zi Yan            2022-05-12  334  	 *
b2c9e2fbba3253 Zi Yan            2022-05-12  335  	 * This also ensures correctness. Without it, when isolate after
b2c9e2fbba3253 Zi Yan            2022-05-12  336  	 * boundary_pfn and [start_pfn, boundary_pfn) are not online,
b2c9e2fbba3253 Zi Yan            2022-05-12  337  	 * __first_valid_page() will return unexpected NULL in the for loop
b2c9e2fbba3253 Zi Yan            2022-05-12  338  	 * below.
b2c9e2fbba3253 Zi Yan            2022-05-12  339  	 */
b2c9e2fbba3253 Zi Yan            2022-05-12  340  	if (isolate_before) {
b2c9e2fbba3253 Zi Yan            2022-05-12  341  		if (!pfn_to_online_page(boundary_pfn))
b2c9e2fbba3253 Zi Yan            2022-05-12  342  			return 0;
b2c9e2fbba3253 Zi Yan            2022-05-12  343  	} else {
b2c9e2fbba3253 Zi Yan            2022-05-12  344  		if (!pfn_to_online_page(boundary_pfn - 1))
b2c9e2fbba3253 Zi Yan            2022-05-12  345  			return 0;
b2c9e2fbba3253 Zi Yan            2022-05-12  346  	}
b2c9e2fbba3253 Zi Yan            2022-05-12  347  
b2c9e2fbba3253 Zi Yan            2022-05-12  348  	for (pfn = start_pfn; pfn < boundary_pfn;) {
b2c9e2fbba3253 Zi Yan            2022-05-12  349  		struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);
b2c9e2fbba3253 Zi Yan            2022-05-12  350  
b2c9e2fbba3253 Zi Yan            2022-05-12  351  		VM_BUG_ON(!page);
b2c9e2fbba3253 Zi Yan            2022-05-12  352  		pfn = page_to_pfn(page);
b2c9e2fbba3253 Zi Yan            2022-05-12  353  		/*
b2c9e2fbba3253 Zi Yan            2022-05-12  354  		 * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any
b2c9e2fbba3253 Zi Yan            2022-05-12  355  		 * free pages in [start_pfn, boundary_pfn), its head page will
b2c9e2fbba3253 Zi Yan            2022-05-12  356  		 * always be in the range.
b2c9e2fbba3253 Zi Yan            2022-05-12  357  		 */
b2c9e2fbba3253 Zi Yan            2022-05-12  358  		if (PageBuddy(page)) {
b2c9e2fbba3253 Zi Yan            2022-05-12  359  			int order = buddy_order(page);
b2c9e2fbba3253 Zi Yan            2022-05-12  360  
86d28b0709279c Zi Yan            2022-05-26  361  			if (pfn + (1UL << order) > boundary_pfn) {
86d28b0709279c Zi Yan            2022-05-26  362  				/* free page changed before split, check it again */
86d28b0709279c Zi Yan            2022-05-26  363  				if (split_free_page(page, order, boundary_pfn - pfn))
86d28b0709279c Zi Yan            2022-05-26  364  					continue;
86d28b0709279c Zi Yan            2022-05-26  365  			}
86d28b0709279c Zi Yan            2022-05-26  366  
86d28b0709279c Zi Yan            2022-05-26  367  			pfn += 1UL << order;
b2c9e2fbba3253 Zi Yan            2022-05-12  368  			continue;
b2c9e2fbba3253 Zi Yan            2022-05-12  369  		}
b2c9e2fbba3253 Zi Yan            2022-05-12  370  		/*
b2c9e2fbba3253 Zi Yan            2022-05-12  371  		 * migrate compound pages then let the free page handling code
b2c9e2fbba3253 Zi Yan            2022-05-12  372  		 * above do the rest. If migration is not possible, just fail.
b2c9e2fbba3253 Zi Yan            2022-05-12  373  		 */
b2c9e2fbba3253 Zi Yan            2022-05-12  374  		if (PageCompound(page)) {
b2c9e2fbba3253 Zi Yan            2022-05-12  375  			struct page *head = compound_head(page);
b2c9e2fbba3253 Zi Yan            2022-05-12  376  			unsigned long head_pfn = page_to_pfn(head);
547be963c99f1e Zi Yan            2022-05-30  377  			unsigned long nr_pages = compound_nr(head);
b2c9e2fbba3253 Zi Yan            2022-05-12  378  
88ee134320b831 Zi Yan            2022-05-24  379  			if (head_pfn + nr_pages <= boundary_pfn) {
b2c9e2fbba3253 Zi Yan            2022-05-12  380  				pfn = head_pfn + nr_pages;
b2c9e2fbba3253 Zi Yan            2022-05-12  381  				continue;
b2c9e2fbba3253 Zi Yan            2022-05-12  382  			}
b2c9e2fbba3253 Zi Yan            2022-05-12  383  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
b2c9e2fbba3253 Zi Yan            2022-05-12  384  			/*
b2c9e2fbba3253 Zi Yan            2022-05-12  385  			 * hugetlb, lru compound (THP), and movable compound pages
b2c9e2fbba3253 Zi Yan            2022-05-12  386  			 * can be migrated. Otherwise, fail the isolation.
b2c9e2fbba3253 Zi Yan            2022-05-12  387  			 */
b2c9e2fbba3253 Zi Yan            2022-05-12  388  			if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
b2c9e2fbba3253 Zi Yan            2022-05-12  389  				int order;
b2c9e2fbba3253 Zi Yan            2022-05-12  390  				unsigned long outer_pfn;
88ee134320b831 Zi Yan            2022-05-24  391  				int page_mt = get_pageblock_migratetype(page);
88ee134320b831 Zi Yan            2022-05-24  392  				bool isolate_page = !is_migrate_isolate_page(page);
b2c9e2fbba3253 Zi Yan            2022-05-12  393  				struct compact_control cc = {
b2c9e2fbba3253 Zi Yan            2022-05-12  394  					.nr_migratepages = 0,
b2c9e2fbba3253 Zi Yan            2022-05-12  395  					.order = -1,
b2c9e2fbba3253 Zi Yan            2022-05-12  396  					.zone = page_zone(pfn_to_page(head_pfn)),
b2c9e2fbba3253 Zi Yan            2022-05-12  397  					.mode = MIGRATE_SYNC,
b2c9e2fbba3253 Zi Yan            2022-05-12  398  					.ignore_skip_hint = true,
b2c9e2fbba3253 Zi Yan            2022-05-12  399  					.no_set_skip_hint = true,
b2c9e2fbba3253 Zi Yan            2022-05-12  400  					.gfp_mask = gfp_flags,
b2c9e2fbba3253 Zi Yan            2022-05-12  401  					.alloc_contig = true,
b2c9e2fbba3253 Zi Yan            2022-05-12  402  				};
b2c9e2fbba3253 Zi Yan            2022-05-12  403  				INIT_LIST_HEAD(&cc.migratepages);
b2c9e2fbba3253 Zi Yan            2022-05-12  404  
88ee134320b831 Zi Yan            2022-05-24  405  				/*
88ee134320b831 Zi Yan            2022-05-24  406  				 * XXX: mark the page as MIGRATE_ISOLATE so that
88ee134320b831 Zi Yan            2022-05-24  407  				 * no one else can grab the freed page after migration.
88ee134320b831 Zi Yan            2022-05-24  408  				 * Ideally, the page should be freed as two separate
88ee134320b831 Zi Yan            2022-05-24  409  				 * pages to be added into separate migratetype free
88ee134320b831 Zi Yan            2022-05-24  410  				 * lists.
88ee134320b831 Zi Yan            2022-05-24  411  				 */
88ee134320b831 Zi Yan            2022-05-24  412  				if (isolate_page) {
88ee134320b831 Zi Yan            2022-05-24  413  					ret = set_migratetype_isolate(page, page_mt,
88ee134320b831 Zi Yan            2022-05-24  414  						flags, head_pfn, head_pfn + nr_pages);
88ee134320b831 Zi Yan            2022-05-24  415  					if (ret)
10d000298e8a6b Doug Berger       2022-09-13  416  						return ret;
88ee134320b831 Zi Yan            2022-05-24  417  				}
88ee134320b831 Zi Yan            2022-05-24  418  
b2c9e2fbba3253 Zi Yan            2022-05-12  419  				ret = __alloc_contig_migrate_range(&cc, head_pfn,
b2c9e2fbba3253 Zi Yan            2022-05-12  420  							head_pfn + nr_pages);
b2c9e2fbba3253 Zi Yan            2022-05-12  421  
88ee134320b831 Zi Yan            2022-05-24  422  				/*
88ee134320b831 Zi Yan            2022-05-24  423  				 * restore the page's migratetype so that it can
88ee134320b831 Zi Yan            2022-05-24  424  				 * be split into separate migratetype free lists
88ee134320b831 Zi Yan            2022-05-24  425  				 * later.
88ee134320b831 Zi Yan            2022-05-24  426  				 */
88ee134320b831 Zi Yan            2022-05-24  427  				if (isolate_page)
88ee134320b831 Zi Yan            2022-05-24  428  					unset_migratetype_isolate(page, page_mt);
88ee134320b831 Zi Yan            2022-05-24  429  
b2c9e2fbba3253 Zi Yan            2022-05-12  430  				if (ret)
10d000298e8a6b Doug Berger       2022-09-13  431  					return -EBUSY;
b2c9e2fbba3253 Zi Yan            2022-05-12  432  				/*
b2c9e2fbba3253 Zi Yan            2022-05-12  433  				 * reset pfn to the head of the free page, so
b2c9e2fbba3253 Zi Yan            2022-05-12  434  				 * that the free page handling code above can split
b2c9e2fbba3253 Zi Yan            2022-05-12  435  				 * the free page to the right migratetype list.
b2c9e2fbba3253 Zi Yan            2022-05-12  436  				 *
b2c9e2fbba3253 Zi Yan            2022-05-12  437  				 * head_pfn is not used here as a hugetlb page order
b2c9e2fbba3253 Zi Yan            2022-05-12  438  				 * can be bigger than MAX_ORDER-1, but after it is
b2c9e2fbba3253 Zi Yan            2022-05-12  439  				 * freed, the free page order is not. Use pfn within
b2c9e2fbba3253 Zi Yan            2022-05-12  440  				 * the range to find the head of the free page.
b2c9e2fbba3253 Zi Yan            2022-05-12  441  				 */
b2c9e2fbba3253 Zi Yan            2022-05-12  442  				order = 0;
b2c9e2fbba3253 Zi Yan            2022-05-12  443  				outer_pfn = pfn;
b2c9e2fbba3253 Zi Yan            2022-05-12  444  				while (!PageBuddy(pfn_to_page(outer_pfn))) {
88ee134320b831 Zi Yan            2022-05-24  445  					/* stop if we cannot find the free page */
88ee134320b831 Zi Yan            2022-05-24  446  					if (++order >= MAX_ORDER)
10d000298e8a6b Doug Berger       2022-09-13  447  						return -EBUSY;
b2c9e2fbba3253 Zi Yan            2022-05-12  448  					outer_pfn &= ~0UL << order;
b2c9e2fbba3253 Zi Yan            2022-05-12  449  				}
b2c9e2fbba3253 Zi Yan            2022-05-12  450  				pfn = outer_pfn;
b2c9e2fbba3253 Zi Yan            2022-05-12  451  				continue;
b2c9e2fbba3253 Zi Yan            2022-05-12  452  			} else
b2c9e2fbba3253 Zi Yan            2022-05-12  453  #endif
10d000298e8a6b Doug Berger       2022-09-13  454  				return -EBUSY;
b2c9e2fbba3253 Zi Yan            2022-05-12  455  		}
b2c9e2fbba3253 Zi Yan            2022-05-12  456  
b2c9e2fbba3253 Zi Yan            2022-05-12  457  		pfn++;
b2c9e2fbba3253 Zi Yan            2022-05-12  458  	}
b2c9e2fbba3253 Zi Yan            2022-05-12  459  	return 0;
b2c9e2fbba3253 Zi Yan            2022-05-12  460  }
b2c9e2fbba3253 Zi Yan            2022-05-12  461
diff mbox series

Patch

diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 9d73dc38e3d7..8e16aa22cb61 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -286,8 +286,6 @@  __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * @flags:			isolation flags
  * @gfp_flags:			GFP flags used for migrating pages
  * @isolate_before:	isolate the pageblock before the boundary_pfn
- * @skip_isolation:	the flag to skip the pageblock isolation in second
- *			isolate_single_pageblock()
  *
  * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
  * pageblock. When not all pageblocks within a page are isolated at the same
@@ -302,9 +300,8 @@  __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * the in-use page then splitting the free page.
  */
 static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
-			gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
+			gfp_t gfp_flags, bool isolate_before)
 {
-	unsigned char saved_mt;
 	unsigned long start_pfn;
 	unsigned long isolate_pageblock;
 	unsigned long pfn;
@@ -328,18 +325,6 @@  static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 	start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
 				      zone->zone_start_pfn);
 
-	saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
-
-	if (skip_isolation)
-		VM_BUG_ON(!is_migrate_isolate(saved_mt));
-	else {
-		ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
-				isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
-
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * Bail out early when the to-be-isolated pageblock does not form
 	 * a free or in-use page across boundary_pfn:
@@ -428,7 +413,7 @@  static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 					ret = set_migratetype_isolate(page, page_mt,
 						flags, head_pfn, head_pfn + nr_pages);
 					if (ret)
-						goto failed;
+						return ret;
 				}
 
 				ret = __alloc_contig_migrate_range(&cc, head_pfn,
@@ -443,7 +428,7 @@  static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 					unset_migratetype_isolate(page, page_mt);
 
 				if (ret)
-					goto failed;
+					return -EBUSY;
 				/*
 				 * reset pfn to the head of the free page, so
 				 * that the free page handling code above can split
@@ -459,24 +444,19 @@  static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 				while (!PageBuddy(pfn_to_page(outer_pfn))) {
 					/* stop if we cannot find the free page */
 					if (++order >= MAX_ORDER)
-						goto failed;
+						return -EBUSY;
 					outer_pfn &= ~0UL << order;
 				}
 				pfn = outer_pfn;
 				continue;
 			} else
 #endif
-				goto failed;
+				return -EBUSY;
 		}
 
 		pfn++;
 	}
 	return 0;
-failed:
-	/* restore the original migratetype */
-	if (!skip_isolation)
-		unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
-	return -EBUSY;
 }
 
 /**
@@ -534,21 +514,30 @@  int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 	unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages);
 	unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages);
 	int ret;
-	bool skip_isolation = false;
 
 	/* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
-	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
+	ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype,
+			flags, isolate_start, isolate_start + pageblock_nr_pages);
 	if (ret)
 		return ret;
-
-	if (isolate_start == isolate_end - pageblock_nr_pages)
-		skip_isolation = true;
+	ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false);
+	if (ret)
+		goto unset_start_block;
 
 	/* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
-	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
+	pfn = isolate_end - pageblock_nr_pages;
+	if (isolate_start != pfn) {
+		ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype,
+				flags, pfn, pfn + pageblock_nr_pages);
+		if (ret)
+			goto unset_start_block;
+	}
+	ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true);
 	if (ret) {
-		unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
-		return ret;
+		if (isolate_start != pfn)
+			goto unset_end_block;
+		else
+			goto unset_start_block;
 	}
 
 	/* skip isolated pageblocks at the beginning and end */
@@ -557,15 +546,21 @@  int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 	     pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
 		if (page && set_migratetype_isolate(page, migratetype, flags,
-					start_pfn, end_pfn)) {
-			undo_isolate_page_range(isolate_start, pfn, migratetype);
-			unset_migratetype_isolate(
-				pfn_to_page(isolate_end - pageblock_nr_pages),
-				migratetype);
-			return -EBUSY;
-		}
+					start_pfn, end_pfn))
+			goto unset_isolated_blocks;
 	}
 	return 0;
+
+unset_isolated_blocks:
+	ret = -EBUSY;
+	undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn,
+				migratetype);
+unset_end_block:
+	unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages),
+				  migratetype);
+unset_start_block:
+	unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
+	return ret;
 }
 
 /*