diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ac7eb9cbf24a67ac02925ec8a659219db1b97cf6..90aae84c850aa257bb940e72528e712ab396c90f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4669,6 +4669,14 @@
 	pm_debug_messages	[SUSPEND,KNL]
 			Enable suspend/resume debug messages during boot up.
 
+	pmd_mapping=	[ARM64,KNL]
+			Format: nn%
+			Allocate contiguous memory from a dedicated pfn range
+			whose linear mapping granule is never larger than PMD.
+			nn% is the percentage of each node's memory to reserve.
+			pmd_mapping=100% is intended for hugetlb scenarios: the
+			whole linear mapping is then no larger than PMD.
+
 	pnp.debug=1	[PNP]
 			Enable PNP debug messages (depends on the
 			CONFIG_PNP_DEBUG_MESSAGES option).  Change at run-time
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dec41d9fa0548721ae915729a929018f38008adc..95974b69e20207840b0f4dad0ec927301b512330 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2753,3 +2753,4 @@
 source "drivers/acpi/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
+source "arch/arm64/mm/Kconfig"
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 63625b54e541755a849a552fb6cb7473ce94574e..51549b6fc1a39ee7280361838fc6eaeebbce1433 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -796,6 +796,7 @@ CONFIG_KVM=y
 CONFIG_KVM_ARM_MULTI_LPI_TRANSLATE_CACHE=y
 CONFIG_ARCH_VCPU_STAT=y
 CONFIG_VIRT_VTIMER_IRQ_BYPASS=y
+CONFIG_PFN_RANGE_ALLOC=y
 CONFIG_CPU_MITIGATIONS=y
 
 #
diff --git a/arch/arm64/mm/Kconfig b/arch/arm64/mm/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..e7880c691822bbc4eca09fb493de550787ae6cb9
--- /dev/null
+++ b/arch/arm64/mm/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# MEMORY configuration for arm64
+#
+
+config PFN_RANGE_ALLOC
+	bool "Enable contiguous pfn range allocator"
+	default n
+	select CONTIG_ALLOC
+	depends on MEMORY_HOTPLUG
+	help
+	  Allow contiguous memory to be allocated from a dedicated pfn
+	  range whose linear mapping granule is never larger than PMD.
+
+	  If unsure, say N.
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 2fc8c6dd04070b61078e2e8627fd3acc606adc7e..c02aeb729717d8888d6aad41e52ebb200fc18134 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -15,3 +15,4 @@ KASAN_SANITIZE_physaddr.o	+= n
 
 obj-$(CONFIG_KASAN)		+= kasan_init.o
 KASAN_SANITIZE_kasan_init.o	:= n
+obj-$(CONFIG_PFN_RANGE_ALLOC)	+= pfn_range_alloc.o
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 367f761de1ab536e6557cff1fd57765daa19ce14..da75dd9d964b9965a1fdff7c21e50b10e8d9fb38 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -632,6 +632,11 @@ void __init mem_init(void)
 	pswiotlb_init(1, PSWIOTLB_VERBOSE);
 #endif
 
+	/* Must be called before the buddy allocator is initialized,
+	 * so that the range can still be reserved through memblock.
+ */ + pmd_mapping_reserve_and_remap(); + /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/arm64/mm/internal.h b/arch/arm64/mm/internal.h index e75ad9dd656d5bfd6f8b80251f5d13a2a8f88ee4..6be8688aad4ecc7cc0a98844fbb4c8427bcd517b 100644 --- a/arch/arm64/mm/internal.h +++ b/arch/arm64/mm/internal.h @@ -9,4 +9,27 @@ extern struct memblock_region mbk_memmap_regions[MAX_RES_REGIONS]; extern int mbk_memmap_cnt; +#ifdef CONFIG_PFN_RANGE_ALLOC +#define PFN_RANGE_ALLOC_SIZE PMD_SIZE +#define PFN_RANGE_ALLOC_ORDER PMD_ORDER + +static inline bool should_pmd_linear_mapping(void) +{ + return contig_mem_pool_percent == 100; +} + +void __init pmd_mapping_reserve_and_remap(void); +void __init pmd_mapping_reserved_remap(phys_addr_t start, phys_addr_t end); +#else +static inline void pmd_mapping_reserve_and_remap(void) +{ +} +static inline void pmd_mapping_reserved_remap(phys_addr_t start, phys_addr_t end) +{ +} +static inline bool should_pmd_linear_mapping(void) +{ + return false; +} +#endif #endif /* ifndef _ARM64_MM_INTERNAL_H */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 48e42e4383d3c3fd9008edd0e7e66046a2e12f16..181b5ceb628c07f122d9215135e0f8bfb335ae85 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -25,6 +25,7 @@ #include #include #include +#include "internal.h" #include #include @@ -45,6 +46,11 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ +#ifdef CONFIG_PFN_RANGE_ALLOC +#define NO_PUD_BLOCK_MAPPINGS BIT(3) +#else +#define NO_PUD_BLOCK_MAPPINGS 0 +#endif int idmap_t0sz __ro_after_init; @@ -347,7 +353,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, */ if (pud_sect_supported() && ((addr | next | phys) & ~PUD_MASK) == 0 && - (flags & NO_BLOCK_MAPPINGS) == 0) { + (flags & (NO_BLOCK_MAPPINGS | NO_PUD_BLOCK_MAPPINGS)) == 0) { pud_set_huge(pudp, phys, prot); /* @@ -594,6 +600,8 @@ static void __init map_mem(pgd_t *pgdp) if (can_set_direct_map() || is_virtcca_cvm_world()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (should_pmd_linear_mapping()) + flags |= NO_PUD_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* * Take care not to create a writable alias for the @@ -1141,6 +1149,20 @@ static void free_empty_tables(unsigned long addr, unsigned long end, } #endif +#ifdef CONFIG_PFN_RANGE_ALLOC +void __init pmd_mapping_reserved_remap(phys_addr_t start, phys_addr_t end) +{ + unsigned long vstart, vend; + + vstart = __phys_to_virt(start); + vend = __phys_to_virt(end); + unmap_hotplug_range(vstart, vend, false, NULL); + __create_pgd_mapping(swapper_pg_dir, start, vstart, end - start, + pgprot_tagged(PAGE_KERNEL), early_pgtable_alloc, + NO_PUD_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); +} +#endif + void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node, unsigned long addr, unsigned long next) { @@ -1385,6 +1407,8 @@ int arch_add_memory(int nid, u64 start, u64 size, if (can_set_direct_map() || is_virtcca_cvm_world()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (should_pmd_linear_mapping()) + flags |= NO_PUD_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), size, params->pgprot, __pgd_pgtable_alloc, diff --git a/arch/arm64/mm/pfn_range_alloc.c b/arch/arm64/mm/pfn_range_alloc.c new file mode 100644 index 0000000000000000000000000000000000000000..011b65bcbbaa8a93e05e3b6f931a568b33cdca32 --- /dev/null +++ b/arch/arm64/mm/pfn_range_alloc.c @@ -0,0 +1,624 
@@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Contiguous pfn range allocator + * + * Copyright (C) 2025 Huawei Limited. + */ + +#define pr_fmt(fmt) "pfn_range_alloc: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" +#include "../../../mm/internal.h" + +struct pmd_lm_range { + unsigned long start_pfn; + unsigned long end_pfn; + spinlock_t lock; + unsigned long *bitmap; + unsigned long bitmap_maxno; +}; + +unsigned long contig_mem_pool_percent __ro_after_init; +EXPORT_SYMBOL_GPL(contig_mem_pool_percent); +static unsigned long nr_reserved_pages[MAX_NUMNODES] __initdata; +static struct pmd_lm_range reserved_range[MAX_NUMNODES]; +DEFINE_STATIC_KEY_FALSE(pmd_mapping_initialized); +static atomic_long_t num_poisoned_pfn __read_mostly = ATOMIC_LONG_INIT(0); + +static inline bool pmd_linear_mapping_enabled(void) +{ + return static_branch_unlikely(&pmd_mapping_initialized); +} + +static __init int cmdline_parse_pmd_mapping(char *p) +{ + unsigned long percent; + char *endptr; + + if (!p) + return -EINVAL; + + percent = simple_strtoul(p, &endptr, 0); + if (*endptr != '%' || *(endptr + 1) != '\0') + return -EINVAL; + + if (percent > 100) + return -EINVAL; + + contig_mem_pool_percent = percent; + + return 0; +} +early_param("pmd_mapping", cmdline_parse_pmd_mapping); + +static __init void calculate_node_nr_reserved_pages(void) +{ + unsigned long start_pfn, end_pfn; + int i, nid; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) + nr_reserved_pages[nid] += end_pfn - start_pfn; + + for_each_online_node(nid) { + nr_reserved_pages[nid] = nr_reserved_pages[nid] * contig_mem_pool_percent / 100; + nr_reserved_pages[nid] = ALIGN_DOWN(nr_reserved_pages[nid], + PUD_SIZE / PAGE_SIZE); + } +} + +static __init unsigned long calculate_reserve_base(int nid) +{ + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *zone; + unsigned long base = 0; + +#ifdef CONFIG_ZONE_DMA + zone = &pgdat->node_zones[ZONE_DMA]; + if (managed_zone(zone)) + base = max(base, PFN_PHYS(zone_end_pfn(zone))); +#endif + +#ifdef CONFIG_ZONE_DMA32 + zone = &pgdat->node_zones[ZONE_DMA32]; + if (managed_zone(zone)) + base = max(base, PFN_PHYS(zone_end_pfn(zone))); +#endif + + return base; +} + +static __init int __get_suitable_reserved_range(int nid) +{ + unsigned long base, size, start; + + base = calculate_reserve_base(nid); +retry: + size = nr_reserved_pages[nid] * PAGE_SIZE; + start = memblock_alloc_range_nid(size, PUD_SIZE, base, 0, nid, true); + /* + * If reservation fails, try to fallback to reserve + * smaller size. Fallback is at PUD_SIZE granularity. 
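+	 * Each retry shrinks the requested reservation by one PUD worth
+	 * of pages, until the reservation succeeds or nothing is left.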
+ */ + if (!start) { + nr_reserved_pages[nid] -= PUD_SIZE / PAGE_SIZE; + if (!nr_reserved_pages[nid]) + return -ENOMEM; + goto retry; + } + + reserved_range[nid].start_pfn = PHYS_PFN(start); + reserved_range[nid].end_pfn = PHYS_PFN(start) + nr_reserved_pages[nid]; + + return 0; +} + +static __init int get_suitable_reserved_range(void) +{ + bool restore_bottom_up = false; + unsigned long start, end; + bool resved = false; + int nid, ret; + + if (memblock_bottom_up()) { + memblock_set_bottom_up(false); + restore_bottom_up = true; + } + + calculate_node_nr_reserved_pages(); + for_each_online_node(nid) { + if (!nr_reserved_pages[nid]) + continue; + + ret = __get_suitable_reserved_range(nid); + if (ret) { + pr_warn("reservation failed for node %d\n", nid); + continue; + } + + start = PFN_PHYS(reserved_range[nid].start_pfn); + end = PFN_PHYS(reserved_range[nid].end_pfn); + pmd_mapping_reserved_remap(start, end); + resved = true; + pr_info("reserved %lu MiB on node %d\n", (end - start) / SZ_1M, nid); + } + + if (restore_bottom_up) + memblock_set_bottom_up(true); + + return resved; +} + +static __init void put_suitable_reserved_range(void) +{ + unsigned long start, end; + int ret, nid; + + for_each_online_node(nid) { + start = PFN_PHYS(reserved_range[nid].start_pfn); + end = PFN_PHYS(reserved_range[nid].end_pfn); + + if (start == end) + continue; + + ret = memblock_phys_free(start, end - start); + if (ret) + pr_warn("put reserved memory [%lx, %lx) failed(%d) for node %d\n", + start, end, ret, nid); + } +} + +void __init pmd_mapping_reserve_and_remap(void) +{ + bool resved; + + if (!contig_mem_pool_percent) + return; + + if (should_pmd_linear_mapping()) + goto out; + + if (can_set_direct_map()) { + pr_info("linear mapping is mapped at PTE level, all memory can be borrowed\n"); + goto out; + } + + resved = get_suitable_reserved_range(); + if (!resved) + return; + + put_suitable_reserved_range(); +out: + static_branch_enable(&pmd_mapping_initialized); +} + +static int __init activate_reserved_range(void) +{ + int nid; + unsigned long pfn, end_pfn; + unsigned long bitmap_maxno; + + if (!pmd_linear_mapping_enabled()) + return 0; + + for_each_online_node(nid) { + pfn = reserved_range[nid].start_pfn; + end_pfn = reserved_range[nid].end_pfn; + + if (pfn == end_pfn) + continue; + + bitmap_maxno = (end_pfn - pfn) / (PFN_RANGE_ALLOC_SIZE / PAGE_SIZE); + reserved_range[nid].bitmap_maxno = bitmap_maxno; + reserved_range[nid].bitmap = bitmap_zalloc(bitmap_maxno, GFP_KERNEL); + if (!reserved_range[nid].bitmap) { + reserved_range[nid].start_pfn = 0; + reserved_range[nid].end_pfn = 0; + pr_warn("reserved_range %d fails to be initialized\n", nid); + continue; + } + spin_lock_init(&reserved_range[nid].lock); + } + + return 0; +} +core_initcall(activate_reserved_range); + +struct folio *pfn_range_alloc(unsigned int nr_pages, int nid) +{ + unsigned long min_align = PFN_RANGE_ALLOC_NR_PAGES; + gfp_t gfp_mask = (GFP_KERNEL | __GFP_COMP) & ~__GFP_RECLAIM; + unsigned long start, bitmap_no, bitmap_count, mask, offset; + struct pmd_lm_range *mem_range; + struct folio *folio = ERR_PTR(-EINVAL); + unsigned long pfn; + int ret; + + if (in_interrupt()) + goto out; + + if (nid < 0 || nid >= MAX_NUMNODES) + goto out; + + if (!IS_ALIGNED(nr_pages, min_align)) + goto out; + + if (can_set_direct_map() || should_pmd_linear_mapping()) { + int order = ilog2(nr_pages); + + folio = NULL; + gfp_mask |= __GFP_THISNODE; + if (nr_pages <= MAX_ORDER_NR_PAGES) + folio = __folio_alloc_node(gfp_mask | __GFP_NOWARN, order, nid); + if (!folio) + 
folio = folio_alloc_gigantic(order, gfp_mask, nid, NULL); + if (!folio) + folio = ERR_PTR(-ENOMEM); + + goto out; + } + + mem_range = &reserved_range[nid]; + if (!mem_range->bitmap) { + folio = ERR_PTR(-ENOMEM); + goto out; + } + + start = 0; + bitmap_count = nr_pages / min_align; + mask = bitmap_count - 1; + offset = (mem_range->start_pfn & (nr_pages - 1)) / min_align; + for (;;) { + spin_lock(&mem_range->lock); + bitmap_no = bitmap_find_next_zero_area_off(mem_range->bitmap, + mem_range->bitmap_maxno, start, bitmap_count, mask, offset); + if (bitmap_no >= mem_range->bitmap_maxno) { + spin_unlock(&mem_range->lock); + break; + } + bitmap_set(mem_range->bitmap, bitmap_no, bitmap_count); + spin_unlock(&mem_range->lock); + pfn = mem_range->start_pfn + bitmap_no * min_align; + ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, gfp_mask); + if (!ret) { + folio = pfn_folio(pfn); + goto out; + } + + spin_lock(&mem_range->lock); + bitmap_clear(mem_range->bitmap, bitmap_no, bitmap_count); + spin_unlock(&mem_range->lock); + start = bitmap_no + bitmap_count; + } + + folio = ERR_PTR(-ENOMEM); +out: + trace_pfn_range_alloc(folio, nr_pages, nid); + return folio; +} +EXPORT_SYMBOL_GPL(pfn_range_alloc); + +static void pfn_range_folio_dissolve(struct folio *folio) +{ + int nr_pages = folio_nr_pages(folio); + struct page *page; + int i; + + VM_WARN_ON_FOLIO(folio_ref_count(folio) != 1, folio); + + for (i = 1; i < nr_pages; i++) { + page = folio_page(folio, i); + page->flags &= ~PAGE_FLAGS_CHECK_AT_FREE; + page->mapping = NULL; + clear_compound_head(page); + set_page_refcounted(page); + } + + __folio_clear_head(folio); + + page = &folio->page; + for (i = 0; i < nr_pages; i++, page++) { + if (PageHWPoison(page)) { + atomic_long_inc(&num_poisoned_pfn); + continue; + } + + __free_page(page); + } +} + +static inline bool pfn_range_free_prepare(struct folio *folio) +{ + int nr_pages = folio_nr_pages(folio); + struct page *page = &folio->page; + int i; + + for (i = 0; i < nr_pages; i++, page++) + if (PageHWPoison(page)) { + pfn_range_folio_dissolve(folio); + return false; + } + + return true; +} + +int pfn_range_free(struct folio *folio) +{ + struct pmd_lm_range *mem_range; + unsigned long start_pfn, end_pfn; + unsigned long bitmap_no, bitmap_count; + unsigned long nr_pages = folio_nr_pages(folio); + unsigned long min_align = PFN_RANGE_ALLOC_NR_PAGES; + int ret = 0; + + if (in_interrupt()) { + ret = -EINVAL; + goto out; + } + + if (!pfn_range_free_prepare(folio)) + goto out; + + if (can_set_direct_map() || should_pmd_linear_mapping()) { + folio_put(folio); + goto out; + } + + mem_range = &reserved_range[folio_nid(folio)]; + start_pfn = folio_pfn(folio); + end_pfn = start_pfn + nr_pages; + + if (start_pfn < mem_range->start_pfn || end_pfn > mem_range->end_pfn) { + ret = -EINVAL; + goto out; + } + + free_contig_range(start_pfn, nr_pages); + bitmap_no = (start_pfn - mem_range->start_pfn) / min_align; + bitmap_count = nr_pages / min_align; + spin_lock(&mem_range->lock); + bitmap_clear(mem_range->bitmap, bitmap_no, bitmap_count); + spin_unlock(&mem_range->lock); + +out: + trace_pfn_range_free(folio, ret); + return ret; +} +EXPORT_SYMBOL_GPL(pfn_range_free); + +static inline int check_update_lm_arg(unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned long start, end; + struct page *start_page; + int nid; + + start_page = pfn_to_page(start_pfn); + nid = page_to_nid(start_page); + start = (unsigned long)page_to_virt(start_page); + end = start + (end_pfn - start_pfn) * PAGE_SIZE; + if ((start_pfn >= 
reserved_range[nid].start_pfn && + end_pfn <= reserved_range[nid].end_pfn) + || should_pmd_linear_mapping() + || can_set_direct_map()) { + if (!IS_ALIGNED(start, PFN_RANGE_ALLOC_SIZE) || + !IS_ALIGNED(end, PFN_RANGE_ALLOC_SIZE)) { + return -EINVAL; + } + } else if (!IS_ALIGNED(start, PUD_SIZE) || !IS_ALIGNED(end, PUD_SIZE)) { + return -EINVAL; + } + + return 0; +} + +static int invalid_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + bool set_invalid = (bool)walk->private; + pud_t pud; + + pud = pudp_get(pudp); + if (pud_table(pud)) + return 0; + + if (set_invalid) + pud_val(pud) &= ~PTE_VALID; + else + pud_val(pud) |= PTE_VALID; + set_pud(pudp, pud); + + return 0; +} + +static int invalid_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + bool set_invalid = (bool)walk->private; + pmd_t pmd; + + pmd = pmdp_get(pmdp); + if (pmd_table(pmd)) + return 0; + + if (set_invalid) + pmd_val(pmd) &= ~PTE_VALID; + else + pmd_val(pmd) |= PTE_VALID; + set_pmd(pmdp, pmd); + + return 0; +} + +static int invalid_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + bool set_invalid = (bool)walk->private; + pte_t pte; + + pte = ptep_get(ptep); + + if (set_invalid) + pte_val(pte) &= ~PTE_VALID; + else + pte_val(pte) |= PTE_VALID; + set_pte(ptep, pte); + + return 0; +} + +static const struct mm_walk_ops invalid_ops = { + .pud_entry = invalid_pud_entry, + .pmd_entry = invalid_pmd_entry, + .pte_entry = invalid_pte_entry, +}; + +int set_linear_mapping_invalid(unsigned long start_pfn, unsigned long end_pfn, + bool set_invalid) +{ + unsigned long start, end; + int ret; + + ret = check_update_lm_arg(start_pfn, end_pfn); + if (ret) + return ret; + + start = (unsigned long)page_to_virt(pfn_to_page(start_pfn)); + end = start + (end_pfn - start_pfn) * PAGE_SIZE; + mmap_write_lock(&init_mm); + walk_page_range_novma(&init_mm, start, end, + &invalid_ops, NULL, (void *)set_invalid); + mmap_write_unlock(&init_mm); + if (set_invalid) + flush_tlb_kernel_range(start, end); + + return 0; +} +EXPORT_SYMBOL_GPL(set_linear_mapping_invalid); + +static inline void update_entry_nc(unsigned long long *val, bool set_nc) +{ + *val &= ~PTE_ATTRINDX_MASK; + if (set_nc) + *val |= PTE_ATTRINDX(MT_NORMAL_NC); + else + *val |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + *val |= PTE_VALID; +} + +static int nc_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + bool set_nc = (bool)walk->private; + pud_t pud; + + if (pud_table(*pudp)) + return 0; + + pud = pudp_huge_get_and_clear(walk->mm, addr, pudp); + update_entry_nc(&pud_val(pud), set_nc); + set_pud(pudp, pud); + + return 0; +} + +static int nc_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + bool set_nc = (bool)walk->private; + pmd_t pmd; + + if (pmd_table(*pmdp)) + return 0; + + pmd = pmdp_huge_get_and_clear(walk->mm, addr, pmdp); + update_entry_nc(&pmd_val(pmd), set_nc); + set_pmd(pmdp, pmd); + + return 0; +} + +static int nc_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + bool set_nc = (bool)walk->private; + pte_t pte; + + pte = ptep_get_and_clear(walk->mm, addr, ptep); + update_entry_nc(&pte_val(pte), set_nc); + set_pte(ptep, pte); + + return 0; +} + +static const struct mm_walk_ops nc_ops = { + .pud_entry = nc_pud_entry, + .pmd_entry = nc_pmd_entry, + .pte_entry = nc_pte_entry, +}; + +int set_linear_mapping_nc(unsigned long start_pfn, unsigned long 
end_pfn, bool set_nc) +{ + unsigned long start, end; + int ret; + + start = (unsigned long)page_to_virt(pfn_to_page(start_pfn)); + end = start + (end_pfn - start_pfn) * PAGE_SIZE; + ret = check_update_lm_arg(start_pfn, end_pfn); + if (ret) + return ret; + + mmap_write_lock(&init_mm); + walk_page_range_novma(&init_mm, start, end, + &invalid_ops, NULL, (void *)true); + flush_tlb_kernel_range(start, end); + walk_page_range_novma(&init_mm, start, end, + &nc_ops, NULL, (void *)set_nc); + mmap_write_unlock(&init_mm); + + return 0; +} +EXPORT_SYMBOL_GPL(set_linear_mapping_nc); + +#ifdef CONFIG_DEBUG_FS +static int reserved_range_show(struct seq_file *m, void *v) +{ + int nid; + + for_each_online_node(nid) { + if (numa_is_remote_node(nid)) + continue; + + seq_printf(m, "%d, %llx-%llx\n", nid, + PFN_PHYS(reserved_range[nid].start_pfn), + PFN_PHYS(reserved_range[nid].end_pfn)); + } + + seq_printf(m, "\nHardwareCorrupted: %lu kB\n", + atomic_long_read(&num_poisoned_pfn) << (PAGE_SHIFT - 10)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(reserved_range); + +static int __init reserved_range_debug_init(void) +{ + if (!pmd_linear_mapping_enabled()) + return 0; + + if (can_set_direct_map() || should_pmd_linear_mapping()) + return 0; + + debugfs_create_file("pmd_mapping_reserved_range", 0400, NULL, + NULL, &reserved_range_fops); + return 0; +} +late_initcall(reserved_range_debug_init); +#endif diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 9f5c7c793c5a10762c559597420d57dd5f71515a..fbad47b3f5e87f18895de12508ad00766ea00cdf 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -399,4 +399,25 @@ extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); +#ifdef CONFIG_CONTIG_ALLOC +/* This should be paired with folio_put() rather than free_contig_range(). */ +static inline struct folio *folio_alloc_gigantic(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + struct page *page; + + if (WARN_ON(!order || !(gfp & __GFP_COMP))) + return NULL; + + page = alloc_contig_pages(1 << order, gfp, nid, node); + + return page ? 
page_folio(page) : NULL; +} +#else +static inline struct folio *folio_alloc_gigantic(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + return NULL; +} +#endif #endif /* __LINUX_GFP_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 9213e4f939ff92e72bd03180dd9bab0873b207bf..93da3db9d7ce9e4f60f17ad12a04f46d70f85571 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -305,14 +305,7 @@ static inline int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm, } #endif /* CONFIG_HUGETLB_INSERT_PAGE */ -#ifdef CONFIG_ASCEND_FEATURES struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size); -#else -static inline struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size) -{ - return NULL; -} -#endif #else /* !CONFIG_HUGETLB_PAGE */ @@ -1376,4 +1369,23 @@ hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) return huge_pte_offset(vma->vm_mm, addr, sz); } +#ifdef CONFIG_PFN_RANGE_ALLOC +struct folio *hugetlb_pool_alloc(int nid); +int hugetlb_pool_free(struct folio *folio); +struct folio *hugetlb_pool_alloc_size(int nid, unsigned long size); +#else +static inline struct folio *hugetlb_pool_alloc(int nid) +{ + return ERR_PTR(-EINVAL); +} +static inline int hugetlb_pool_free(struct folio *folio) +{ + return -EINVAL; +} +static inline struct folio *hugetlb_pool_alloc_size(int nid, unsigned long size) +{ + return ERR_PTR(-EINVAL); +} +#endif + #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1dfc9ecc195e51492db4d52d1d771283e2369238..6afe7df09ca6deb2600ae99a7ac05b93f1efbed2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3820,24 +3820,22 @@ static inline bool page_is_guard(struct page *page) return PageGuard(page); } -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype); +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order); static inline bool set_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) + unsigned int order) { if (!debug_guardpage_enabled()) return false; - return __set_page_guard(zone, page, order, migratetype); + return __set_page_guard(zone, page, order); } -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype); +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order); static inline void clear_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) + unsigned int order) { if (!debug_guardpage_enabled()) return; - __clear_page_guard(zone, page, order, migratetype); + __clear_page_guard(zone, page, order); } #else /* CONFIG_DEBUG_PAGEALLOC */ @@ -3847,9 +3845,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } static inline bool page_is_guard(struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) { return false; } + unsigned int order) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) {} + unsigned int order) {} #endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA @@ -4214,4 +4212,36 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); /* added to mm.h to avoid every caller adding new header file */ #include +#ifdef CONFIG_PFN_RANGE_ALLOC +#define 
PFN_RANGE_ALLOC_SIZE PMD_SIZE +#define PFN_RANGE_ALLOC_ORDER PMD_ORDER +#define PFN_RANGE_ALLOC_NR_PAGES (1 << PFN_RANGE_ALLOC_ORDER) + +extern unsigned long contig_mem_pool_percent; +struct folio *pfn_range_alloc(unsigned int nr_pages, int nid); +int pfn_range_free(struct folio *folio); +int set_linear_mapping_nc(unsigned long start_pfn, unsigned long end_pfn, bool set_nc); +int set_linear_mapping_invalid(unsigned long start_pfn, unsigned long end_pfn, + bool set_invalid); +#else +static inline struct folio *pfn_range_alloc(unsigned int nr_pages, int nid) +{ + return ERR_PTR(-EINVAL); +} +static inline int pfn_range_free(struct folio *folio) +{ + return -EINVAL; +} +static inline +int set_linear_mapping_nc(unsigned long start_pfn, unsigned long end_pfn, bool set_nc) +{ + return -EINVAL; +} +static inline +int set_linear_mapping_invalid(unsigned long start_pfn, unsigned long end_pfn, + bool set_invalid) +{ + return -EINVAL; +} +#endif #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b1cded2400498751ff73233d7c3022e116b7b751..afc35b8cb44e2495a21ba80d18bd38718ef25830 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1032,7 +1032,7 @@ struct zone { CACHELINE_PADDING(_pad3_); - KABI_RESERVE(1) + KABI_USE(1, unsigned long nr_free_highatomic) KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 4ac34392823a9c74321e18096db07d8bba682aea..73dc2c1841ec13c51b1526279d57134294ca982b 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -3,10 +3,6 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION -static inline bool has_isolate_pageblock(struct zone *zone) -{ - return zone->nr_isolate_pageblock; -} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -16,10 +12,6 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else -static inline bool has_isolate_pageblock(struct zone *zone) -{ - return false; -} static inline bool is_migrate_isolate_page(struct page *page) { return false; @@ -34,8 +26,9 @@ static inline bool is_migrate_isolate(int migratetype) #define REPORT_FAILURE 0x2 void set_pageblock_migratetype(struct page *page, int migratetype); -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype, int *num_movable); + +bool move_freepages_block_isolate(struct zone *zone, struct page *page, + int migratetype); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int migratetype, int flags, gfp_t gfp_flags); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d783d5a27352d3227e49274a0f43157080a0077f..72ecd46fd0c494183967334a41fa8d4ac461ce8d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(struct folio *folio, mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, - int migratetype) -{ - __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); - if (is_migrate_cma(migratetype)) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); -} - extern const char * const vmstat_text[]; static inline const char *zone_stat_name(enum zone_stat_item item) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 
3e78e6bd6e18e9ca5d8562fda43a6a2a7089d2c6..28b9d6958724696193d6c9eb727b7aff89f398bd 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -544,6 +544,124 @@ TRACE_EVENT(mm_mem_sampling_damon_record, TP_printk("vaddr=%llx pid=%d", __entry->vaddr, __entry->pid) ); #endif /* CONFIG_DAMON_MEM_SAMPLING */ + +#ifdef CONFIG_PFN_RANGE_ALLOC +TRACE_EVENT(pfn_range_alloc, + + TP_PROTO(struct folio *folio, unsigned int nr_pages, + int nid), + + TP_ARGS(folio, nr_pages, nid), + + TP_STRUCT__entry( + __field(struct folio*, folio) + __field(unsigned int, nr_pages) + __field(int, nid) + ), + + TP_fast_assign( + __entry->folio = folio; + __entry->nr_pages = nr_pages; + __entry->nid = nid; + ), + + TP_printk("folio=%p pfn=0x%lx err=%ld nr_pages=%u nid=%d", + !IS_ERR(__entry->folio) ? __entry->folio : NULL, + !IS_ERR(__entry->folio) ? folio_pfn(__entry->folio) : 0, + !IS_ERR(__entry->folio) ? 0 : PTR_ERR(__entry->folio), + __entry->nr_pages, + __entry->nid) +); + +TRACE_EVENT(pfn_range_free, + + TP_PROTO(struct folio *folio, int ret), + + TP_ARGS(folio, ret), + + TP_STRUCT__entry( + __field(struct folio*, folio) + __field(int, ret) + ), + + TP_fast_assign( + __entry->folio = folio; + __entry->ret = ret; + ), + + TP_printk("folio=%p pfn=0x%lx ret=%d", + __entry->folio, folio_pfn(__entry->folio), __entry->ret) +); + +TRACE_EVENT(hugetlb_pool_alloc, + + TP_PROTO(struct folio *folio, int nid), + + TP_ARGS(folio, nid), + + TP_STRUCT__entry( + __field(struct folio*, folio) + __field(int, nid) + ), + + TP_fast_assign( + __entry->folio = folio; + __entry->nid = nid; + ), + + TP_printk("folio=%p pfn=0x%lx err=%ld nid=%d", + !IS_ERR(__entry->folio) ? __entry->folio : NULL, + !IS_ERR(__entry->folio) ? folio_pfn(__entry->folio) : 0, + !IS_ERR(__entry->folio) ? 0 : PTR_ERR(__entry->folio), + __entry->nid) +); + +TRACE_EVENT(hugetlb_pool_free, + + TP_PROTO(struct folio *folio, int ret), + + TP_ARGS(folio, ret), + + TP_STRUCT__entry( + __field(struct folio*, folio) + __field(int, ret) + ), + + TP_fast_assign( + __entry->folio = folio; + __entry->ret = ret; + ), + + TP_printk("folio=%p pfn=0x%lx ret=%d", + __entry->folio, folio_pfn(__entry->folio), __entry->ret) +); + +TRACE_EVENT(hugetlb_pool_alloc_size, + + TP_PROTO(struct folio *folio, int nid, unsigned long size), + + TP_ARGS(folio, nid, size), + + TP_STRUCT__entry( + __field(struct folio*, folio) + __field(int, nid) + __field(unsigned long, size) + ), + + TP_fast_assign( + __entry->folio = folio; + __entry->nid = nid; + __entry->size = size; + ), + + TP_printk("folio=%p pfn=0x%lx err=%ld nid=%d size=0x%lx", + !IS_ERR(__entry->folio) ? __entry->folio : NULL, + !IS_ERR(__entry->folio) ? folio_pfn(__entry->folio) : 0, + !IS_ERR(__entry->folio) ? 
0 : PTR_ERR(__entry->folio), + __entry->nid, __entry->size) +); + +#endif #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ diff --git a/mm/compaction.c b/mm/compaction.c index ed0baf7404c8b8e54624a3ed70fe42532e5b0ed0..98be4f7c07dd6c731ac3bf16e06de6187ec866a2 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -80,33 +80,6 @@ static inline bool is_via_compact_memory(int order) { return false; } #define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) #endif -static void split_map_pages(struct list_head *freepages) -{ - unsigned int i, order; - struct page *page, *next; - LIST_HEAD(tmp_list); - - for (order = 0; order < NR_PAGE_ORDERS; order++) { - list_for_each_entry_safe(page, next, &freepages[order], lru) { - unsigned int nr_pages; - - list_del(&page->lru); - - nr_pages = 1 << order; - - post_alloc_hook(page, order, __GFP_MOVABLE); - if (order) - split_page(page, order); - - for (i = 0; i < nr_pages; i++) { - list_add(&page->lru, &tmp_list); - page++; - } - } - list_splice_init(&tmp_list, &freepages[0]); - } -} - static unsigned long release_free_list(struct list_head *freepages) { int order; @@ -737,11 +710,11 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, * * Non-free pages, invalid PFNs, or zone boundaries within the * [start_pfn, end_pfn) range are considered errors, cause function to - * undo its actions and return zero. + * undo its actions and return zero. cc->freepages[] are empty. * * Otherwise, function returns one-past-the-last PFN of isolated page * (which may be greater then end_pfn if end fell in a middle of - * a free page). + * a free page). cc->freepages[] contain free pages isolated. */ unsigned long isolate_freepages_range(struct compact_control *cc, @@ -749,10 +722,9 @@ isolate_freepages_range(struct compact_control *cc, { unsigned long isolated, pfn, block_start_pfn, block_end_pfn; int order; - struct list_head tmp_freepages[NR_PAGE_ORDERS]; for (order = 0; order < NR_PAGE_ORDERS; order++) - INIT_LIST_HEAD(&tmp_freepages[order]); + INIT_LIST_HEAD(&cc->freepages[order]); pfn = start_pfn; block_start_pfn = pageblock_start_pfn(pfn); @@ -783,7 +755,7 @@ isolate_freepages_range(struct compact_control *cc, break; isolated = isolate_freepages_block(cc, &isolate_start_pfn, - block_end_pfn, tmp_freepages, 0, true); + block_end_pfn, cc->freepages, 0, true); /* * In strict mode, isolate_freepages_block() returns 0 if @@ -802,13 +774,10 @@ isolate_freepages_range(struct compact_control *cc, if (pfn < end_pfn) { /* Loop terminated early, cleanup. */ - release_free_list(tmp_freepages); + release_free_list(cc->freepages); return 0; } - /* __isolate_free_page() does not map the pages */ - split_map_pages(tmp_freepages); - /* We don't use freelists for anything. */ return pfn; } @@ -2333,7 +2302,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) ret = COMPACT_NO_SUITABLE_PAGE; for (order = cc->order; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &cc->zone->free_area[order]; - bool can_steal; + bool claim_block; /* Job done if page is free of the right migratetype */ if (!free_area_empty(area, migratetype)) @@ -2350,7 +2319,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) * other migratetype buddy lists. */ if (find_suitable_fallback(area, order, migratetype, - true, &can_steal) != -1) + true, &claim_block) != -1) /* * Movable pages are OK in any pageblock. 
If we are * stealing for a non-movable allocation, make sure diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c index f9d145730fd1693a0265de8a47178c6c320ff054..03a810927d0a73f08ac8fbd77b7fe7a6f5953b66 100644 --- a/mm/debug_page_alloc.c +++ b/mm/debug_page_alloc.c @@ -32,8 +32,7 @@ static int __init debug_guardpage_minorder_setup(char *buf) } early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup); -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype) +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order) { if (order >= debug_guardpage_minorder()) return false; @@ -41,19 +40,12 @@ bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, __SetPageGuard(page); INIT_LIST_HEAD(&page->buddy_list); set_page_private(page, order); - /* Guard pages are not available for any usage */ - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, -(1 << order), migratetype); return true; } -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype) +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order) { __ClearPageGuard(page); - set_page_private(page, 0); - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, (1 << order), migratetype); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9ba075d424f900043c12591c2ed274b56bbf2d36..1cd1196e0d66fba77811cf211c9e4f22c9d04b73 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -37,6 +37,9 @@ #include #include #include +#ifndef __GENKSYMS__ +#include +#endif #include #include @@ -7917,7 +7920,6 @@ int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm, unsigned long addr, EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte_by_pa); #endif /* CONFIG_HUGETLB_INSERT_PAGE */ -#ifdef CONFIG_ASCEND_FEATURES struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size) { gfp_t gfp_mask; @@ -7926,13 +7928,12 @@ struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size) unsigned long flags; struct folio *folio = NULL; - nodes_clear(nodemask); - node_set(nid, nodemask); - h = size_to_hstate(size); if (!h) return NULL; + nodes_clear(nodemask); + node_set(nid, nodemask); gfp_mask = htlb_alloc_mask(h); spin_lock_irqsave(&hugetlb_lock, flags); if (h->free_huge_pages - h->resv_huge_pages > 0) @@ -7942,4 +7943,57 @@ struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size) return folio; } EXPORT_SYMBOL(alloc_hugetlb_folio_size); + +#ifdef CONFIG_PFN_RANGE_ALLOC +struct folio *hugetlb_pool_alloc(int nid) +{ + struct folio *folio = ERR_PTR(-EINVAL); + + if (nid < 0 || nid >= MAX_NUMNODES) + goto out; + + folio = alloc_hugetlb_folio_size(nid, PFN_RANGE_ALLOC_SIZE); + if (!folio) + folio = ERR_PTR(-ENOMEM); + +out: + trace_hugetlb_pool_alloc(folio, nid); + return folio; +} +EXPORT_SYMBOL_GPL(hugetlb_pool_alloc); + +int hugetlb_pool_free(struct folio *folio) +{ + int ret = -EINVAL; + + if (!folio_test_hugetlb(folio)) + goto out; + + ret = 0; + folio_put(folio); +out: + trace_hugetlb_pool_free(folio, ret); + return ret; +} +EXPORT_SYMBOL_GPL(hugetlb_pool_free); + +struct folio *hugetlb_pool_alloc_size(int nid, unsigned long size) +{ + struct folio *folio = ERR_PTR(-EINVAL); + + if (nid < 0 || nid >= MAX_NUMNODES) + goto out; + + if ((size != PMD_SIZE) && (size != PUD_SIZE)) + goto out; + + folio = alloc_hugetlb_folio_size(nid, size); + if (!folio) + folio = ERR_PTR(-ENOMEM); + +out: + trace_hugetlb_pool_alloc_size(folio, nid, size); + return folio; 
+} +EXPORT_SYMBOL_GPL(hugetlb_pool_alloc_size); #endif diff --git a/mm/internal.h b/mm/internal.h index 55b0698ad3b02300fbc1a89187765c20e5f60483..3a127c3e232513928dc547805f81d4a14d051c12 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -714,10 +714,6 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int); - -int split_free_page(struct page *free_page, - unsigned int order, unsigned long split_pfn_offset); - #if defined CONFIG_COMPACTION || defined CONFIG_CMA #define MAX_PAGE_ORDER MAX_ORDER @@ -788,17 +784,13 @@ int isolate_migratepages_range(struct compact_control *cc, unsigned long low_pfn, unsigned long end_pfn); -int __alloc_contig_migrate_range(struct compact_control *cc, - unsigned long start, unsigned long end, - int migratetype); - /* Free whole pageblock and set its migration type to MIGRATE_CMA. */ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal); + int migratetype, bool claim_only, bool *claim_block); static inline bool free_area_empty(struct free_area *area, int migratetype) { @@ -1196,11 +1188,6 @@ static inline bool is_migrate_highatomic(enum migratetype migratetype) return migratetype == MIGRATE_HIGHATOMIC; } -static inline bool is_migrate_highatomic_page(struct page *page) -{ - return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC; -} - void setup_zone_pageset(struct zone *zone); struct migration_target_control { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index afbcbc8adeb299644b47e7cbf9dd0822034b3432..8f06d6f8124d553f1866659a7eb03da7a869aa68 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -208,24 +208,6 @@ EXPORT_SYMBOL(node_states); gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; -/* - * A cached value of the page's pageblock's migratetype, used when the page is - * put on a pcplist. Used to avoid the pageblock migratetype lookup when - * freeing from pcplists in most cases, at the cost of possibly becoming stale. - * Also the migratetype set in the page does not necessarily match the pcplist - * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any - * other index - this ensures that it will be put on the correct CMA freelist. 
- */ -static inline int get_pcppage_migratetype(struct page *page) -{ - return page->index; -} - -static inline void set_pcppage_migratetype(struct page *page, int migratetype) -{ - page->index = migratetype; -} - #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE unsigned int pageblock_order __read_mostly; #endif @@ -655,23 +637,38 @@ compaction_capture(struct capture_control *capc, struct page *page, } #endif /* CONFIG_COMPACTION */ -/* Used for pages not on another list */ -static inline void add_to_free_list(struct page *page, struct zone *zone, - unsigned int order, int migratetype) +static inline void account_freepages(struct zone *zone, int nr_pages, + int migratetype) { - struct free_area *area = &zone->free_area[order]; + lockdep_assert_held(&zone->lock); - list_add(&page->buddy_list, &area->free_list[migratetype]); - area->nr_free++; + if (is_migrate_isolate(migratetype)) + return; + + __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); + + if (is_migrate_cma(migratetype)) + __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); + else if (is_migrate_highatomic(migratetype)) + WRITE_ONCE(zone->nr_free_highatomic, + zone->nr_free_highatomic + nr_pages); } /* Used for pages not on another list */ -static inline void add_to_free_list_tail(struct page *page, struct zone *zone, - unsigned int order, int migratetype) +static inline void __add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype, + bool tail) { struct free_area *area = &zone->free_area[order]; - list_add_tail(&page->buddy_list, &area->free_list[migratetype]); + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), migratetype, 1 << order); + + if (tail) + list_add_tail(&page->buddy_list, &area->free_list[migratetype]); + else + list_add(&page->buddy_list, &area->free_list[migratetype]); area->nr_free++; } @@ -681,16 +678,28 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone, * allocation again (e.g., optimization for memory onlining). 
*/ static inline void move_to_free_list(struct page *page, struct zone *zone, - unsigned int order, int migratetype) + unsigned int order, int old_mt, int new_mt) { struct free_area *area = &zone->free_area[order]; - list_move_tail(&page->buddy_list, &area->free_list[migratetype]); + /* Free page moving can fail, so it happens before the type update */ + VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), old_mt, 1 << order); + + list_move_tail(&page->buddy_list, &area->free_list[new_mt]); + + account_freepages(zone, -(1 << order), old_mt); + account_freepages(zone, 1 << order, new_mt); } -static inline void del_page_from_free_list(struct page *page, struct zone *zone, - unsigned int order) +static inline void __del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) { + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), migratetype, 1 << order); + /* clear reported state and update reported page count */ if (page_reported(page)) __ClearPageReported(page); @@ -701,6 +710,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone, zone->free_area[order].nr_free--; } +static inline void del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) +{ + __del_page_from_free_list(page, zone, order, migratetype); + account_freepages(zone, -(1 << order), migratetype); +} + static inline struct page *get_page_from_free_area(struct free_area *area, int migratetype) { @@ -772,16 +788,17 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); VM_BUG_ON(migratetype == -1); - if (likely(!is_migrate_isolate(migratetype))) - __mod_zone_freepage_state(zone, 1 << order, migratetype); VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); + account_freepages(zone, 1 << order, migratetype); + while (order < MAX_ORDER) { + int buddy_mt = migratetype; + if (compaction_capture(capc, page, order, migratetype)) { - __mod_zone_freepage_state(zone, -(1 << order), - migratetype); + account_freepages(zone, -(1 << order), migratetype); return; } @@ -796,11 +813,11 @@ static inline void __free_one_page(struct page *page, * pageblock isolation could cause incorrect freepage or CMA * accounting or HIGHATOMIC accounting. */ - int buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); + buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); - if (migratetype != buddy_mt - && (!migratetype_is_mergeable(migratetype) || - !migratetype_is_mergeable(buddy_mt))) + if (migratetype != buddy_mt && + (!migratetype_is_mergeable(migratetype) || + !migratetype_is_mergeable(buddy_mt))) goto done_merging; } @@ -809,9 +826,19 @@ static inline void __free_one_page(struct page *page, * merge with it and move up one order. */ if (page_is_guard(buddy)) - clear_page_guard(zone, buddy, order, migratetype); + clear_page_guard(zone, buddy, order); else - del_page_from_free_list(buddy, zone, order); + __del_page_from_free_list(buddy, zone, order, buddy_mt); + + if (unlikely(buddy_mt != migratetype)) { + /* + * Match buddy type. This ensures that an + * expand() down the line puts the sub-blocks + * on the right freelists. 
+ */ + set_pageblock_migratetype(buddy, migratetype); + } + combined_pfn = buddy_pfn & pfn; page = page + (combined_pfn - pfn); pfn = combined_pfn; @@ -828,74 +855,13 @@ static inline void __free_one_page(struct page *page, else to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order); - if (to_tail) - add_to_free_list_tail(page, zone, order, migratetype); - else - add_to_free_list(page, zone, order, migratetype); + __add_to_free_list(page, zone, order, migratetype, to_tail); /* Notify page reporting subsystem of freed page */ if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY)) page_reporting_notify_free(order); } -/** - * split_free_page() -- split a free page at split_pfn_offset - * @free_page: the original free page - * @order: the order of the page - * @split_pfn_offset: split offset within the page - * - * Return -ENOENT if the free page is changed, otherwise 0 - * - * It is used when the free page crosses two pageblocks with different migratetypes - * at split_pfn_offset within the page. The split free page will be put into - * separate migratetype lists afterwards. Otherwise, the function achieves - * nothing. - */ -int split_free_page(struct page *free_page, - unsigned int order, unsigned long split_pfn_offset) -{ - struct zone *zone = page_zone(free_page); - unsigned long free_page_pfn = page_to_pfn(free_page); - unsigned long pfn; - unsigned long flags; - int free_page_order; - int mt; - int ret = 0; - - if (split_pfn_offset == 0) - return ret; - - spin_lock_irqsave(&zone->lock, flags); - - if (!PageBuddy(free_page) || buddy_order(free_page) != order) { - ret = -ENOENT; - goto out; - } - - mt = get_pfnblock_migratetype(free_page, free_page_pfn); - if (likely(!is_migrate_isolate(mt))) - __mod_zone_freepage_state(zone, -(1UL << order), mt); - - del_page_from_free_list(free_page, zone, order); - for (pfn = free_page_pfn; - pfn < free_page_pfn + (1UL << order);) { - int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn); - - free_page_order = min_t(unsigned int, - pfn ? __ffs(pfn) : order, - __fls(split_pfn_offset)); - __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order, - mt, FPI_NONE); - pfn += 1UL << free_page_order; - split_pfn_offset -= (1UL << free_page_order); - /* we have done the first part, now switch to second part */ - if (split_pfn_offset == 0) - split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); - } -out: - spin_unlock_irqrestore(&zone->lock, flags); - return ret; -} /* * A bad page could be due to a number of fields. Instead of multiple branches, * try and check multiple fields with one check. 
The caller must do a detailed @@ -1202,7 +1168,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, { unsigned long flags; unsigned int order; - bool isolated_pageblocks; struct page *page; /* @@ -1215,7 +1180,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, pindex = pindex - 1; spin_lock_irqsave(&zone->lock, flags); - isolated_pageblocks = has_isolate_pageblock(zone); while (count > 0) { struct list_head *list; @@ -1231,23 +1195,19 @@ static void free_pcppages_bulk(struct zone *zone, int count, order = pindex_to_order(pindex); nr_pages = 1 << order; do { + unsigned long pfn; int mt; page = list_last_entry(list, struct page, pcp_list); - mt = get_pcppage_migratetype(page); + pfn = page_to_pfn(page); + mt = get_pfnblock_migratetype(page, pfn); /* must delete to avoid corrupting pcp list */ list_del(&page->pcp_list); count -= nr_pages; pcp->count -= nr_pages; - /* MIGRATE_ISOLATE page should not go to pcplists */ - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); - /* Pageblock could have been isolated meanwhile */ - if (unlikely(isolated_pageblocks)) - mt = get_pageblock_migratetype(page); - - __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE); + __free_one_page(page, pfn, zone, order, mt, FPI_NONE); trace_mm_page_pcpu_drain(page, order, mt); } while (count > 0 && !list_empty(list)); } @@ -1255,47 +1215,51 @@ static void free_pcppages_bulk(struct zone *zone, int count, spin_unlock_irqrestore(&zone->lock, flags); } -static void free_one_page(struct zone *zone, - struct page *page, unsigned long pfn, - unsigned int order, - int migratetype, fpi_t fpi_flags) +/* Split a multi-block free page into its individual pageblocks. */ +static void split_large_buddy(struct zone *zone, struct page *page, + unsigned long pfn, int order, fpi_t fpi) +{ + unsigned long end = pfn + (1 << order); + + VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order)); + /* Caller removed page from freelist, buddy info cleared! */ + VM_WARN_ON_ONCE(PageBuddy(page)); + + if (order > pageblock_order) + order = pageblock_order; + + do { + int mt = get_pfnblock_migratetype(page, pfn); + + __free_one_page(page, pfn, zone, order, mt, fpi); + pfn += 1 << order; + if (pfn == end) + break; + page = pfn_to_page(pfn); + } while (1); +} + +static void free_one_page(struct zone *zone, struct page *page, + unsigned long pfn, unsigned int order, + fpi_t fpi_flags) { unsigned long flags; spin_lock_irqsave(&zone->lock, flags); - if (unlikely(has_isolate_pageblock(zone) || - is_migrate_isolate(migratetype))) { - migratetype = get_pfnblock_migratetype(page, pfn); - } - __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); + split_large_buddy(zone, page, pfn, order, fpi_flags); spin_unlock_irqrestore(&zone->lock, flags); } static void __free_pages_ok(struct page *page, unsigned int order, fpi_t fpi_flags) { - unsigned long flags; - int migratetype; unsigned long pfn = page_to_pfn(page); struct zone *zone = page_zone(page); if (!free_pages_prepare(page, order)) return; - /* - * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here - * is used to avoid calling get_pfnblock_migratetype() under the lock. - * This will reduce the lock holding time. 
- */ - migratetype = get_pfnblock_migratetype(page, pfn); - - spin_lock_irqsave(&zone->lock, flags); - if (unlikely(has_isolate_pageblock(zone) || - is_migrate_isolate(migratetype))) { - migratetype = get_pfnblock_migratetype(page, pfn); - } - __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); - spin_unlock_irqrestore(&zone->lock, flags); + free_one_page(zone, page, pfn, order, fpi_flags); __count_vm_events(PGFREE, 1 << order); } @@ -1402,10 +1366,11 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn, * * -- nyc */ -static inline void expand(struct zone *zone, struct page *page, - int low, int high, int migratetype) +static inline unsigned int expand(struct zone *zone, struct page *page, int low, + int high, int migratetype) { - unsigned long size = 1 << high; + unsigned int size = 1 << high; + unsigned int nr_added = 0; while (high > low) { high--; @@ -1418,12 +1383,26 @@ static inline void expand(struct zone *zone, struct page *page, * Corresponding page table entries will not be touched, * pages will stay not present in virtual address space */ - if (set_page_guard(zone, &page[size], high, migratetype)) + if (set_page_guard(zone, &page[size], high)) continue; - add_to_free_list(&page[size], zone, high, migratetype); + __add_to_free_list(&page[size], zone, high, migratetype, false); set_buddy_order(&page[size], high); + nr_added += size; } + + return nr_added; +} + +static __always_inline void page_del_and_expand(struct zone *zone, + struct page *page, int low, + int high, int migratetype) +{ + int nr_pages = 1 << high; + + __del_page_from_free_list(page, zone, high, migratetype); + nr_pages -= expand(zone, page, low, high, migratetype); + account_freepages(zone, -nr_pages, migratetype); } static void check_new_page_bad(struct page *page) @@ -1612,9 +1591,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, page = get_page_from_free_area(area, migratetype); if (!page) continue; - del_page_from_free_list(page, zone, current_order); - expand(zone, page, order, current_order, migratetype); - set_pcppage_migratetype(page, migratetype); + + page_del_and_expand(zone, page, order, current_order, + migratetype); trace_mm_page_alloc_zone_locked(page, order, migratetype, pcp_allowed_order(order) && migratetype < MIGRATE_PCPTYPES); @@ -1649,30 +1628,23 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, #endif /* - * Move the free pages in a range to the freelist tail of the requested type. - * Note that start_page and end_pages are not aligned on a pageblock - * boundary. If alignment is required, use move_freepages_block() + * Change the type of a block and move all its free pages to that + * type's freelist. */ -static int move_freepages(struct zone *zone, - unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int *num_movable) +static int __move_freepages_block(struct zone *zone, unsigned long start_pfn, + int old_mt, int new_mt) { struct page *page; - unsigned long pfn; + unsigned long pfn, end_pfn; unsigned int order; int pages_moved = 0; - for (pfn = start_pfn; pfn <= end_pfn;) { + VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1)); + end_pfn = pageblock_end_pfn(start_pfn); + + for (pfn = start_pfn; pfn < end_pfn;) { page = pfn_to_page(pfn); if (!PageBuddy(page)) { - /* - * We assume that pages that could be isolated for - * migration are movable. But we don't actually try - * isolating, as that would be expensive. 
- */ - if (num_movable && - (PageLRU(page) || __PageMovable(page))) - (*num_movable)++; pfn++; continue; } @@ -1682,36 +1654,166 @@ static int move_freepages(struct zone *zone, VM_BUG_ON_PAGE(page_zone(page) != zone, page); order = buddy_order(page); - move_to_free_list(page, zone, order, migratetype); + + move_to_free_list(page, zone, order, old_mt, new_mt); + pfn += 1 << order; pages_moved += 1 << order; } + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt); + return pages_moved; } -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype, int *num_movable) +static bool prep_move_freepages_block(struct zone *zone, struct page *page, + unsigned long *start_pfn, + int *num_free, int *num_movable) { - unsigned long start_pfn, end_pfn, pfn; + unsigned long pfn, start, end; + + pfn = page_to_pfn(page); + start = pageblock_start_pfn(pfn); + end = pageblock_end_pfn(pfn); + + /* + * The caller only has the lock for @zone, don't touch ranges + * that straddle into other zones. While we could move part of + * the range that's inside the zone, this call is usually + * accompanied by other operations such as migratetype updates + * which also should be locked. + */ + if (!zone_spans_pfn(zone, start)) + return false; + if (!zone_spans_pfn(zone, end - 1)) + return false; - if (num_movable) + *start_pfn = start; + + if (num_free) { + *num_free = 0; *num_movable = 0; + for (pfn = start; pfn < end;) { + page = pfn_to_page(pfn); + if (PageBuddy(page)) { + int nr = 1 << buddy_order(page); - pfn = page_to_pfn(page); - start_pfn = pageblock_start_pfn(pfn); - end_pfn = pageblock_end_pfn(pfn) - 1; + *num_free += nr; + pfn += nr; + continue; + } + /* + * We assume that pages that could be isolated for + * migration are movable. But we don't actually try + * isolating, as that would be expensive. + */ + if (PageLRU(page) || __PageMovable(page)) + (*num_movable)++; + pfn++; + } + } - /* Do not cross zone boundaries */ - if (!zone_spans_pfn(zone, start_pfn)) - start_pfn = pfn; - if (!zone_spans_pfn(zone, end_pfn)) - return 0; + return true; +} + +static int move_freepages_block(struct zone *zone, struct page *page, + int old_mt, int new_mt) +{ + unsigned long start_pfn; + + if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL)) + return -1; - return move_freepages(zone, start_pfn, end_pfn, migratetype, - num_movable); + return __move_freepages_block(zone, start_pfn, old_mt, new_mt); } +#ifdef CONFIG_MEMORY_ISOLATION +/* Look for a buddy that straddles start_pfn */ +static unsigned long find_large_buddy(unsigned long start_pfn) +{ + int order = 0; + struct page *page; + unsigned long pfn = start_pfn; + + while (!PageBuddy(page = pfn_to_page(pfn))) { + /* Nothing found */ + if (++order > MAX_PAGE_ORDER) + return start_pfn; + pfn &= ~0UL << order; + } + + /* + * Found a preceding buddy, but does it straddle? + */ + if (pfn + (1 << buddy_order(page)) > start_pfn) + return pfn; + + /* Nothing found */ + return start_pfn; +} + +/** + * move_freepages_block_isolate - move free pages in block for page isolation + * @zone: the zone + * @page: the pageblock page + * @migratetype: migratetype to set on the pageblock + * + * This is similar to move_freepages_block(), but handles the special + * case encountered in page isolation, where the block of interest + * might be part of a larger buddy spanning multiple pageblocks. 
+ * + * Unlike the regular page allocator path, which moves pages while + * stealing buddies off the freelist, page isolation is interested in + * arbitrary pfn ranges that may have overlapping buddies on both ends. + * + * This function handles that. Straddling buddies are split into + * individual pageblocks. Only the block of interest is moved. + * + * Returns %true if pages could be moved, %false otherwise. + */ +bool move_freepages_block_isolate(struct zone *zone, struct page *page, + int migratetype) +{ + unsigned long start_pfn, pfn; + + if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL)) + return false; + + /* No splits needed if buddies can't span multiple blocks */ + if (pageblock_order == MAX_PAGE_ORDER) + goto move; + + /* We're a tail block in a larger buddy */ + pfn = find_large_buddy(start_pfn); + if (pfn != start_pfn) { + struct page *buddy = pfn_to_page(pfn); + int order = buddy_order(buddy); + + del_page_from_free_list(buddy, zone, order, + get_pfnblock_migratetype(buddy, pfn)); + set_pageblock_migratetype(page, migratetype); + split_large_buddy(zone, buddy, pfn, order, FPI_NONE); + return true; + } + + /* We're the starting block of a larger buddy */ + if (PageBuddy(page) && buddy_order(page) > pageblock_order) { + int order = buddy_order(page); + + del_page_from_free_list(page, zone, order, + get_pfnblock_migratetype(page, pfn)); + set_pageblock_migratetype(page, migratetype); + split_large_buddy(zone, page, pfn, order, FPI_NONE); + return true; + } +move: + __move_freepages_block(zone, start_pfn, + get_pfnblock_migratetype(page, start_pfn), + migratetype); + return true; +} +#endif /* CONFIG_MEMORY_ISOLATION */ + static void change_pageblock_range(struct page *pageblock_page, int start_order, int migratetype) { @@ -1724,35 +1826,49 @@ static void change_pageblock_range(struct page *pageblock_page, } /* - * When we are falling back to another migratetype during allocation, try to - * steal extra free pages from the same pageblocks to satisfy further - * allocations, instead of polluting multiple pageblocks. - * - * If we are stealing a relatively large buddy page, it is likely there will - * be more free pages in the pageblock, so try to steal them all. For - * reclaimable and unmovable allocations, we steal regardless of page size, - * as fragmentation caused by those allocations polluting movable pageblocks - * is worse than movable allocations stealing from unmovable and reclaimable - * pageblocks. + * When we are falling back to another migratetype during allocation, should we + * try to claim an entire block to satisfy further allocations, instead of + * polluting multiple pageblocks? */ -static bool can_steal_fallback(unsigned int order, int start_mt) +static bool should_try_claim_block(unsigned int order, int start_mt) { /* * Leaving this order check is intended, although there is * relaxed order check in next check. The reason is that - * we can actually steal whole pageblock if this condition met, + * we can actually claim the whole pageblock if this condition met, * but, below check doesn't guarantee it and that is just heuristic * so could be changed anytime. */ if (order >= pageblock_order) return true; - if (order >= pageblock_order / 2 || - start_mt == MIGRATE_RECLAIMABLE || - start_mt == MIGRATE_UNMOVABLE || - page_group_by_mobility_disabled) + /* + * Above a certain threshold, always try to claim, as it's likely there + * will be more free pages in the pageblock. 
+ */ + if (order >= pageblock_order / 2) return true; + /* + * Unmovable/reclaimable allocations would cause permanent + * fragmentations if they fell back to allocating from a movable block + * (polluting it), so we try to claim the whole block regardless of the + * allocation size. Later movable allocations can always steal from this + * block, which is less problematic. + */ + if (start_mt == MIGRATE_RECLAIMABLE || start_mt == MIGRATE_UNMOVABLE) + return true; + + if (page_group_by_mobility_disabled) + return true; + + /* + * Movable pages won't cause permanent fragmentation, so when you alloc + * small pages, we just need to temporarily steal unmovable or + * reclaimable pages that are closest to the request size. After a + * while, memory compaction may occur to form large contiguous pages, + * and the next movable allocation may not need to steal. + */ return false; } @@ -1794,33 +1910,29 @@ static inline bool boost_watermark(struct zone *zone) } /* - * This function implements actual steal behaviour. If order is large enough, - * we can steal whole pageblock. If not, we first move freepages in this - * pageblock to our migratetype and determine how many already-allocated pages - * are there in the pageblock with a compatible migratetype. If at least half - * of pages are free or compatible, we can change migratetype of the pageblock - * itself, so pages freed in the future will be put on the correct free list. + * This function implements actual block claiming behaviour. If order is large + * enough, we can claim the whole pageblock for the requested migratetype. If + * not, we check the pageblock for constituent pages; if at least half of the + * pages are free or compatible, we can still claim the whole block, so pages + * freed in the future will be put on the correct free list. */ -static void steal_suitable_fallback(struct zone *zone, struct page *page, - unsigned int alloc_flags, int start_type, bool whole_block) +static struct page * +try_to_claim_block(struct zone *zone, struct page *page, + int current_order, int order, int start_type, + int block_type, unsigned int alloc_flags) { - unsigned int current_order = buddy_order(page); int free_pages, movable_pages, alike_pages; - int old_block_type; - - old_block_type = get_pageblock_migratetype(page); - - /* - * This can happen due to races and we want to prevent broken - * highatomic accounting. 
- */ - if (is_migrate_highatomic(old_block_type)) - goto single_page; + unsigned long start_pfn; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { + unsigned int nr_added; + + del_page_from_free_list(page, zone, current_order, block_type); change_pageblock_range(page, current_order, start_type); - goto single_page; + nr_added = expand(zone, page, order, current_order, start_type); + account_freepages(zone, nr_added, start_type); + return page; } /* @@ -1831,15 +1943,10 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD)) set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); - /* We are not allowed to try stealing from the whole block */ - if (!whole_block) - goto single_page; - - free_pages = move_freepages_block(zone, page, start_type, - &movable_pages); /* moving whole block can fail due to zone boundary conditions */ - if (!free_pages) - goto single_page; + if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages, + &movable_pages)) + return NULL; /* * Determine how many pages are compatible with our allocation. @@ -1856,7 +1963,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * vice versa, be conservative since we can't distinguish the * exact migratetype of non-movable pages. */ - if (old_block_type == MIGRATE_MOVABLE) + if (block_type == MIGRATE_MOVABLE) alike_pages = pageblock_nr_pages - (free_pages + movable_pages); else @@ -1867,23 +1974,24 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * compatible migratability as our allocation, claim the whole block. */ if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) - set_pageblock_migratetype(page, start_type); - - return; + page_group_by_mobility_disabled) { + __move_freepages_block(zone, start_pfn, block_type, start_type); + return __rmqueue_smallest(zone, order, start_type); + } -single_page: - move_to_free_list(page, zone, current_order, start_type); + return NULL; } /* * Check whether there is a suitable fallback freepage with requested order. - * If only_stealable is true, this function returns fallback_mt only if - * we can steal other freepages all together. This would help to reduce + * Sets *claim_block to instruct the caller whether it should convert a whole + * pageblock to the returned migratetype. + * If only_claim is true, this function returns fallback_mt only if + * we would do this whole-block claiming. This would help to reduce * fragmentation due to mixed migratetype pages in one pageblock. 
*/ int find_suitable_fallback(struct free_area *area, unsigned int order, - int migratetype, bool only_stealable, bool *can_steal) + int migratetype, bool only_claim, bool *claim_block) { int i; int fallback_mt; @@ -1891,19 +1999,16 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, if (area->nr_free == 0) return -1; - *can_steal = false; + *claim_block = false; for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) { fallback_mt = fallbacks[migratetype][i]; if (free_area_empty(area, fallback_mt)) continue; - if (can_steal_fallback(order, migratetype)) - *can_steal = true; - - if (!only_stealable) - return fallback_mt; + if (should_try_claim_block(order, migratetype)) + *claim_block = true; - if (*can_steal) + if (*claim_block || !only_claim) return fallback_mt; } @@ -1911,10 +2016,12 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, } /* - * Reserve a pageblock for exclusive use of high-order atomic allocations if - * there are no empty page blocks that contain a page with a suitable order + * Reserve the pageblock(s) surrounding an allocation request for + * exclusive use of high-order atomic allocations if there are no + * empty page blocks that contain a page with a suitable order */ -static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) +static void reserve_highatomic_pageblock(struct page *page, int order, + struct zone *zone) { int mt; unsigned long max_managed, flags; @@ -1940,10 +2047,16 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) /* Yoink! */ mt = get_pageblock_migratetype(page); /* Only reserve normal pageblocks (i.e., they can merge with others) */ - if (migratetype_is_mergeable(mt)) { + if (!migratetype_is_mergeable(mt)) + goto out_unlock; + + if (order < pageblock_order) { + if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) + goto out_unlock; zone->nr_reserved_highatomic += pageblock_nr_pages; - set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); - move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL); + } else { + change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); + zone->nr_reserved_highatomic += 1 << order; } out_unlock: @@ -1956,7 +2069,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) * intense memory pressure but failed atomic allocations should be easier * to recover from than an OOM. * - * If @force is true, try to unreserve a pageblock even though highatomic + * If @force is true, try to unreserve pageblocks even though highatomic * pageblock is exhausted. */ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, @@ -1968,7 +2081,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, struct zone *zone; struct page *page; int order; - bool ret; + int ret; for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, ac->nodemask) { @@ -1983,30 +2096,22 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &(zone->free_area[order]); + unsigned long size; page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); if (!page) continue; /* - * In page freeing path, migratetype change is racy so - * we can counter several free pages in a pageblock - * in this loop although we changed the pageblock type - * from highatomic to ac->migratetype. So we should - * adjust the count once. 
+ * It should never happen but changes to + * locking could inadvertently allow a per-cpu + * drain to add pages to MIGRATE_HIGHATOMIC + * while unreserving so be safe and watch for + * underflows. */ - if (is_migrate_highatomic_page(page)) { - /* - * It should never happen but changes to - * locking could inadvertently allow a per-cpu - * drain to add pages to MIGRATE_HIGHATOMIC - * while unreserving so be safe and watch for - * underflows. - */ - zone->nr_reserved_highatomic -= min( - pageblock_nr_pages, - zone->nr_reserved_highatomic); - } + size = max(pageblock_nr_pages, 1UL << order); + size = min(size, zone->nr_reserved_highatomic); + zone->nr_reserved_highatomic -= size; /* * Convert to ac->migratetype and avoid the normal @@ -2017,10 +2122,24 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * of pageblocks that cannot be completely freed * may increase. */ - set_pageblock_migratetype(page, ac->migratetype); - ret = move_freepages_block(zone, page, ac->migratetype, - NULL); - if (ret) { + if (order < pageblock_order) + ret = move_freepages_block(zone, page, + MIGRATE_HIGHATOMIC, + ac->migratetype); + else { + move_to_free_list(page, zone, order, + MIGRATE_HIGHATOMIC, + ac->migratetype); + change_pageblock_range(page, order, + ac->migratetype); + ret = 1; + } + /* + * Reserving the block(s) already succeeded, + * so this should not fail on zone boundaries. + */ + WARN_ON_ONCE(ret == -1); + if (ret > 0) { spin_unlock_irqrestore(&zone->lock, flags); return ret; } @@ -2032,17 +2151,15 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, } /* - * Try finding a free buddy page on the fallback list and put it on the free - * list of requested migratetype, possibly along with other pages from the same - * block, depending on fragmentation avoidance heuristics. Returns true if - * fallback was found so that __rmqueue_smallest() can grab it. + * Try to allocate from some fallback migratetype by claiming the entire block, + * i.e. converting it to the allocation's start migratetype. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop * condition simpler. */ -static __always_inline bool -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, +static __always_inline struct page * +__rmqueue_claim(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { struct free_area *area; @@ -2050,7 +2167,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, int min_order = order; struct page *page; int fallback_mt; - bool can_steal; + bool claim_block; /* * Do not steal pages from freelists belonging to other pageblocks @@ -2069,62 +2186,71 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, --current_order) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, - start_migratetype, false, &can_steal); + start_migratetype, false, &claim_block); if (fallback_mt == -1) continue; - /* - * We cannot steal all free pages from the pageblock and the - * requested migratetype is movable. In that case it's better to - * steal and split the smallest available page instead of the - * largest available page, because even if the next movable - * allocation falls back into a different pageblock than this - * one, it won't cause permanent fragmentation. 
- */ - if (!can_steal && start_migratetype == MIGRATE_MOVABLE - && current_order > order) - goto find_smallest; + if (!claim_block) + break; - goto do_steal; + page = get_page_from_free_area(area, fallback_mt); + page = try_to_claim_block(zone, page, current_order, order, + start_migratetype, fallback_mt, + alloc_flags); + if (page) { + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; + } } - return false; + return NULL; +} + +/* + * Try to steal a single page from some fallback migratetype. Leave the rest of + * the block as its current migratetype, potentially causing fragmentation. + */ +static __always_inline struct page * +__rmqueue_steal(struct zone *zone, int order, int start_migratetype) +{ + struct free_area *area; + int current_order; + struct page *page; + int fallback_mt; + bool claim_block; -find_smallest: for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, - start_migratetype, false, &can_steal); - if (fallback_mt != -1) - break; - } - - /* - * This should not happen - we already found a suitable fallback - * when looking for the largest page. - */ - VM_BUG_ON(current_order > MAX_ORDER); - -do_steal: - page = get_page_from_free_area(area, fallback_mt); - - steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, - can_steal); - - trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, fallback_mt); + start_migratetype, false, &claim_block); + if (fallback_mt == -1) + continue; - return true; + page = get_page_from_free_area(area, fallback_mt); + page_del_and_expand(zone, page, order, current_order, fallback_mt); + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; + } + return NULL; } +enum rmqueue_mode { + RMQUEUE_NORMAL, + RMQUEUE_CMA, + RMQUEUE_CLAIM, + RMQUEUE_STEAL, +}; + /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. */ static __always_inline struct page * __rmqueue(struct zone *zone, unsigned int order, int migratetype, - unsigned int alloc_flags) + unsigned int alloc_flags, enum rmqueue_mode *mode) { struct page *page; @@ -2142,17 +2268,49 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, return page; } } -retry: - page = __rmqueue_smallest(zone, order, migratetype); - if (unlikely(!page)) { - if (alloc_flags & ALLOC_CMA) - page = __rmqueue_cma_fallback(zone, order); - if (!page && __rmqueue_fallback(zone, order, migratetype, - alloc_flags)) - goto retry; + /* + * First try the freelists of the requested migratetype, then try + * fallbacks modes with increasing levels of fragmentation risk. + * + * The fallback logic is expensive and rmqueue_bulk() calls in + * a loop with the zone->lock held, meaning the freelists are + * not subject to any outside changes. Remember in *mode where + * we found pay dirt, to save us the search on the next call. + */ + switch (*mode) { + case RMQUEUE_NORMAL: + page = __rmqueue_smallest(zone, order, migratetype); + if (page) + return page; + fallthrough; + case RMQUEUE_CMA: + if (alloc_flags & ALLOC_CMA) { + page = __rmqueue_cma_fallback(zone, order); + if (page) { + *mode = RMQUEUE_CMA; + return page; + } + } + fallthrough; + case RMQUEUE_CLAIM: + page = __rmqueue_claim(zone, order, migratetype, alloc_flags); + if (page) { + /* Replenished preferred freelist, back to normal mode. 
*/ + *mode = RMQUEUE_NORMAL; + return page; + } + fallthrough; + case RMQUEUE_STEAL: + if (!(alloc_flags & ALLOC_NOFRAGMENT)) { + page = __rmqueue_steal(zone, order, migratetype); + if (page) { + *mode = RMQUEUE_STEAL; + return page; + } + } } - return page; + return NULL; } /* @@ -2164,13 +2322,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; unsigned long flags; int i; spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, - alloc_flags); + alloc_flags, &rmqm); if (unlikely(page == NULL)) break; @@ -2185,12 +2344,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages are ordered properly. */ list_add_tail(&page->pcp_list, list); - if (is_migrate_cma(get_pcppage_migratetype(page))) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, - -(1 << order)); } - - __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); spin_unlock_irqrestore(&zone->lock, flags); return i; @@ -2385,19 +2539,6 @@ void drain_all_pages(struct zone *zone) __drain_all_pages(zone, false); } -static bool free_unref_page_prepare(struct page *page, unsigned long pfn, - unsigned int order) -{ - int migratetype; - - if (!free_pages_prepare(page, order)) - return false; - - migratetype = get_pfnblock_migratetype(page, pfn); - set_pcppage_migratetype(page, migratetype); - return true; -} - static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free_high) { int min_nr_free, max_nr_free; @@ -2528,7 +2669,7 @@ void free_unref_page(struct page *page, unsigned int order) struct per_cpu_pages *pcp; struct zone *zone; unsigned long pfn = page_to_pfn(page); - int migratetype, pcpmigratetype; + int migratetype; if (page_from_dynamic_pool(page)) { dynamic_pool_free_page(page); @@ -2540,7 +2681,7 @@ void free_unref_page(struct page *page, unsigned int order) return; } - if (!free_unref_page_prepare(page, pfn, order)) + if (!free_pages_prepare(page, order)) return; /* @@ -2550,23 +2691,23 @@ void free_unref_page(struct page *page, unsigned int order) * get those areas back if necessary. 
Otherwise, we may have to free * excessively into the page allocator */ - migratetype = pcpmigratetype = get_pcppage_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); + free_one_page(page_zone(page), page, pfn, order, FPI_NONE); return; } - pcpmigratetype = MIGRATE_MOVABLE; + migratetype = MIGRATE_MOVABLE; } zone = page_zone(page); pcp_trylock_prepare(UP_flags); pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { - free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); + free_unref_page_commit(zone, pcp, page, migratetype, order); pcp_spin_unlock(pcp); } else { - free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); + free_one_page(zone, page, pfn, order, FPI_NONE); } pcp_trylock_finish(UP_flags); } @@ -2579,7 +2720,7 @@ void free_unref_folios(struct folio_batch *folios) unsigned long __maybe_unused UP_flags; struct per_cpu_pages *pcp = NULL; struct zone *locked_zone = NULL; - int i, j, migratetype; + int i, j; /* Prepare folios for freeing */ for (i = 0, j = 0; i < folios->nr; i++) { @@ -2592,18 +2733,16 @@ void free_unref_folios(struct folio_batch *folios) continue; } - if (!free_unref_page_prepare(&folio->page, pfn, order)) + if (!free_pages_prepare(&folio->page, order)) continue; /* - * Free isolated folios and orders not handled on the PCP - * directly to the allocator, see comment in free_unref_page. + * Free orders not handled on the PCP directly to the + * allocator. */ - migratetype = get_pcppage_migratetype(&folio->page); - if (!pcp_allowed_order(order) || - is_migrate_isolate(migratetype)) { - free_one_page(folio_zone(folio), &folio->page, pfn, - order, migratetype, FPI_NONE); + if (!pcp_allowed_order(order)) { + free_one_page(folio_zone(folio), &folio->page, + pfn, order, FPI_NONE); continue; } folio->private = (void *)(unsigned long)order; @@ -2616,16 +2755,31 @@ void free_unref_folios(struct folio_batch *folios) for (i = 0; i < folios->nr; i++) { struct folio *folio = folios->folios[i]; struct zone *zone = folio_zone(folio); + unsigned long pfn = folio_pfn(folio); unsigned int order = (unsigned long)folio->private; + int migratetype; folio->private = NULL; - migratetype = get_pcppage_migratetype(&folio->page); + migratetype = get_pfnblock_migratetype(&folio->page, pfn); /* Different zone requires a different pcp lock */ - if (zone != locked_zone) { + if (zone != locked_zone || + is_migrate_isolate(migratetype)) { if (pcp) { pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); + locked_zone = NULL; + pcp = NULL; + } + + /* + * Free isolated pages directly to the + * allocator, see comment in free_unref_page. 
+ */ + if (is_migrate_isolate(migratetype)) { + free_one_page(zone, &folio->page, pfn, + order, FPI_NONE); + continue; } /* @@ -2636,10 +2790,8 @@ void free_unref_folios(struct folio_batch *folios) pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (unlikely(!pcp)) { pcp_trylock_finish(UP_flags); - free_one_page(zone, &folio->page, - folio_pfn(folio), order, - migratetype, FPI_NONE); - locked_zone = NULL; + free_one_page(zone, &folio->page, pfn, + order, FPI_NONE); continue; } locked_zone = zone; @@ -2702,11 +2854,9 @@ int __isolate_free_page(struct page *page, unsigned int order) watermark = zone->_watermark[WMARK_MIN] + (1UL << order); if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; - - __mod_zone_freepage_state(zone, -(1UL << order), mt); } - del_page_from_free_list(page, zone, order); + del_page_from_free_list(page, zone, order, mt); /* * Set the pageblock if the isolated page is at least half of a @@ -2721,8 +2871,8 @@ int __isolate_free_page(struct page *page, unsigned int order) * with others) */ if (migratetype_is_mergeable(mt)) - set_pageblock_migratetype(page, - MIGRATE_MOVABLE); + move_freepages_block(zone, page, mt, + MIGRATE_MOVABLE); } } @@ -2790,7 +2940,9 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { - page = __rmqueue(zone, order, migratetype, alloc_flags); + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; + + page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); /* * If the allocation fails, allow OOM handling and @@ -2806,8 +2958,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, return NULL; } } - __mod_zone_freepage_state(zone, -(1 << order), - get_pcppage_migratetype(page)); spin_unlock_irqrestore(&zone->lock, flags); } while (check_new_pages(page, order)); @@ -2989,11 +3139,10 @@ static inline long __zone_watermark_unusable_free(struct zone *z, /* * If the caller does not have rights to reserves below the min - * watermark then subtract the high-atomic reserves. This will - * over-estimate the size of the atomic reserve but it avoids a search. + * watermark then subtract the free pages reserved for highatomic. */ if (likely(!(alloc_flags & ALLOC_RESERVES))) - unusable_free += z->nr_reserved_highatomic; + unusable_free += READ_ONCE(z->nr_free_highatomic); #ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */ @@ -3381,7 +3530,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, * if the pageblock should be reserved for the future */ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) - reserve_highatomic_pageblock(page, zone); + reserve_highatomic_pageblock(page, order, zone); return page; } else { @@ -6506,9 +6655,8 @@ static void alloc_contig_dump_pages(struct list_head *page_list) * @migratetype: using migratetype to filter the type of migration in * trace_mm_alloc_contig_migrate_range_info. */ -int __alloc_contig_migrate_range(struct compact_control *cc, - unsigned long start, unsigned long end, - int migratetype) +static int __alloc_contig_migrate_range(struct compact_control *cc, + unsigned long start, unsigned long end, int migratetype) { /* This function is based on compact_zone() from compaction.c. 
*/ unsigned int nr_reclaimed; @@ -6517,7 +6665,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc, int ret = 0; struct migration_target_control mtc = { .nid = zone_to_nid(cc->zone), - .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + .gfp_mask = cc->gfp_mask, }; struct page *page; unsigned long total_mapped = 0; @@ -6582,6 +6730,94 @@ int __alloc_contig_migrate_range(struct compact_control *cc, return (ret < 0) ? ret : 0; } +static void split_free_pages(struct list_head *list, gfp_t gfp_mask) +{ + int order; + + for (order = 0; order < NR_PAGE_ORDERS; order++) { + struct page *page, *next; + int nr_pages = 1 << order; + + list_for_each_entry_safe(page, next, &list[order], lru) { + int i; + + post_alloc_hook(page, order, gfp_mask); + if (!order) + continue; + + split_page(page, order); + + /* Add all subpages to the order-0 head, in sequence. */ + list_del(&page->lru); + for (i = 0; i < nr_pages; i++) + list_add_tail(&page[i].lru, &list[0]); + } + } +} + +static void split_pages_to_order0(struct list_head *list) +{ + int order; + + for (order = 1; order < NR_PAGE_ORDERS; order++) { + struct page *page, *next; + int nr_pages = 1 << order; + + list_for_each_entry_safe(page, next, &list[order], lru) { + int i; + + list_del(&page->lru); + for (i = 0; i < nr_pages; i++) + list_add_tail(&page[i].lru, &list[0]); + } + } +} + +static void free_pfn_range(unsigned long start, unsigned long end, gfp_t gfp_mask) +{ + struct page *page; + unsigned long i; + + page = pfn_to_page(start); + for (i = 0; i < end - start; ++i, ++page) + post_alloc_hook(page, 0, gfp_mask); + free_contig_range(start, end - start); +} + +static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask) +{ + const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM; + const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | + __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO; + const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + + /* + * We are given the range to allocate; node, mobility and placement + * hints are irrelevant at this point. We'll simply ignore them. + */ + gfp_mask &= ~(GFP_ZONEMASK | __GFP_RECLAIMABLE | __GFP_WRITE | + __GFP_HARDWALL | __GFP_THISNODE | __GFP_MOVABLE); + + /* + * We only support most reclaim flags (but not NOFAIL/NORETRY), and + * selected action flags. + */ + if (gfp_mask & ~(reclaim_mask | action_mask)) + return -EINVAL; + + /* + * Flags to control page compaction/migration/reclaim, to free up our + * page range. Migratable pages are movable, __GFP_MOVABLE is implied + * for them. + * + * Traditionally we always had __GFP_HARDWALL|__GFP_RETRY_MAYFAIL set, + * keep doing that to not degrade callers. + */ + *gfp_cc_mask = (gfp_mask & (reclaim_mask | cc_action_mask)) | + __GFP_HARDWALL | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; + return 0; +} + /** * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate @@ -6590,7 +6826,9 @@ int __alloc_contig_migrate_range(struct compact_control *cc, * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks * in range must have the same migratetype and it must * be either of the two. - * @gfp_mask: GFP mask to use during compaction + * @gfp_mask: GFP mask. Node/zone/placement hints are ignored; only some + * action and reclaim modifiers are supported. Reclaim modifiers + * control allocation behavior during compaction/migration/reclaim. * * The PFN range does not have to be pageblock aligned. The PFN range must * belong to a single zone. 
@@ -6606,8 +6844,8 @@ int __alloc_contig_migrate_range(struct compact_control *cc, int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask) { + int range_order = ilog2(end - start); unsigned long outer_start, outer_end; - int order; int ret = 0; struct compact_control cc = { @@ -6617,11 +6855,22 @@ int alloc_contig_range(unsigned long start, unsigned long end, .mode = MIGRATE_SYNC, .ignore_skip_hint = true, .no_set_skip_hint = true, - .gfp_mask = current_gfp_context(gfp_mask), .alloc_contig = true, }; INIT_LIST_HEAD(&cc.migratepages); + gfp_mask = current_gfp_context(gfp_mask); + if (__alloc_contig_verify_gfp_mask(gfp_mask, (gfp_t *)&cc.gfp_mask)) + return -EINVAL; + + /* __GFP_COMP may only be used for certain aligned+sized ranges. */ + if ((gfp_mask & __GFP_COMP) && + (!is_power_of_2(end - start) || !IS_ALIGNED(start, 1 << range_order))) { + WARN_ONCE(true, "PFN range: requested [%lu, %lu) is not suitable for __GFP_COMP\n", + start, end); + return -EINVAL; + } + /* * What we do here is we mark all pageblocks in range as * MIGRATE_ISOLATE. Because pageblock and max order pages may @@ -6690,29 +6939,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, * We don't have to hold zone->lock here because the pages are * isolated thus they won't get removed from buddy. */ - - order = 0; - outer_start = start; - while (!PageBuddy(pfn_to_page(outer_start))) { - if (++order > MAX_ORDER) { - outer_start = start; - break; - } - outer_start &= ~0UL << order; - } - - if (outer_start != start) { - order = buddy_order(pfn_to_page(outer_start)); - - /* - * outer_start page could be small order buddy page and - * it doesn't include start page. Adjust outer_start - * in this case to report failed page properly - * on tracepoint in test_pages_isolated() - */ - if (outer_start + (1UL << order) <= start) - outer_start = start; - } + outer_start = find_large_buddy(start); /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, 0)) { @@ -6727,12 +6954,34 @@ int alloc_contig_range(unsigned long start, unsigned long end, goto done; } - /* Free head and tail (if any) */ - if (start != outer_start) - free_contig_range(outer_start, start - outer_start); - if (end != outer_end) - free_contig_range(end, outer_end - end); + /* + * With __GFP_COMP and the requested order < MAX_PAGE_ORDER, + * isolated free pages can have higher order than the requested + * one. Use split_free_pages() to free out of range pages. + */ + if (!(gfp_mask & __GFP_COMP)) { + split_free_pages(cc.freepages, gfp_mask); + + /* Free head and tail (if any) */ + if (start != outer_start) + free_contig_range(outer_start, start - outer_start); + if (end != outer_end) + free_contig_range(end, outer_end - end); + } else { + struct page *head = pfn_to_page(start); + + if ((outer_start != start) || (end != outer_end)) { + split_pages_to_order0(cc.freepages); + if (start != outer_start) + free_pfn_range(outer_start, start, gfp_mask); + if (end != outer_end) + free_pfn_range(end, outer_end, gfp_mask); + } + + check_new_pages(head, range_order); + prep_new_page(head, range_order, gfp_mask, 0); + } done: undo_isolate_page_range(start, end, migratetype); return ret; @@ -6782,7 +7031,9 @@ static bool zone_spans_last_pfn(const struct zone *zone, /** * alloc_contig_pages() -- tries to find and allocate contiguous range of pages * @nr_pages: Number of contiguous pages to allocate - * @gfp_mask: GFP mask to limit search and used during compaction + * @gfp_mask: GFP mask. 
Node/zone/placement hints limit the search; only some + * action and reclaim modifiers are supported. Reclaim modifiers + * control allocation behavior during compaction/migration/reclaim. * @nid: Target node * @nodemask: Mask for other possible nodes * @@ -6841,6 +7092,18 @@ struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, void free_contig_range(unsigned long pfn, unsigned long nr_pages) { unsigned long count = 0; + struct folio *folio = pfn_folio(pfn); + + if (folio_test_large(folio)) { + int expected = folio_nr_pages(folio); + + if (nr_pages == expected) + folio_put(folio); + else + WARN(true, "PFN %lu: nr_pages %lu != expected %d\n", + pfn, nr_pages, expected); + return; + } for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -6946,8 +7209,9 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) BUG_ON(page_count(page)); BUG_ON(!PageBuddy(page)); + VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE); order = buddy_order(page); - del_page_from_free_list(page, zone, order); + del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE); pfn += (1 << order); } spin_unlock_irqrestore(&zone->lock, flags); @@ -6975,6 +7239,14 @@ bool is_free_buddy_page(struct page *page) EXPORT_SYMBOL(is_free_buddy_page); #ifdef CONFIG_MEMORY_FAILURE +static inline void add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype, + bool tail) +{ + __add_to_free_list(page, zone, order, migratetype, tail); + account_freepages(zone, 1 << order, migratetype); +} + /* * Break down a higher-order page in sub-pages, and keep our target out of * buddy allocator. @@ -6984,28 +7256,24 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, int migratetype) { unsigned long size = 1 << high; - struct page *current_buddy, *next_page; + struct page *current_buddy; while (high > low) { high--; size >>= 1; if (target >= &page[size]) { - next_page = page + size; current_buddy = page; + page = page + size; } else { - next_page = page; current_buddy = page + size; } - page = next_page; - if (set_page_guard(zone, current_buddy, high, migratetype)) + if (set_page_guard(zone, current_buddy, high)) continue; - if (current_buddy != target) { - add_to_free_list(current_buddy, zone, high, migratetype); - set_buddy_order(current_buddy, high); - } + add_to_free_list(current_buddy, zone, high, migratetype, false); + set_buddy_order(current_buddy, high); } } @@ -7030,12 +7298,11 @@ bool take_page_off_buddy(struct page *page) int migratetype = get_pfnblock_migratetype(page_head, pfn_head); - del_page_from_free_list(page_head, zone, page_order); + del_page_from_free_list(page_head, zone, page_order, + migratetype); break_down_buddy_pages(zone, page_head, page, 0, page_order, migratetype); SetPageHWPoisonTakenOff(page); - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, -1, migratetype); ret = true; break; } @@ -7052,13 +7319,14 @@ bool take_page_off_buddy(struct page *page) bool put_page_back_buddy(struct page *page) { struct zone *zone = page_zone(page); - unsigned long pfn = page_to_pfn(page); unsigned long flags; - int migratetype = get_pfnblock_migratetype(page, pfn); bool ret = false; spin_lock_irqsave(&zone->lock, flags); if (put_page_testzero(page)) { + unsigned long pfn = page_to_pfn(page); + int migratetype = get_pfnblock_migratetype(page, pfn); + ClearPageHWPoisonTakenOff(page); __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE); if (TestClearPageHWPoison(page)) { @@ 
-7139,7 +7407,7 @@ static bool try_to_accept_memory_one(struct zone *zone) list_del(&page->lru); last = list_empty(&zone->unaccepted_pages); - __mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); + account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); @@ -7197,7 +7465,7 @@ static bool __free_unaccepted(struct page *page) spin_lock_irqsave(&zone->lock, flags); first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); - __mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); + account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index c634db34978366a4344cf449b883bd8ebedb4c5d..ddee0901c77f24dff395e37d1cd42b31918d445b 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -179,15 +179,11 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end, migratetype, isol_flags); if (!unmovable) { - unsigned long nr_pages; - int mt = get_pageblock_migratetype(page); - - set_pageblock_migratetype(page, MIGRATE_ISOLATE); + if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) { + spin_unlock_irqrestore(&zone->lock, flags); + return -EBUSY; + } zone->nr_isolate_pageblock++; - nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE, - NULL); - - __mod_zone_freepage_state(zone, -nr_pages, mt); spin_unlock_irqrestore(&zone->lock, flags); return 0; } @@ -207,7 +203,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ static void unset_migratetype_isolate(struct page *page, int migratetype) { struct zone *zone; - unsigned long flags, nr_pages; + unsigned long flags; bool isolated_page = false; unsigned int order; struct page *buddy; @@ -253,12 +249,15 @@ static void unset_migratetype_isolate(struct page *page, int migratetype) * allocation. */ if (!isolated_page) { - nr_pages = move_freepages_block(zone, page, migratetype, NULL); - __mod_zone_freepage_state(zone, nr_pages, migratetype); - } - set_pageblock_migratetype(page, migratetype); - if (isolated_page) + /* + * Isolating this block already succeeded, so this + * should not fail on zone boundaries. + */ + WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype)); + } else { + set_pageblock_migratetype(page, migratetype); __putback_isolated_page(page, order, migratetype); + } zone->nr_isolate_pageblock--; out: spin_unlock_irqrestore(&zone->lock, flags); @@ -285,7 +284,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * within a free or in-use page. * @boundary_pfn: pageblock-aligned pfn that a page might cross * @flags: isolation flags - * @gfp_flags: GFP flags used for migrating pages * @isolate_before: isolate the pageblock before the boundary_pfn * @skip_isolation: the flag to skip the pageblock isolation in second * isolate_single_pageblock() @@ -304,8 +302,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * the in-use page then splitting the free page. 
*/ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, - gfp_t gfp_flags, bool isolate_before, bool skip_isolation, - int migratetype) + bool isolate_before, bool skip_isolation, int migratetype) { unsigned long start_pfn; unsigned long isolate_pageblock; @@ -370,108 +367,52 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, VM_BUG_ON(!page); pfn = page_to_pfn(page); - /* - * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any - * free pages in [start_pfn, boundary_pfn), its head page will - * always be in the range. - */ + if (PageBuddy(page)) { int order = buddy_order(page); - if (pfn + (1UL << order) > boundary_pfn) { - /* free page changed before split, check it again */ - if (split_free_page(page, order, boundary_pfn - pfn)) - continue; - } + /* move_freepages_block_isolate() handled this */ + VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn); pfn += 1UL << order; continue; } + /* - * migrate compound pages then let the free page handling code - * above do the rest. If migration is not possible, just fail. + * If a compound page is straddling our block, attempt + * to migrate it out of the way. + * + * We don't have to worry about this creating a large + * free page that straddles into our block: gigantic + * pages are freed as order-0 chunks, and LRU pages + * (currently) do not exceed pageblock_order. + * + * The block of interest has already been marked + * MIGRATE_ISOLATE above, so when migration is done it + * will free its pages onto the correct freelists. */ if (PageCompound(page)) { struct page *head = compound_head(page); unsigned long head_pfn = page_to_pfn(head); unsigned long nr_pages = compound_nr(head); - if (head_pfn + nr_pages <= boundary_pfn) { + if (head_pfn + nr_pages <= boundary_pfn || + PageHuge(page)) { pfn = head_pfn + nr_pages; continue; } -#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* - * hugetlb, lru compound (THP), and movable compound pages - * can be migrated. Otherwise, fail the isolation. + * These pages are movable too, but they're + * not expected to exceed pageblock_order. + * + * Let us know when they do, so we can add + * proper free and split handling for them. */ - if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) { - int order; - unsigned long outer_pfn; - int page_mt = get_pageblock_migratetype(page); - bool isolate_page = !is_migrate_isolate_page(page); - struct compact_control cc = { - .nr_migratepages = 0, - .order = -1, - .zone = page_zone(pfn_to_page(head_pfn)), - .mode = MIGRATE_SYNC, - .ignore_skip_hint = true, - .no_set_skip_hint = true, - .gfp_mask = gfp_flags, - .alloc_contig = true, - }; - INIT_LIST_HEAD(&cc.migratepages); - - /* - * XXX: mark the page as MIGRATE_ISOLATE so that - * no one else can grab the freed page after migration. - * Ideally, the page should be freed as two separate - * pages to be added into separate migratetype free - * lists. - */ - if (isolate_page) { - ret = set_migratetype_isolate(page, page_mt, - flags, head_pfn, head_pfn + nr_pages); - if (ret) - goto failed; - } - - ret = __alloc_contig_migrate_range(&cc, head_pfn, - head_pfn + nr_pages, page_mt); + VM_WARN_ON_ONCE_PAGE(PageLRU(page), page); + VM_WARN_ON_ONCE_PAGE(__PageMovable(page), page); - /* - * restore the page's migratetype so that it can - * be split into separate migratetype free lists - * later. 
- */ - if (isolate_page) - unset_migratetype_isolate(page, page_mt); - - if (ret) - goto failed; - /* - * reset pfn to the head of the free page, so - * that the free page handling code above can split - * the free page to the right migratetype list. - * - * head_pfn is not used here as a hugetlb page order - * can be bigger than MAX_ORDER, but after it is - * freed, the free page order is not. Use pfn within - * the range to find the head of the free page. - */ - order = 0; - outer_pfn = pfn; - while (!PageBuddy(pfn_to_page(outer_pfn))) { - /* stop if we cannot find the free page */ - if (++order > MAX_ORDER) - goto failed; - outer_pfn &= ~0UL << order; - } - pfn = outer_pfn; - continue; - } else -#endif - goto failed; + goto failed; } pfn++; @@ -541,7 +482,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, bool skip_isolation = false; /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, + ret = isolate_single_pageblock(isolate_start, flags, false, skip_isolation, migratetype); if (ret) return ret; @@ -550,7 +491,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, skip_isolation = true; /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, + ret = isolate_single_pageblock(isolate_end, flags, true, skip_isolation, migratetype); if (ret) { unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
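
For readers following the __rmqueue() changes above: the old retry loop is replaced by an explicit fallback order (preferred freelists, then CMA, then claiming a whole foreign block, then stealing a single foreign page), and rmqueue_bulk() keeps the last successful mode in a local rmqueue_mode so the fallback scan is not repeated for every page while zone->lock is held. The sketch below is a minimal userspace model of that ordering only, assuming toy counters instead of real freelists and ignoring alloc_flags, watermarks and highatomic reserves; the helper names and numbers are invented for illustration and are not kernel interfaces.

/*
 * Minimal userspace model of the RMQUEUE_NORMAL -> RMQUEUE_CMA ->
 * RMQUEUE_CLAIM -> RMQUEUE_STEAL ordering.  Plain counters stand in for
 * freelists; the block size, helper names and numbers are made up for
 * the example and are not kernel interfaces.
 */
#include <stdbool.h>
#include <stdio.h>

enum rmqueue_mode { RMQUEUE_NORMAL, RMQUEUE_CMA, RMQUEUE_CLAIM, RMQUEUE_STEAL };

struct toy_zone {
	long normal;   /* pages on the preferred migratetype's freelists */
	long cma;      /* pages reachable through the CMA fallback */
	long foreign;  /* free pages sitting on other migratetypes */
};

static bool take_one(long *count)
{
	if (!*count)
		return false;
	(*count)--;
	return true;
}

/* Claiming converts a whole block: one page is allocated, the rest
 * replenishes the preferred freelist, loosely like try_to_claim_block(). */
static bool try_claim(struct toy_zone *z)
{
	if (z->foreign < 8)            /* 8 pages stand in for a pageblock */
		return false;
	z->foreign -= 8;
	z->normal += 7;
	return true;
}

/* Same shape as the switch in __rmqueue(): resume from *mode, remember
 * where the page came from, and drop back to normal mode after a claim
 * because the preferred freelist has just been refilled. */
static bool toy_rmqueue(struct toy_zone *z, enum rmqueue_mode *mode)
{
	switch (*mode) {
	case RMQUEUE_NORMAL:
		if (take_one(&z->normal))
			return true;
		/* fall through */
	case RMQUEUE_CMA:
		if (take_one(&z->cma)) {
			*mode = RMQUEUE_CMA;
			return true;
		}
		/* fall through */
	case RMQUEUE_CLAIM:
		if (try_claim(z)) {
			*mode = RMQUEUE_NORMAL;
			return true;
		}
		/* fall through */
	case RMQUEUE_STEAL:
		if (take_one(&z->foreign)) {   /* steal a single foreign page */
			*mode = RMQUEUE_STEAL;
			return true;
		}
	}
	return false;
}

int main(void)
{
	struct toy_zone z = { .normal = 2, .cma = 1, .foreign = 20 };
	enum rmqueue_mode mode = RMQUEUE_NORMAL;   /* as rmqueue_bulk() starts out */
	int allocated = 0;

	while (toy_rmqueue(&z, &mode))
		allocated++;

	printf("allocated %d pages, final mode %d\n", allocated, (int)mode);
	return 0;
}

Running it shows the mode switching to CMA and claim only when the cheaper sources are empty, returning to normal right after a successful claim, and ending up in steal mode once no whole block is left, which is the behaviour the RMQUEUE_* switch is intended to make cheap to repeat from rmqueue_bulk().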