The new lmb could be used to replace early_res in x86. Suggested by: David, Ben, and Thomas First three patches should go into 2.6.34 -v6: change sequence as requested by Thomas -v7: separate them into more patches -v8: add boundary checking to make sure we do not free a partial page. Please check them. Thanks Yinghai --
It will return free memory size in specified range.
We can not use memory_size - reserved_size here, because some reserved area
may not be in the scope of lmb.memory.region.
Use lmb.memory.region minus lmb.reserved.region to get the array of free ranges,
then sum the sizes of all free ranges.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 1 +
mm/lmb.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+), 0 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index 019520a..51a8653 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -101,6 +101,7 @@ int get_free_all_memory_range(struct range **rangep, int nodeid);
void lmb_register_active_regions(int nid, unsigned long start_pfn,
unsigned long last_pfn);
u64 lmb_hole_size(u64 start, u64 end);
+u64 lmb_free_memory_size(u64 addr, u64 limit);
#include <asm/lmb.h>
diff --git a/mm/lmb.c b/mm/lmb.c
index 6f794ed..a98ab51 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -756,6 +756,57 @@ void __init lmb_to_bootmem(u64 start, u64 end)
}
#endif
+u64 __init lmb_free_memory_size(u64 addr, u64 limit)
+{
+ int i, count;
+ struct range *range;
+ int nr_range;
+ u64 final_start, final_end;
+ u64 free_size;
+
+ count = lmb.reserved.cnt * 2;
+
+ range = find_range_array(count);
+ nr_range = 0;
+
+ addr = PFN_UP(addr);
+ limit = PFN_DOWN(limit);
+
+ for (i = 0; i < lmb.memory.cnt; i++) {
+ struct lmb_property *r = &lmb.memory.region[i];
+
+ final_start = PFN_UP(r->base);
+ final_end = PFN_DOWN(r->base + r->size);
+ if (final_start >= final_end)
+ continue;
+ if (final_start >= limit || final_end <= addr)
+ continue;
+
+ nr_range = add_range(range, count, nr_range, final_start, final_end);
+ }
+ subtract_range(range, count, 0, addr);
+ subtract_range(range, count, limit, -1ULL);
+ for (i = 0; i < lmb.reserved.cnt; i++) {
+ struct lmb_property *r = &lmb.reserved.region[i];
+
+ final_start ...It will be used lmb_to_bootmem converting
It is a wrapper for reserve_bootmem, and x86 64bit is using a special one.
Also clean up that version for x86_64. We don't need to take care of the numa
path for that; bootmem can handle it now.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/mm/init_32.c | 6 ------
arch/x86/mm/init_64.c | 20 ++------------------
mm/bootmem.c | 6 ++++++
3 files changed, 8 insertions(+), 24 deletions(-)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 5cb3f0f..804bbe9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1069,9 +1069,3 @@ void mark_rodata_ro(void)
#endif
}
#endif
-
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
- int flags)
-{
- return reserve_bootmem(phys, len, flags);
-}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e9b040e..5ba6b0e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -798,13 +798,10 @@ void mark_rodata_ro(void)
#endif
+#ifndef CONFIG_NO_BOOTMEM
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
-#ifdef CONFIG_NUMA
- int nid, next_nid;
- int ret;
-#endif
unsigned long pfn = phys >> PAGE_SHIFT;
if (pfn >= max_pfn) {
@@ -820,21 +817,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
return -EFAULT;
}
- /* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
- nid = phys_to_nid(phys);
- next_nid = phys_to_nid(phys + len - 1);
- if (nid == next_nid)
- ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
- else
- ret = reserve_bootmem(phys, len, flags);
-
- if (ret != 0)
- return ret;
-
-#else
reserve_bootmem(phys, len, flags);
-#endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE;
@@ -843,6 +826,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
return 0;
...From: Ian Campbell <ian.campbell@citrix.com> This avoids an infinite loop in free_early_partial(). Add a warning to free_early_partial to catch future problems. -v5: put back start > end back into WARN_ONCE() -v6: use one line for warning according to linus -v7: more test by Signed-off-by: Ian Campbell <ian.campbell@citrix.com> Signed-off-by: Yinghai Lu <yinghai@kernel.org> Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Tested-by: Joel Becker <joel.becker@oracle.com> Tested-by: Stanislaw Gruszka <sgruszka@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@elte.hu> --- kernel/early_res.c | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/kernel/early_res.c b/kernel/early_res.c index 3cb2c66..69bed5b 100644 --- a/kernel/early_res.c +++ b/kernel/early_res.c @@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end) struct early_res *r; int i; + if (start == end) + return; + + if (WARN_ONCE(start > end, "free_early_partial: wrong range [%#llx, %#llx]\n", start, end)) + return; + try_next: i = find_overlapped_early(start, end); if (i >= max_early_res) -- 1.6.4.2 --
I think you can drop the function name, it will be in the stack dump anyway. And that would get the line back into bounds :) Other than that, Acked-by: Johannes Weiner <hannes@cmpxchg.org> --
Thanks. Is there any chance that the stack dump only prints out addresses instead of function names? BTW, kernel/early_res.c will be replaced by lmb.c Yinghai --
I suppose so, lacking frame pointers, but you still have filename + linenumber + 'wrong range'. --
From: Ian Campbell <ian.campbell@citrix.com>
This avoids an infinite loop in free_early_partial().
Add a warning to free_early_partial to catch future problems.
-v5: put back start > end back into WARN_ONCE()
-v6: use one line for warning according to linus
-v7: more test by
-v8: remove the function name according to Johannes
WARN_ONCE will print that function name.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Joel Becker <joel.becker@oracle.com>
Tested-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
kernel/early_res.c | 6 ++++++
1 file changed, 6 insertions(+)
Index: linux-2.6/kernel/early_res.c
===================================================================
--- linux-2.6.orig/kernel/early_res.c
+++ linux-2.6/kernel/early_res.c
@@ -333,6 +333,12 @@ void __init free_early_partial(u64 start
struct early_res *r;
int i;
+ if (start == end)
+ return;
+
+ if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end))
+ return;
+
try_next:
i = find_overlapped_early(start, end);
if (i >= max_early_res)
--
get_free_all_memory_range is for CONFIG_NO_BOOTMEM, and will be called by
free_all_memory_core_early().
It will use early_node_map (aka active ranges) minus lmb.reserved to
get all free ranges.
-v2: Update with Jan Beulich's patch "fix allocation done in get_free_all_memory_range()", that one is for early_res.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Jan Beulich <jbeulich@novell.com>
---
include/linux/lmb.h | 2 +
mm/lmb.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 87 insertions(+), 1 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index f5071e1..9e2dcf5 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -93,6 +93,8 @@ u64 find_lmb_area_size(u64 start, u64 *sizep, u64 align);
u64 get_max_mapped(void);
void lmb_to_bootmem(u64 start, u64 end);
+struct range;
+int get_free_all_memory_range(struct range **rangep, int nodeid);
#include <asm/lmb.h>
diff --git a/mm/lmb.c b/mm/lmb.c
index 7a34f4a..f687a42 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -640,7 +640,91 @@ void __init free_lmb(u64 start, u64 end)
lmb_free(start, end - start);
}
-#ifndef CONFIG_NO_BOOTMEM
+static __init struct range *find_range_array(int count)
+{
+ u64 end, size, mem = -1ULL;
+ struct range *range;
+
+ size = sizeof(struct range) * count;
+ end = get_max_mapped();
+#ifdef MAX_DMA32_PFN
+ if (end > (MAX_DMA32_PFN << PAGE_SHIFT))
+ mem = find_lmb_area(MAX_DMA32_PFN << PAGE_SHIFT, end,
+ size, sizeof(struct range));
+#endif
+ if (mem == -1ULL)
+ mem = find_lmb_area(0, end, size, sizeof(struct range));
+ if (mem == -1ULL)
+ panic("can not find more space for range free");
+
+ range = __va(mem);
+ memset(range, 0, size);
+
+ return range;
+}
+
+#ifdef CONFIG_NO_BOOTMEM
+static void __init subtract_lmb_reserved(struct range *range, int az)
+{
+ int i, count;
+ u64 final_start, final_end;
+
+ /* Take out region array itself at first*/
+ if (lmb.reserved.region != ...lmb_init() will connect them back.
Add nr_regions in struct lmb_region to track region array size.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 3 ++-
mm/lmb.c | 9 ++++++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index f3d1433..e14ea8d 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -26,7 +26,8 @@ struct lmb_property {
struct lmb_region {
unsigned long cnt;
u64 size;
- struct lmb_property region[MAX_LMB_REGIONS+1];
+ struct lmb_property *region;
+ unsigned long nr_regions;
};
struct lmb {
diff --git a/mm/lmb.c b/mm/lmb.c
index b1fc526..65b62dc 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -18,6 +18,8 @@
#define LMB_ALLOC_ANYWHERE 0
struct lmb lmb;
+static struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+static struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
static int lmb_debug;
@@ -106,6 +108,11 @@ static void lmb_coalesce_regions(struct lmb_region *rgn,
void __init lmb_init(void)
{
+ lmb.memory.region = lmb_memory_region;
+ lmb.reserved.region = lmb_reserved_region;
+ lmb.memory.nr_regions = ARRAY_SIZE(lmb_memory_region);
+ lmb.reserved.nr_regions = ARRAY_SIZE(lmb_reserved_region);
+
/* Create a dummy zero size LMB which will get coalesced away later.
* This simplifies the lmb_add() code below...
*/
@@ -169,7 +176,7 @@ static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
if (coalesced)
return coalesced;
- if (rgn->cnt >= MAX_LMB_REGIONS)
+ if (rgn->cnt > rgn->nr_regions)
return -1;
/* Couldn't coalesce the LMB, so add it to the sorted table. */
--
1.6.4.2
--
lmb_memory_size() will return memory size in lmb.memory.region.
lmb_free_memory_size() will return free memory size in lmb.memory.region.
So we can get the exact reserved size.
Set the size right after initmem_init(), because later bootmem API will
get area from 16M. (except some fallback).
Later after we remove the bootmem, We could call that just before paging_init().
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/include/asm/e820.h | 2 ++
arch/x86/kernel/e820.c | 17 +++++++++++++++++
arch/x86/kernel/setup.c | 1 +
arch/x86/mm/init_64.c | 7 -------
4 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index de6cd06..334281f 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,6 +117,8 @@ extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
void init_lmb_memory(void);
void fill_lmb_memory(void);
+void find_lmb_dma_reserve(void);
+
extern void finish_e820_parsing(void);
extern void e820_reserve_resources(void);
extern void e820_reserve_resources_late(void);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 78ba396..e0257b8 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1100,3 +1100,20 @@ void __init fill_lmb_memory(void)
lmb_analyze();
lmb_dump_all();
}
+
+void __init find_lmb_dma_reserve(void)
+{
+#ifdef CONFIG_X86_64
+ u64 free_size_pfn;
+ u64 mem_size_pfn;
+ /*
+ * need to find out used area below MAX_DMA_PFN
+ * need to use lmb to get free size in [0, MAX_DMA_PFN]
+ * at first, and assume boot_mem will not take below MAX_DMA_PFN
+ */
+ mem_size_pfn = lmb_memory_size(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ free_size_pfn = lmb_free_memory_size(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ set_dma_reserve(mem_size_pfn - free_size_pfn);
+#endif
+}
+
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f0a32a4..6b62d9b ...Some areas from firmware could be reserved several times from different callers.
If these areas overlap, we may have overlapping entries in lmb.reserved.
Try to free the area first, before reserving it again.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 1 +
mm/lmb.c | 27 +++++++++++++++++++++++++--
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index 285f287..1e11fe0 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -85,6 +85,7 @@ lmb_end_pfn(struct lmb_region *type, unsigned long region_nr)
}
void reserve_lmb(u64 start, u64 end, char *name);
+void reserve_lmb_overlap_ok(u64 start, u64 end, char *name);
void free_lmb(u64 start, u64 end);
void add_lmb_memory(u64 start, u64 end);
u64 __find_lmb_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
diff --git a/mm/lmb.c b/mm/lmb.c
index 0dab3b6..20ab927 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -615,6 +615,12 @@ void __init add_lmb_memory(u64 start, u64 end)
lmb_add(start, end - start);
}
+static void __init __reserve_lmb(u64 start, u64 end, char *name)
+{
+ __check_and_double_region_array(&lmb.reserved, &lmb_reserved_region[0], start, end);
+ lmb_reserve(start, end - start);
+}
+
void __init reserve_lmb(u64 start, u64 end, char *name)
{
if (start == end)
@@ -623,8 +629,25 @@ void __init reserve_lmb(u64 start, u64 end, char *name)
if (WARN_ONCE(start > end, "reserve_lmb: wrong range [%#llx, %#llx]\n", start, end))
return;
- __check_and_double_region_array(&lmb.reserved, &lmb_reserved_region[0], start, end);
- lmb_reserve(start, end - start);
+ __reserve_lmb(start, end, name);
+}
+
+/*
+ * Could be used to avoid having overlap entries in lmb.reserved.region.
+ * Don't need to use it with area that is from find_lmb_area()
+ * Only use it for the area that fw hidden area.
+ */
+void __init reserve_lmb_overlap_ok(u64 start, u64 end, char *name)
+{
+ if (start == ...to workaround wrong BIOS map.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/kernel/e820.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e0257b8..89b8bf9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -903,6 +903,47 @@ static int __init parse_memmap_opt(char *p)
}
early_param("memmap", parse_memmap_opt);
+static void __init e820_align_ram_page(void)
+{
+ int i;
+ bool changed = false;;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *entry = &e820.map[i];
+ u64 start, end;
+ u64 start_aligned, end_aligned;
+
+ if (entry->type != E820_RAM)
+ continue;
+
+ start = entry->addr;
+ end = start + entry->size;
+
+ start_aligned = round_up(start, PAGE_SIZE);
+ end_aligned = round_down(end, PAGE_SIZE);
+
+ if (end_aligned <= start_aligned) {
+ e820_update_range(start, end - start, E820_RAM, E820_RESERVED);
+ changed = true;
+ continue;
+ }
+ if (start < start_aligned) {
+ e820_update_range(start, start_aligned - start, E820_RAM, E820_RESERVED);
+ changed = true;
+ }
+ if (end_aligned < end) {
+ e820_update_range(end_aligned, end - end_aligned, E820_RAM, E820_RESERVED);
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ sanitize_e820_map();
+ printk(KERN_INFO "aligned physical RAM map:\n");
+ e820_print_map("aligned");
+ }
+}
+
void __init finish_e820_parsing(void)
{
if (userdef) {
@@ -915,6 +956,9 @@ void __init finish_e820_parsing(void)
printk(KERN_INFO "user-defined physical RAM map:\n");
e820_print_map("user");
}
+
+ /* In case, We have RAM entres that are not PAGE aligned */
+ e820_align_ram_page();
}
static inline const char *e820_type_to_string(int e820_type)
--
1.6.4.2
--
1.include linux/lmb.h directly. so later could reduce e820.h reference. 2 this patch is done by sed scripts mainly Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/include/asm/efi.h | 2 +- arch/x86/kernel/acpi/sleep.c | 5 +++-- arch/x86/kernel/apic/numaq_32.c | 3 ++- arch/x86/kernel/efi.c | 5 +++-- arch/x86/kernel/head32.c | 4 ++-- arch/x86/kernel/head64.c | 4 ++-- arch/x86/kernel/setup.c | 26 +++++++++++++------------- arch/x86/kernel/trampoline.c | 6 +++--- arch/x86/mm/init.c | 5 +++-- arch/x86/mm/init_32.c | 10 ++++++---- arch/x86/mm/init_64.c | 9 +++++---- arch/x86/mm/k8topology_64.c | 4 +++- arch/x86/mm/memtest.c | 7 +++---- arch/x86/mm/numa_32.c | 17 +++++++++-------- arch/x86/mm/numa_64.c | 32 ++++++++++++++++---------------- arch/x86/mm/srat_32.c | 3 ++- arch/x86/mm/srat_64.c | 9 +++++---- arch/x86/xen/mmu.c | 5 +++-- arch/x86/xen/setup.c | 3 ++- mm/bootmem.c | 4 ++-- 20 files changed, 88 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8406ed7..f756536 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,7 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern int add_efi_memmap; -extern void efi_reserve_early(void); +extern void efi_reserve_lmb(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f996103..0cabfaa 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -7,6 +7,7 @@ #include <linux/acpi.h> #include <linux/bootmem.h> +#include <linux/lmb.h> #include <linux/dmi.h> #include ...
and some functions in e820.c that are not used anymore Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/include/asm/e820.h | 14 - arch/x86/kernel/e820.c | 41 --- include/linux/early_res.h | 23 -- kernel/early_res.c | 584 ------------------------------------------- 4 files changed, 0 insertions(+), 662 deletions(-) delete mode 100644 include/linux/early_res.h delete mode 100644 kernel/early_res.c diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 396c849..de6cd06 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -111,32 +111,18 @@ static inline void early_memtest(unsigned long start, unsigned long end) } #endif -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); void init_lmb_memory(void); void fill_lmb_memory(void); - extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -void reserve_early(u64 start, u64 end, char *name); -void free_early(u64 start, u64 end); - /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 54d478d..78ba396 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -727,14 +727,6 @@ static int __init e820_mark_nvs_memory(void) ...
It will try to find an area with the given size/align in the specified range (start, end).
We need it to find a suitable buffer for the new lmb.reserved.region.
It also makes it easier for x86 to use lmb.
x86 early_res is using a find/reserve pattern instead of alloc.
find_lmb_area() will honor the goal.
When we need a temporary buffer for a range array etc. for range work, if we are using
lmb_alloc(), we will need to add some post-fix code for the buffer that is used
by the range array, because it is already in lmb.reserved.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 4 ++
mm/lmb.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+), 0 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index e14ea8d..05234bd 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -83,6 +83,10 @@ lmb_end_pfn(struct lmb_region *type, unsigned long region_nr)
lmb_size_pages(type, region_nr);
}
+u64 __find_lmb_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
+ u64 size, u64 align);
+u64 find_lmb_area(u64 start, u64 end, u64 size, u64 align);
+
#include <asm/lmb.h>
#endif /* __KERNEL__ */
diff --git a/mm/lmb.c b/mm/lmb.c
index 65b62dc..d5d5dc4 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -11,9 +11,13 @@
*/
#include <linux/kernel.h>
+#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/lmb.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/range.h>
#define LMB_ALLOC_ANYWHERE 0
@@ -546,3 +550,80 @@ int lmb_find(struct lmb_property *res)
}
return -1;
}
+
+static int __init find_overlapped_early(u64 start, u64 end)
+{
+ int i;
+ struct lmb_property *r;
+
+ for (i = 0; i < lmb.reserved.cnt && lmb.reserved.region[i].size; i++) {
+ r = &lmb.reserved.region[i];
+ if (end > r->base && start < (r->base + r->size))
+ break;
+ }
+
+ return i;
+}
+
+/* Check for already reserved areas */
+static ...1. replace find_e820_area with find_lmb_area 2. replace reserve_early with reserve_lmb 3. replace free_early with free_lmb. 4. NO_BOOTMEM will switch to use lmb too. 5. use _e820, _early wrap in the patch, in following patch, will replace them all 6. because free_lmb support partial free, we can remove some special care 7. Need to make sure that find_lmb_area() is called after fill_lmb_memory() so adjust some calling later in setup.c::setup_arch() -- corruption_check and mptable_update Suggested-by: David S. Miller <davem@davemloft.net> Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Suggested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 9 +-- arch/x86/include/asm/e820.h | 15 +++-- arch/x86/include/asm/lmb.h | 8 ++ arch/x86/kernel/check.c | 14 ++-- arch/x86/kernel/e820.c | 139 +++++++++++----------------------------- arch/x86/kernel/head.c | 3 +- arch/x86/kernel/head32.c | 6 +- arch/x86/kernel/head64.c | 3 + arch/x86/kernel/mpparse.c | 5 +- arch/x86/kernel/setup.c | 10 ++- arch/x86/kernel/setup_percpu.c | 6 -- arch/x86/mm/numa_64.c | 5 +- kernel/Makefile | 1 - mm/bootmem.c | 1 + mm/page_alloc.c | 35 ++-------- mm/sparse-vmemmap.c | 11 --- 16 files changed, 97 insertions(+), 174 deletions(-) create mode 100644 arch/x86/include/asm/lmb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a80bce..3117de5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES + select HAVE_LMB select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS @@ -192,9 +193,6 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ...
It will return memory size in specified range according to lmb.memory.region
Try to share some code with lmb_free_memory_size() by passing get_free to
__lmb_memory_size().
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 1 +
mm/lmb.c | 18 +++++++++++++++++-
2 files changed, 18 insertions(+), 1 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index 51a8653..285f287 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -102,6 +102,7 @@ void lmb_register_active_regions(int nid, unsigned long start_pfn,
unsigned long last_pfn);
u64 lmb_hole_size(u64 start, u64 end);
u64 lmb_free_memory_size(u64 addr, u64 limit);
+u64 lmb_memory_size(u64 addr, u64 limit);
#include <asm/lmb.h>
diff --git a/mm/lmb.c b/mm/lmb.c
index a98ab51..0dab3b6 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -756,7 +756,7 @@ void __init lmb_to_bootmem(u64 start, u64 end)
}
#endif
-u64 __init lmb_free_memory_size(u64 addr, u64 limit)
+static u64 __init __lmb_memory_size(u64 addr, u64 limit, bool get_free)
{
int i, count;
struct range *range;
@@ -786,6 +786,10 @@ u64 __init lmb_free_memory_size(u64 addr, u64 limit)
}
subtract_range(range, count, 0, addr);
subtract_range(range, count, limit, -1ULL);
+
+ /* Subtract lmb.reserved.region in range ? */
+ if (!get_free)
+ goto sort_and_count_them;
for (i = 0; i < lmb.reserved.cnt; i++) {
struct lmb_property *r = &lmb.reserved.region[i];
@@ -798,6 +802,8 @@ u64 __init lmb_free_memory_size(u64 addr, u64 limit)
subtract_range(range, count, final_start, final_end);
}
+
+sort_and_count_them:
nr_range = clean_sort_range(range, count);
free_size = 0;
@@ -807,6 +813,16 @@ u64 __init lmb_free_memory_size(u64 addr, u64 limit)
return free_size << PAGE_SHIFT;
}
+u64 __init lmb_free_memory_size(u64 addr, u64 limit)
+{
+ return __lmb_memory_size(addr, limit, true);
+}
+
+u64 __init lmb_memory_size(u64 addr, u64 limit)
+{
+ return ...that is memory related, so move to mm/ according to Ingo
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
lib/Makefile | 2 --
mm/Makefile | 2 ++
{lib => mm}/lmb.c | 0
3 files changed, 2 insertions(+), 2 deletions(-)
rename {lib => mm}/lmb.c (100%)
diff --git a/lib/Makefile b/lib/Makefile
index 2e152ae..a463a4d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -85,8 +85,6 @@ obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
-obj-$(CONFIG_HAVE_LMB) += lmb.o
-
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
diff --git a/mm/Makefile b/mm/Makefile
index 7a68d2a..df22fd1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,8 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
$(mmu-y)
obj-y += init-mm.o
+obj-$(CONFIG_HAVE_LMB) += lmb.o
+
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HAS_DMA) += dmapool.o
diff --git a/lib/lmb.c b/mm/lmb.c
similarity index 100%
rename from lib/lmb.c
rename to mm/lmb.c
--
1.6.4.2
--
When CONFIG_NO_BOOTMEM is set, memory could be used more efficiently, or more compactly.
Example is:
Allocated new RAMDISK: 00ec2000 - 0248ce57
Move RAMDISK from 000000002ea04000 - 000000002ffcee56 to 00ec2000 - 0248ce56
The new RAMDISK's end is not page aligned.
The last page could be shared with another user.
When free_init_pages() is called for initrd or .init, that page could be freed,
which has a chance of corrupting the other user's data.
code segment in free_init_pages()
| for (; addr < end; addr += PAGE_SIZE) {
| ClearPageReserved(virt_to_page(addr));
| init_page_count(virt_to_page(addr));
| memset((void *)(addr & ~(PAGE_SIZE-1)),
| POISON_FREE_INITMEM, PAGE_SIZE);
| free_page(addr);
| totalram_pages++;
| }
last half page could be used as one whole free page.
Try to make the boundaries to be page aligned.
-v2: make the original initramdisk to be aligned, according to Johannes.
otherwise we have chance to lose one page.
we still need to keep initrd_end not aligned, otherwise it could
confuse the decompressor.
Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
arch/x86/kernel/head32.c | 1 +
arch/x86/kernel/head64.c | 1 +
arch/x86/kernel/setup.c | 40 +++++++++++++++++++++++-----------------
arch/x86/mm/init.c | 24 +++++++++++++++++++++++-
4 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index adedeef..fe3d953 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -47,6 +47,7 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ ramdisk_end = PFN_UP(ramdisk_end) << ...Those appear to be a lot of spurious changes. We don't need to copy the alignment padding as well, so it only matters that the Maybe realign only for when it is not aligned? So to keep the fixup out of line. I suppose WARN_ON() is enough as it will print a stack trace which --
we need to reserve that whole page, otherwise other user may use the same page. we need to PFN_DOWN the end, and don't free the partial page. otherwise could crash the system. So just print out the trace, and system still can be used. reporter to get dmesg if no PAGE_ALIGN is bad name, it is not clear UP or DOWN. Thanks Yinghai --
Why would you convert to a pfn just to go back to a physical address,
when all you wanted is align at the next page boundary? It looks
rather clumsy.
PAGE_ALIGN() is pretty wide spread and I think it's clear to most
I know that. And you would know that I know that, if you actually
Ditto.
if (WARN_ON(not_aligned)) {
align
}
/me goes arguing with M-x doctor, that thing actually acts on the
supplied input.
--
we may need to clean up all those: round_up round_down roundup ALIGN ok i got it. YH --
When CONFIG_NO_BOOTMEM is set, memory could be used more efficiently, or more compactly.
Example is:
Allocated new RAMDISK: 00ec2000 - 0248ce57
Move RAMDISK from 000000002ea04000 - 000000002ffcee56 to 00ec2000 - 0248ce56
The new RAMDISK's end is not page aligned.
The last page could be shared with another user.
When free_init_pages() is called for initrd or .init, that page could be freed,
which has a chance of corrupting the other user's data.
code segment in free_init_pages()
| for (; addr < end; addr += PAGE_SIZE) {
| ClearPageReserved(virt_to_page(addr));
| init_page_count(virt_to_page(addr));
| memset((void *)(addr & ~(PAGE_SIZE-1)),
| POISON_FREE_INITMEM, PAGE_SIZE);
| free_page(addr);
| totalram_pages++;
| }
last half page could be used as one whole free page.
Try to make the boundaries to be page aligned.
-v2: make the original initramdisk to be aligned, according to Johannes.
otherwise we have chance to lose one page.
we still need to keep initrd_end not aligned, otherwise it could
confuse the decompressor.
-v3: change to WARN_ON instead according to Johannes.
Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
arch/x86/kernel/head32.c | 1 +
arch/x86/kernel/head64.c | 1 +
arch/x86/kernel/setup.c | 40 +++++++++++++++++++++++-----------------
arch/x86/mm/init.c | 24 +++++++++++++++++++++++-
4 files changed, 48 insertions(+), 18 deletions(-)
Index: linux-2.6/arch/x86/mm/init.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init.c
+++ linux-2.6/arch/x86/mm/init.c
@@ -332,6 +332,16 @@ int devmem_is_allowed(unsigned long page
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
unsigned long addr = ...Hi,
Here is what I had in mind when I wrote what you did not read, maybe diff
works better?
Main differences:
o only fix the area allocation in relocate_initrd(), no need to
copy the alignment bits
o keep alignment fixups in free_init_pages() out of line
o use PAGE_ALIGN(); you might dislike the name, it is still the proper
operation here. if you want to fix it, please do it properly
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index adedeef..ec7c672 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -46,7 +46,8 @@ void __init i386_start_kernel(void)
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
- u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
+
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b5a9896..a26a8fd 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -105,7 +105,9 @@ void __init x86_64_start_reservations(char *real_mode_data)
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image +
+ ramdisk_size);
+
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca3f8fa..0594923 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -317,13 +317,14 @@ static void __init relocate_initrd(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 area_size = ...something is wrong here: if someone passes (0x10, 0x20), they will be aligned to [0x1000, 0] and you will get a dead loop. Will update that. YH --
You are right! It should be enough to move the alignment fixup above the addr >= end check. Hannes --
please check.
I keep the comments about we are safe to use PAGE_ALIGN() with the initrd_end.
Thanks
Yinghai
Subject: [PATCH -v4] x86: Make sure free_init_pages() free pages in boundary
With CONFIG_NO_BOOTMEM, memory can be used more efficiently, or more compactly.
Example is:
Allocated new RAMDISK: 00ec2000 - 0248ce57
Move RAMDISK from 000000002ea04000 - 000000002ffcee56 to 00ec2000 - 0248ce56
The new RAMDISK's end is not page aligned.
The last page could be shared with another user.
When free_init_pages() is called for initrd or .init, that whole page could be
freed, which has a chance of corrupting the other user's data.
code segment in free_init_pages()
| for (; addr < end; addr += PAGE_SIZE) {
| ClearPageReserved(virt_to_page(addr));
| init_page_count(virt_to_page(addr));
| memset((void *)(addr & ~(PAGE_SIZE-1)),
| POISON_FREE_INITMEM, PAGE_SIZE);
| free_page(addr);
| totalram_pages++;
| }
last half page could be used as one whole free page.
Try to make the boundaries to be page aligned.
-v2: make the original initramdisk to be aligned, according to Johannes.
otherwise we have chance to lose one page.
we still need to keep initrd_end unaligned, otherwise it could
confuse the decompressor.
-v3: change to WARN_ON instead according to Johannes.
-v4: use PAGE_ALIGN according to Johannes.
We may rename that macro later to PAGE_ALIGN_UP and PAGE_ALIGN_DOWN.
Add comments about assuming ramdisk start is aligned.
In relocate_initrd(), re-read ramdisk_image instead of saving it
to make the diff smaller.
Add WARN for wrong range according to Johannes.
Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
arch/x86/kernel/head32.c | 3 ++-
arch/x86/kernel/head64.c | 3 ++-
...Initrd memory is freed late and page-wise, so allocate full pages and
not share the last one with somebody else.
Also add a warning and a fixup in free_init_pages() to catch unaligned
ranges more explicitly in the future.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
arch/x86/kernel/head32.c | 8 +++++++-
arch/x86/kernel/head64.c | 8 +++++++-
arch/x86/kernel/setup.c | 14 ++++++++++----
arch/x86/mm/init.c | 12 ++++++++----
4 files changed, 32 insertions(+), 10 deletions(-)
I am really fed up with you replying to one point of an email and
skipping over five others. So here is my version of the patch,
the maintainers can choose.
Differences:
o only align the allocation area size, no need to also copy
alignment bits in relocate_initrd()
o keep the alignment fixup in free_init_pages() out of line and
self-contained
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index adedeef..086392a 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -46,7 +46,13 @@ void __init i386_start_kernel(void)
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
- u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ u64 ramdisk_end;
+ /*
+ * Initrd memory is freed late and page-wise, so make
+ * sure not to share a page with other users. Assume
+ * start is aligned.
+ */
+ ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b5a9896..b2c2321 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -105,7 +105,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
unsigned long ...why calculate them two times?
also prefer to put the fat comments in one place.
also we need to align begin too, so don't confuse set_memory_np() and set_memory_rw().
Thanks
Yinghai
Subject: [PATCH -v6] x86: Make sure free_init_pages() free pages in boundary
With CONFIG_NO_BOOTMEM, memory can be used more efficiently, or more compactly.
Example is:
Allocated new RAMDISK: 00ec2000 - 0248ce57
Move RAMDISK from 000000002ea04000 - 000000002ffcee56 to 00ec2000 - 0248ce56
The new RAMDISK's end is not page aligned.
The last page could be shared with another user.
When free_init_pages() is called for initrd or .init, that whole page could be
freed, which has a chance of corrupting the other user's data.
code segment in free_init_pages()
| for (; addr < end; addr += PAGE_SIZE) {
| ClearPageReserved(virt_to_page(addr));
| init_page_count(virt_to_page(addr));
| memset((void *)(addr & ~(PAGE_SIZE-1)),
| POISON_FREE_INITMEM, PAGE_SIZE);
| free_page(addr);
| totalram_pages++;
| }
last half page could be used as one whole free page.
Try to make the boundaries to be page aligned.
-v2: make the original initramdisk to be aligned, according to Johannes.
otherwise we have chance to lose one page.
we still need to keep initrd_end unaligned, otherwise it could
confuse the decompressor.
-v3: change to WARN_ON instead according to Johannes.
-v4: use PAGE_ALIGN according to Johannes.
We may rename that macro later to PAGE_ALIGN_UP and PAGE_ALIGN_DOWN.
Add comments about assuming ramdisk start is aligned.
In relocate_initrd(), re-read ramdisk_image instead of saving it
to make the diff smaller.
Add WARN for wrong range according to Johannes.
-v6: remove one WARN
We need to align begin in free_init_pages().
Do not copy more than ramdisk_size, according to Johannes.
Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Yinghai Lu ...So we don't need to take e820.map with it.
Also change e820_saved to __initdata to get some bytes of memory back.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/include/asm/e820.h | 5 ++---
arch/x86/kernel/e820.c | 26 ++++++++++++++++++--------
arch/x86/kernel/efi.c | 2 +-
arch/x86/kernel/setup.c | 10 +++++-----
arch/x86/xen/setup.c | 4 +---
5 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec8a52d..0457c49 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -75,15 +75,14 @@ struct e820map {
#ifdef __KERNEL__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map e820;
-extern struct e820map e820_saved;
extern unsigned long pci_mem_start;
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern int e820_all_mapped(u64 start, u64 end, unsigned type);
extern void e820_add_region(u64 start, u64 size, int type);
extern void e820_print_map(char *who);
-extern int
-sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
+int sanitize_e820_map(void);
+void save_e820_map(void);
extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
unsigned new_type);
extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440..0eb9830 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -35,7 +35,7 @@
* next kernel with full memory.
*/
struct e820map e820;
-struct e820map e820_saved;
+static struct e820map __initdata e820_saved;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
@@ -224,7 +224,7 @@ void __init e820_print_map(char *who)
* ______________________4_
*/
-int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
+static int __init __sanitize_e820_map(struct e820entry ...lmb_to_bootmem() will reserve lmb.reserved.region in bootmem
We can use it with all arches that support lmb.
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 2 ++
mm/lmb.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index 7301072..f5071e1 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -92,6 +92,8 @@ u64 find_lmb_area(u64 start, u64 end, u64 size, u64 align);
u64 find_lmb_area_size(u64 start, u64 *sizep, u64 align);
u64 get_max_mapped(void);
+void lmb_to_bootmem(u64 start, u64 end);
+
#include <asm/lmb.h>
#endif /* __KERNEL__ */
diff --git a/mm/lmb.c b/mm/lmb.c
index a91f48d..7a34f4a 100644
--- a/mm/lmb.c
+++ b/mm/lmb.c
@@ -640,6 +640,38 @@ void __init free_lmb(u64 start, u64 end)
lmb_free(start, end - start);
}
+#ifndef CONFIG_NO_BOOTMEM
+void __init lmb_to_bootmem(u64 start, u64 end)
+{
+ int i, count;
+ u64 final_start, final_end;
+
+ /* Take out region array itself */
+ if (lmb.reserved.region != lmb_reserved_region)
+ lmb_free(__pa(lmb.reserved.region), sizeof(struct lmb_property) * lmb.reserved.nr_regions);
+
+ count = lmb.reserved.cnt;
+ pr_info( "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", count, start, end);
+ for (i = 0; i < count; i++) {
+ struct lmb_property *r = &lmb.reserved.region[i];
+ pr_info(" #%d [%010llx - %010llx] ", i, r->base, r->base + r->size);
+ final_start = max(start, r->base);
+ final_end = min(end, r->base + r->size);
+ if (final_start >= final_end) {
+ pr_cont("\n");
+ continue;
+ }
+ pr_cont(" ==> [%010llx - %010llx]\n", final_start, final_end);
+ reserve_bootmem_generic(final_start, final_end - final_start, BOOTMEM_DEFAULT);
+ }
+ /* Clear them to avoid misusing ? */
+ memset(&lmb.reserved.region[0], 0, sizeof(struct lmb_property) * lmb.reserved.nr_regions);
+ lmb.reserved.region = NULL;
+ lmb.reserved.nr_regions = ...