new early_res is lmb extension. and it could be used to replace bootmem. even could share some code before bootmem, like register_active_region, and lm_reserved_to_bootmem add test version for powerpc Thanks Yinghai --
still keep kernel/early_res.c for the extension.
should move those file to lib/lmb.c later?
in early_res.c
1. change find_e820_area_xxx, to find_lmb_area_xxx
2. e820_register_active_regions to lmb_register_active_regions.
3. reserve_early will call lmb_reserve directly.
4. free_early will call lmb_free directly.
5. remove functions that are used by old reserve_early and free_early
6. get_free_all_memory_range use lmb.reserved.
7. early_res_to_bootmem use lmb.reserved
8. add fill_lmb_memory() to fill lmb.memory according to e820 RAM entries
-v2: fix NO_BOOTMEM hang with printk
-v4: add add_lmb_memory that could increase lmb.memory.region size
change region_array_size to nr_regions
make sure some find_lmb_area<_size> are called after fill_lmb_memory
todo:
1. make early_memtest depend on early_res, and move it to mm/
2. make all lmb users use the extended early_res/nobootmem
3. merge lmb.c and early_res.c, move it to mm/
4. make other platform to use lmb/early_res/nobootmem
5. remove BOOTMEM related code
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/e820.h | 38 ++-
arch/x86/include/asm/lmb.h | 8 +
arch/x86/kernel/check.c | 14 +-
arch/x86/kernel/e820.c | 171 ++---------
arch/x86/kernel/head.c | 2 +-
arch/x86/kernel/head32.c | 5 +-
arch/x86/kernel/head64.c | 2 +
arch/x86/kernel/setup.c | 9 +-
arch/x86/kernel/setup_percpu.c | 6 -
arch/x86/mm/memtest.c | 5 +-
arch/x86/mm/numa_64.c | 4 +-
include/linux/early_res.h | 19 +-
kernel/early_res.c | 631 ++++++++++++++++------------------------
mm/page_alloc.c | 2 +-
mm/sparse-vmemmap.c | 11 -
16 files changed, 344 insertions(+), 584 deletions(-)
create mode 100644 arch/x86/include/asm/lmb.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a80bce..585f611 100644
--- ...lmb_init() connect them back.
also add nr_regions in lmb_region to track the region array size.
-v3: separate lmb core change into a separate patch
-v4: according to Ingo, change >= x -1 to > x
change region_array_size to nr_regions
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 5 ++++-
lib/lmb.c | 9 ++++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index f3d1433..37fd29e 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -26,7 +26,8 @@ struct lmb_property {
struct lmb_region {
unsigned long cnt;
u64 size;
- struct lmb_property region[MAX_LMB_REGIONS+1];
+ struct lmb_property *region;
+ unsigned long nr_regions;
};
struct lmb {
@@ -37,6 +38,8 @@ struct lmb {
};
extern struct lmb lmb;
+extern struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+extern struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
extern void __init lmb_init(void);
extern void __init lmb_analyze(void);
diff --git a/lib/lmb.c b/lib/lmb.c
index b1fc526..ed4e433 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -18,6 +18,8 @@
#define LMB_ALLOC_ANYWHERE 0
struct lmb lmb;
+struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
static int lmb_debug;
@@ -106,6 +108,11 @@ static void lmb_coalesce_regions(struct lmb_region *rgn,
void __init lmb_init(void)
{
+ lmb.memory.region = lmb_memory_region;
+ lmb.reserved.region = lmb_reserved_region;
+ lmb.memory.nr_regions = ARRAY_SIZE(lmb_memory_region);
+ lmb.reserved.nr_regions = ARRAY_SIZE(lmb_reserved_region);
+
/* Create a dummy zero size LMB which will get coalesced away later.
* This simplifies the lmb_add() code below...
*/
@@ -169,7 +176,7 @@ static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
if (coalesced)
return coalesced;
- if (rgn->cnt >= ...so we could not take e820.map directly.
and could change e820_saved to initdata
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/include/asm/e820.h | 5 ++---
arch/x86/kernel/e820.c | 26 ++++++++++++++++++--------
arch/x86/kernel/efi.c | 2 +-
arch/x86/kernel/setup.c | 10 +++++-----
arch/x86/xen/setup.c | 4 +---
5 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 41553af..01bc987 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -75,15 +75,14 @@ struct e820map {
#ifdef __KERNEL__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map e820;
-extern struct e820map e820_saved;
extern unsigned long pci_mem_start;
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern int e820_all_mapped(u64 start, u64 end, unsigned type);
extern void e820_add_region(u64 start, u64 size, int type);
extern void e820_print_map(char *who);
-extern int
-sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
+int sanitize_e820_map(void);
+void save_e820_map(void);
extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
unsigned new_type);
extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 05ee724..0c7143b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -35,7 +35,7 @@
* next kernel with full memory.
*/
struct e820map e820;
-struct e820map e820_saved;
+static struct e820map __initdata e820_saved;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
@@ -224,7 +224,7 @@ void __init e820_print_map(char *who)
* ______________________4_
*/
-int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
+static int __init __sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
...new early_res is extension for lmb, and it could be used to replace bootmem
also even could be used to simplify the bootmem path if needed.
--NOT TESTED--
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/powerpc/Kconfig | 14 ++++++++++++++
arch/powerpc/mm/mem.c | 23 +++++++++++++++++++++--
arch/powerpc/mm/numa.c | 11 ++++++++++-
3 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2e19500..307c3ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -52,6 +52,9 @@ config HAVE_SETUP_PER_CPU_AREA
config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool PPC64
+config HAVE_EARLY_RES
+ def_bool y
+
config IRQ_PER_CPU
bool
default y
@@ -443,6 +446,17 @@ config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on (SMP && PPC_PSERIES) || PPC_PS3
+config NO_BOOTMEM
+ default y
+ bool "Disable Bootmem code"
+ ---help---
+ Use early_res directly instead of bootmem before slab is ready.
+ - allocator (buddy) [generic]
+ - early allocator (bootmem) [generic]
+ - very early allocator (reserve_early*()/lmb)
+ So reduce one layer between early allocator to final allocator
+
+
config ARCH_POPULATES_NODE_MAP
def_bool y
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 448f972..7004f9e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -179,9 +179,11 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range);
void __init do_init_bootmem(void)
{
unsigned long i;
- unsigned long start, bootmap_pages;
unsigned long total_pages;
+#ifndef CONFIG_NO_BOOTMEM
+ unsigned long start, bootmap_pages;
int boot_mapsize;
+#endif
max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
@@ -189,7 +191,9 @@ void __init do_init_bootmem(void)
total_pages = total_lowmem >> PAGE_SHIFT;
max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
#endif
+ min_low_pfn = MEMORY_START >> PAGE_SHIFT;
...David Miller pointed out that early_res has a problem finding node data on the correct node
when we have
node0: [0, 2g), [4g, 6g), [10g, 14g)
node1: [6g, 10g), [14g, 18g)
the cross node case
the problem is there in the x86 bits even before we use early_res as the bootmem replacement.
after early_res replaces bootmem, alloc_bootmem_node can still get a range on the correct node
this patch is fixing problem before bootmem or early_res replacement for bootmem.
for now the only user is x86 64-bit NUMA, to find node data.
the point is to use early_node_map with find_e820_area_node()
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/include/asm/e820.h | 1 +
arch/x86/kernel/e820.c | 15 +++++++++++++++
arch/x86/mm/numa_64.c | 4 ++--
include/linux/mm.h | 2 ++
mm/page_alloc.c | 37 +++++++++++++++++++++++--------------
5 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec8a52d..41553af 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -116,6 +116,7 @@ extern unsigned long end_user_pfn;
extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align);
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
#include <linux/early_res.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 740b440..05ee724 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -787,6 +787,21 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
return -1ULL;
}
+u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align)
+{
+ u64 addr;
+ /*
+ * need to call this function after e820_register_active_regions
+ * so early_node_map[] is set
+ */
+ addr = find_memory_core_early(nid, size, align, start, end);
+ if ...Why not do what we suggested: extend lmb.c to also offer the early_res-equivalent functionality, and remove kernel/early_res.c and move lib/lmb.c to mm/lmb.c? We don't want two facilities (early_res and lmb) really ... Ingo --
Exactly. This series seems to leave us with kernel/early_res.c (which should never have moved out of arch/x86 AFAICS) - as well as using some of lmb.c. It doesn't compile on powerpc, because early_res.c is looking for find_fw_memmap_area() which is still under arch/x86. And sprinkling CONFIG_NO_BOOTMEM everywhere is getting a bit gross. I notice that's already happened to mm/(bootmem|page_alloc).c but propagating it further is not nice. cheers
the new version... http://git.kernel.org/?p=linux/kernel/git/yinghai/linux-2.6-yinghai.git;a=blob;f=kerne... it looks that you are looking at old version somehow. please check for converting smoothly, before we remove bootmem core, we can keep it. and can compare bootmem and nobootmem side by side. Thanks Yinghai --
the new early_res.c is an extension to lmb. it will 1. add find_lmb_area 2. add check_double_region array with find_lmb_area 3. free_early and reserve_early are wrappers for lmb_free and lmb_reserve with array size checking 4. some other generic functions like lmb_register_active_regions and lmb_hole_size that could be used by all lmb users. so it is a separate file at this point. if the lmb guys are happy, we can merge early_res.c and lmb.c, just putting them in one file. Thanks Yinghai --
| Jesse Barnes | Re: [stable] [BUG][PATCH] cpqphp: fix kernel NULL pointer dereference |
| Greg KH | [003/136] p54usb: add Zcomax XG-705A usbid |
| Magnus Damm | [PATCH 03/07] ARM: Use shared GIC entry macros on Realview |
| Oliver Neukum | Re: [Bug #13682] The webcam stopped working when upgrading from 2.6.29 to 2.6.30 |
| Martin Schwidefsky | Re: [PATCH] optimized ktime_get[_ts] for GENERIC_TIME=y |
git: | |
| Junio C Hamano | Re: Some advanced index playing |
| Jeff King | Re: confusion over the new branch and merge config |
| Robin Rosenberg | Re: cvs2svn conversion directly to git ready for experimentation |
| Linus Torvalds | git binary size... |
| Ævar Arnfjörð Bjarmason | Re: Challenge with Git-Bash |
| Linux Kernel Mailing List | md: move allocation of ->queue from mddev_find to md_probe |
| Linux Kernel Mailing List | md: raid0: Represent zone->zone_offset in sectors. |
