still keep kernel/early_res.c for the extension. should move those file to lib/lmb.c later? -v2: fix NO_BOOTMEM hang with printk Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 1 + arch/x86/include/asm/e820.h | 38 ++- arch/x86/include/asm/lmb.h | 8 + arch/x86/kernel/e820.c | 163 ++---------- arch/x86/kernel/head.c | 2 +- arch/x86/kernel/head32.c | 4 +- arch/x86/kernel/head64.c | 2 + arch/x86/kernel/setup.c | 2 + arch/x86/kernel/setup_percpu.c | 6 - include/linux/early_res.h | 9 +- include/linux/lmb.h | 5 +- kernel/early_res.c | 593 +++++++++++++++------------------------ lib/lmb.c | 11 +- mm/page_alloc.c | 2 +- mm/sparse-vmemmap.c | 4 +- 15 files changed, 317 insertions(+), 533 deletions(-) create mode 100644 arch/x86/include/asm/lmb.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a80bce..585f611 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES + select HAVE_LMB select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 01bc987..2b57ff6 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -113,22 +113,36 @@ static inline void early_memtest(unsigned long start, unsigned long end) extern unsigned long end_user_pfn; -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); #include <linux/early_res.h> +static inline u64 find_e820_area(u64 start, u64 end, u64 ...
Would be nice to track known TODO items - that way people dont feel that you are ignoring (or have missed) their feedback. For example are the list of 4 improvements i suggested in the previous mail planned, or do you disagree with some of that? Thanks, Ingo --
lmb_init() connect them back.
also add region_array_size in lmb_region to tack the region array size.
-v3: seperate lmb core change to seperated patch
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 5 ++++-
lib/lmb.c | 11 ++++++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
Index: linux-2.6/include/linux/lmb.h
===================================================================
--- linux-2.6.orig/include/linux/lmb.h
+++ linux-2.6/include/linux/lmb.h
@@ -26,7 +26,8 @@ struct lmb_property {
struct lmb_region {
unsigned long cnt;
u64 size;
- struct lmb_property region[MAX_LMB_REGIONS+1];
+ struct lmb_property *region;
+ unsigned long region_array_size;
};
struct lmb {
@@ -37,6 +38,8 @@ struct lmb {
};
extern struct lmb lmb;
+extern struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+extern struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
extern void __init lmb_init(void);
extern void __init lmb_analyze(void);
Index: linux-2.6/lib/lmb.c
===================================================================
--- linux-2.6.orig/lib/lmb.c
+++ linux-2.6/lib/lmb.c
@@ -18,6 +18,8 @@
#define LMB_ALLOC_ANYWHERE 0
struct lmb lmb;
+struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
static int lmb_debug;
@@ -106,6 +108,11 @@ static void lmb_coalesce_regions(struct
void __init lmb_init(void)
{
+ lmb.memory.region = lmb_memory_region;
+ lmb.memory.region_array_size = ARRAY_SIZE(lmb_memory_region);
+ lmb.reserved.region = lmb_reserved_region;
+ lmb.reserved.region_array_size = ARRAY_SIZE(lmb_reserved_region);
+
/* Create a dummy zero size LMB which will get coalesced away later.
* This simplifies the lmb_add() code below...
*/
@@ -169,7 +176,7 @@ static long lmb_add_region(struct lmb_re
if (coalesced)
return coalesced;
- if (rgn->cnt >= MAX_LMB_REGIONS)
+ if (rgn->cnt ...That's rather unreadable and has random whitespace noise. Should be something like: lmb.memory.region = lmb_memory_region; lmb.memory.region_array_size = ARRAY_SIZE(lmb_memory_region); lmb.reserved.region = lmb_reserved_region; lmb.reserved.region_array_size = ARRAY_SIZE(lmb_reserved_region); also, i'd suggest to shorten region_array_size to region_size (we know it's an array), so it would become: lmb.memory.region = lmb_memory_region; lmb.memory.region_size = ARRAY_SIZE(lmb_memory_region); lmb.reserved.region = lmb_reserved_region; 'x >= y-1' is equivalent to 'x > y', so that should be: if (rgn->cnt > rgn->region_size) Ingo --
I don't mean to be pedantic, but the LMB code already has a lot of region.size references so region_size looks a bit awkward. All of the accessors in linux/lmb.h use region_nr as the array index, so perhaps nr_regions would be less ambiguous. --
@@ -26,7 +26,8 @@ struct lmb_property {
struct lmb_region {
unsigned long cnt;
u64 size;
- struct lmb_property region[MAX_LMB_REGIONS+1];
+ struct lmb_property *region;
+ unsigned long region_array_size;
};
cnt is number of slots used.
size is memory size
can we use rgn_sz for region array size?
YH
--
I'm fully aware of what each of these things are, and I have no idea what No. --
I dislike those arrays anyways. See my other message about turning them into lists, which would get rid of capacity constraints completely. What do you think ? Cheers, Ben. --
lmb_init() connect them back.
also add nr_regions in lmb_region to tack the region array size.
-v3: seperate lmb core change to seperated patch
-v4: according to Ingo, change >= x -1 to > x
change region_array_size to nr_regions
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
include/linux/lmb.h | 5 ++++-
lib/lmb.c | 9 ++++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
Index: linux-2.6/include/linux/lmb.h
===================================================================
--- linux-2.6.orig/include/linux/lmb.h
+++ linux-2.6/include/linux/lmb.h
@@ -26,7 +26,8 @@ struct lmb_property {
struct lmb_region {
unsigned long cnt;
u64 size;
- struct lmb_property region[MAX_LMB_REGIONS+1];
+ struct lmb_property *region;
+ unsigned long nr_regions;
};
struct lmb {
@@ -37,6 +38,8 @@ struct lmb {
};
extern struct lmb lmb;
+extern struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+extern struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
extern void __init lmb_init(void);
extern void __init lmb_analyze(void);
Index: linux-2.6/lib/lmb.c
===================================================================
--- linux-2.6.orig/lib/lmb.c
+++ linux-2.6/lib/lmb.c
@@ -18,6 +18,8 @@
#define LMB_ALLOC_ANYWHERE 0
struct lmb lmb;
+struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1];
+struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1];
static int lmb_debug;
@@ -106,6 +108,11 @@ static void lmb_coalesce_regions(struct
void __init lmb_init(void)
{
+ lmb.memory.region = lmb_memory_region;
+ lmb.reserved.region = lmb_reserved_region;
+ lmb.memory.nr_regions = ARRAY_SIZE(lmb_memory_region);
+ lmb.reserved.nr_regions = ARRAY_SIZE(lmb_reserved_region);
+
/* Create a dummy zero size LMB which will get coalesced away later.
* This simplifies the lmb_add() code below...
*/
@@ -169,7 +176,7 @@ static long lmb_add_region(struct lmb_re
if (coalesced)
return ...still keep kernel/early_res.c for the extension.
should move those file to lib/lmb.c later?
in early_res.c
1. change find_e820_area_xxx, to find_lmb_area_xxx
2. e820_register_active_regions to lmb_register_active_regions.
3. reserve_early will call lmb_reserve directly.
4. free_early will call lmb_free directly.
5. remove functions that are used by old reserve_early and free_early
6. get_free_all_memory_range use lmb.reserved.
7. early_res_to_bootmem use lmb.reserved
8. add fill_lmb_memory() to fill lmb.memory according e820 RAM entries
-v2: fix NO_BOOTMEM hang with printk
-v4: add add_lmb_memory that could increase lmb.memory.region size
change region_array_size to nr_regions
make sure some find_lmb_area<_size> are called after fill_lmb_memory
todo:
1. make early_memtest to depend on early_res. and move it to mm/
2. make all lmb user to use extend early_res/nobootmem
3. merge lmb.c and early_res.c, move it to mm/
4. make other platform to use lmb/early_res/nobootmem
5. remove BOOTMEM related code
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/Kconfig | 1
arch/x86/include/asm/e820.h | 38 +-
arch/x86/include/asm/lmb.h | 8
arch/x86/kernel/check.c | 14
arch/x86/kernel/e820.c | 171 +----------
arch/x86/kernel/head.c | 2
arch/x86/kernel/head32.c | 5
arch/x86/kernel/head64.c | 2
arch/x86/kernel/setup.c | 9
arch/x86/kernel/setup_percpu.c | 6
arch/x86/mm/memtest.c | 5
arch/x86/mm/numa_64.c | 4
include/linux/early_res.h | 19 -
kernel/early_res.c | 631 ++++++++++++++++-------------------------
mm/page_alloc.c | 2
mm/sparse-vmemmap.c | 11
16 files changed, 344 insertions(+), 584 deletions(-)
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ ...2/2 introduce one new function that could double the array size
please check the v4.
the function rely on find_lmb_area().
it will check if there is enough space left, otherwise try to get new big array, and
copy old array to new array.
final function like:
static void __init __check_and_double_region_array(struct lmb_region *type,
struct lmb_property *static_region,
u64 ex_start, u64 ex_end)
{
u64 start, end, size, mem;
struct lmb_property *new, *old;
unsigned long rgnsz = type->nr_regions;
/* do we have enough slots left ? */
if ((rgnsz - type->cnt) > max_t(unsigned long, rgnsz/8, 2))
return;
old = type->region;
/* double it */
mem = -1ULL;
size = sizeof(struct lmb_property) * rgnsz * 2;
if (old == static_region)
start = 0;
else
start = __pa(old) + sizeof(struct lmb_property) * rgnsz;
end = ex_start;
if (start + size < end)
mem = find_lmb_area(start, end, size,
sizeof(struct lmb_property));
if (mem == -1ULL) {
start = ex_end;
end = get_max_mapped();
if (start + size < end)
mem = find_lmb_area(start, end, size, sizeof(struct lmb_property));
}
if (mem == -1ULL)
panic("can not find more space for lmb.reserved.region array");
new = __va(mem);
/* copy old to new */
memcpy(&new[0], &old[0], sizeof(struct lmb_property) * rgnsz);
memset(&new[rgnsz], 0, sizeof(struct lmb_property) * rgnsz);
memset(&old[0], 0, sizeof(struct lmb_property) * rgnsz);
type->region = new;
type->nr_regions = rgnsz * 2;
printk(KERN_DEBUG "lmb.reserved.region array is doubled to %ld at [%llx - %llx]\n",
type->nr_regions, mem, mem + size - 1);
/* reserve new array and free old one */
lmb_reserve(mem, sizeof(struct lmb_property) * rgnsz * 2);
if (old != static_region)
lmb_free(__pa(old), sizeof(struct lmb_property) * rgnsz);
}
void __init add_lmb_memory(u64 start, u64 end)
{
__check_and_double_region_array(&lmb.memory, &lmb_memory_region[0], start, end);
lmb_add(start, end - start);
}
void __init ...It's still bloody arrays with fixed sizes, arbitrary limits and arbitrary waste of BSS space ;-) To be honest, I much prefer my idea of linked lists... But I'll let others speak. I think your double array size looks more like a band-aid than a proper fix. If we are going to use LMB in the long run for bootmem, we need to properly address its capacity constraints, not just paper over the problem. Cheers, --
Is there a reason why this needs to be exported ? Thanks, tglx --
lmb_reserved_region is used in kernel/early_res.c::__check_and_double_early_res/subtract_early_res/early_res_to_bootmem later if those functions are moved lmb.c, we can make lmb_reserved_region to be static same thing to lmb_memory_region, that will be used by add_lmb_memory() later. YH --
I would strongly suggest for you to send the next (-v4) version to lkml only if it's 100% clean and complete in every known regard (with a good splitup and explanations in every commit), so that people dont repeat review or review an unclean aspect aspect that will go away in a next iteration. Thanks, Ingo --
still keep kernel/early_res.c for the extension. should move those file to lib/lmb.c later? in early_res.c 1. change find_e820_area_xxx, to find_lmb_area_xxx 2. e820_register_active_regions to lmb_register_active_regions. 3. reserve_early will call lmb_reserve directly. 4. free_early will call lmb_free directly. 5. remove functions that are used by old reserve_early and free_early 6. get_free_all_memory_range use lmb.reserved. 7. early_res_to_bootmem use lmb.reserved 8. add fill_lmb_memory() to fill lmb.memory according e820 RAM entries -v2: fix NO_BOOTMEM hang with printk Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 1 arch/x86/include/asm/e820.h | 38 +- arch/x86/include/asm/lmb.h | 8 arch/x86/kernel/e820.c | 163 +---------- arch/x86/kernel/head.c | 2 arch/x86/kernel/head32.c | 5 arch/x86/kernel/head64.c | 2 arch/x86/kernel/setup.c | 2 arch/x86/kernel/setup_percpu.c | 6 include/linux/early_res.h | 9 kernel/early_res.c | 592 +++++++++++++++-------------------------- mm/page_alloc.c | 2 mm/sparse-vmemmap.c | 4 13 files changed, 301 insertions(+), 533 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES + select HAVE_LMB select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS Index: linux-2.6/arch/x86/include/asm/e820.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/e820.h +++ linux-2.6/arch/x86/include/asm/e820.h @@ -113,22 +113,36 @@ static inline void early_memtest(unsigne extern unsigned long end_user_pfn; -extern ...
