Re: [PATCH 3/5] SLUB: Replace __builtin_return_address(0) with _RET_IP_.

Previous thread: rt2500pci: rt2500pci_bbp_read: Error - BBPCSR register busy. Read failed. by Alex Riesen on Tuesday, August 19, 2008 - 10:35 am. (3 messages)

Next thread: [PATCH] SGI UV: hardcode the TLB flush interrupt system vector by Cliff Wickman on Tuesday, August 19, 2008 - 10:51 am. (2 messages)
From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 10:43 am

This reverts commit 79cf3d5e207243eecb1c4331c569e17700fa08fa.

The reverted commit, while it fixed printk format warnings, it resulted in
marker-probe format mismatches. Another approach should be used to fix
these warnings.
---
 include/linux/kmemtrace.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
index a865064..2c33201 100644
--- a/include/linux/kmemtrace.h
+++ b/include/linux/kmemtrace.h
@@ -31,7 +31,7 @@ static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
 					     int node)
 {
 	trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu "
-		   "bytes_req %zu bytes_alloc %zu gfp_flags %lu node %d",
+		   "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d",
 		   type_id, call_site, (unsigned long) ptr,
 		   bytes_req, bytes_alloc, (unsigned long) gfp_flags, node);
 }
-- 
1.5.6.1

--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 10:43 am

Fix the problem "kmemtrace: fix printk format warnings" attempted to fix,
but resulted in marker-probe format mismatch warnings. Instead of carrying
size_t into probes, we get rid of it by casting to unsigned long, just as
we did with gfp_t.

This way, we don't need to change marker format strings and we don't have
to rely on other format specifiers like "%zu", making for consistent use
of more generic data types (since there are no format specifiers for
gfp_t, for example).

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 include/linux/kmemtrace.h |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
index 2c33201..5bea8ea 100644
--- a/include/linux/kmemtrace.h
+++ b/include/linux/kmemtrace.h
@@ -33,7 +33,8 @@ static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
 	trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu "
 		   "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d",
 		   type_id, call_site, (unsigned long) ptr,
-		   bytes_req, bytes_alloc, (unsigned long) gfp_flags, node);
+		   (unsigned long) bytes_req, (unsigned long) bytes_alloc,
+		   (unsigned long) gfp_flags, node);
 }
 
 static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
-- 
1.5.6.1

--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 10:43 am

This patch replaces __builtin_return_address(0) with _RET_IP_, since a
previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and
they're widely available now. This makes for shorter and easier to read
code.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 mm/slub.c |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 4f5b961..8f66782 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1612,14 +1612,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+	return slab_alloc(s, gfpflags, -1, (void *) _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+	return slab_alloc(s, gfpflags, node, (void *) _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
@@ -1730,7 +1730,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 
 	page = virt_to_head_page(x);
 
-	slab_free(s, page, x, __builtin_return_address(0));
+	slab_free(s, page, x, (void *) _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -2657,7 +2657,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, -1, __builtin_return_address(0));
+	return slab_alloc(s, flags, -1, (void *) _RET_IP_);
 }
 EXPORT_SYMBOL(__kmalloc);
 
@@ -2685,7 +2685,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, node, __builtin_return_address(0));
+	return slab_alloc(s, flags, node, (void *) _RET_IP_);
 }
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
@@ -2742,7 +2742,7 @@ void kfree(const void *x)
 		put_page(page);
 		return;
 ...
From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 10:43 am

This adds hooks for the SLUB allocator, to allow tracing with kmemtrace.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 include/linux/slub_def.h |   53 +++++++++++++++++++++++++++++++++++--
 mm/slub.c                |   65 +++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 109 insertions(+), 9 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b..dc28432 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <linux/kmemtrace.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+	return kmem_cache_alloc(s, gfpflags);
+}
+#endif
+
 static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 {
-	return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+	unsigned int order = get_order(size);
+	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+
+	kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+			     size, PAGE_SIZE << order, flags);
+
+	return ret;
 }
 
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
+	void *ret;
+
 	if (__builtin_constant_p(size)) {
 		if (size > PAGE_SIZE)
 			return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 			if (!s)
 				return ZERO_SIZE_PTR;
 
-			return kmem_cache_alloc(s, flags);
+			ret = kmem_cache_alloc_notrace(s, ...
From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 10:43 am

Corrected the ABI description and the kmemtrace usage guide. Thanks to
Randy Dunlap for noticing these errors.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 Documentation/ABI/testing/debugfs-kmemtrace |    2 +-
 Documentation/vm/kmemtrace.txt              |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
index a5ff9a6..5e6a92a 100644
--- a/Documentation/ABI/testing/debugfs-kmemtrace
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -63,7 +63,7 @@ Adding new data to the packet (features) is done at the end of the mandatory
 data:
 	Feature size	(2 byte)
 	Feature ID	(1 byte)
-	Feature data	(Feature size - 4 bytes)
+	Feature data	(Feature size - 3 bytes)
 
 
 Users:
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
index 75360b1..f656cac 100644
--- a/Documentation/vm/kmemtrace.txt
+++ b/Documentation/vm/kmemtrace.txt
@@ -61,7 +61,7 @@ III. Quick usage guide
 ======================
 
 1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
-CONFIG_KMEMTRACE and CONFIG_DEFAULT_ENABLED).
+CONFIG_KMEMTRACE and CONFIG_KMEMTRACE_DEFAULT_ENABLED).
 
 2) Get the userspace tool and build it:
 $ git-clone git://repo.or.cz/kmemtrace-user.git		# current repository
-- 
1.5.6.1

--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:10 pm

Applied, thanks!
--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:10 pm

Applied, thanks!
--

From: Christoph Lameter
Date: Tuesday, August 19, 2008 - 11:14 am

Could you get rid of the casts by changing the type of parameter of slab_alloc()?

--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 11:24 am

I just looked at it and it isn't a trivial change. slab_alloc() calls
other functions which expect a void ptr. Even if slab_alloc() were to
take an unsigned long and then cast it to a void ptr, other functions do
call slab_alloc() with void ptr arguments (so the casts would move
there).

I'd rather have this merged as it is and change things later, so that
kmemtrace gets some testing from Pekka and others. 

--

From: Christoph Lameter
Date: Tuesday, August 19, 2008 - 11:56 am

Well maybe this patch will do it then:

Subject: slub: Use _RET_IP and use "unsigned long" for kernel text addresses

Use _RET_IP_ instead of buildint_return_address() and make slub use unsigned long
instead of void * for addresses.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>

---
 mm/slub.c |   46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-08-19 11:40:52.332357770 -0700
+++ linux-2.6/mm/slub.c	2008-08-19 11:52:17.479064425 -0700
@@ -177,7 +177,7 @@ static LIST_HEAD(slab_caches);
  * Tracking user of a slab.
  */
 struct track {
-	void *addr;		/* Called from address */
+	unsigned long addr;	/* Called from address */
 	int cpu;		/* Was running on cpu */
 	int pid;		/* Pid context */
 	unsigned long when;	/* When did the operation occur */
@@ -366,7 +366,7 @@ static struct track *get_track(struct km
 }

 static void set_track(struct kmem_cache *s, void *object,
-				enum track_item alloc, void *addr)
+				enum track_item alloc, unsigned long addr)
 {
 	struct track *p;

@@ -390,8 +390,8 @@ static void init_tracking(struct kmem_ca
 	if (!(s->flags & SLAB_STORE_USER))
 		return;

-	set_track(s, object, TRACK_FREE, NULL);
-	set_track(s, object, TRACK_ALLOC, NULL);
+	set_track(s, object, TRACK_FREE, 0L);
+	set_track(s, object, TRACK_ALLOC, 0L);
 }

 static void print_track(const char *s, struct track *t)
@@ -399,7 +399,7 @@ static void print_track(const char *s, s
 	if (!t->addr)
 		return;

-	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
+	printk(KERN_ERR "INFO: %s in %lxS age=%lu cpu=%u pid=%d\n",
 		s, t->addr, jiffies - t->when, t->cpu, t->pid);
 }

@@ -865,7 +865,7 @@ static void setup_object_debug(struct km
 }

 static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
-						void *object, void *addr)
+						void *object, ...
From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 11:59 am

Heh, heh. I'm happy to take your patch or alternatively you can ACK mine 
(which is slightly different):

http://lkml.org/lkml/2008/8/19/336

--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 12:05 pm

It seems Pekka just submitted something like this. Though I think using 0L
should be replaced with 0UL to be fully correct.

Pekka, should I test one of these variants and resubmit, or will you
merge it by yourself?

--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:09 pm

I'm merging my patch to the kmemtrace branch now.


--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:09 pm

From: Christoph Lameter
Date: Tuesday, August 19, 2008 - 1:17 pm

True. Just had 10 minutes to do the patch. Can someone stitch all of the good
things from all three patches together?



--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 1:16 pm

I already did that:

http://git.kernel.org/?p=linux/kernel/git/penberg/slab-2.6.git;a=commitdiff;h=3c0a0d0e...

It's compiled-tested too. I had to do some changes to 
include/linux/slab.h as well.

		Pekka
--

From: Christoph Lameter
Date: Tuesday, August 19, 2008 - 1:23 pm

Please recompile with CONFIG_SLUB_DEBUG off to find some breakage.  F.e.

static inline int alloc_debug_processing(struct kmem_cache *s,
        struct page *page, void *object, void *addr) { return 0; }

static inline int free_debug_processing(struct kmem_cache *s,
        struct page *page, void *object, void *addr) { return 0; }
--

From: Pekka J Enberg
Date: Tuesday, August 19, 2008 - 11:50 am

Yeah. How about something like this?

		Pekka

From f4d0f88f5b460afb2fd5dff75d4fcc0033e85a48 Mon Sep 17 00:00:00 2001
From: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Date: Tue, 19 Aug 2008 20:43:25 +0300
Subject: [PATCH] SLUB: Replace __builtin_return_address(0) with _RET_IP_.

This patch replaces __builtin_return_address(0) with _RET_IP_, since a
previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and
they're widely available now. This makes for shorter and easier to read
code.

[penberg@cs.helsinki.fi: remove _RET_IP_ casts to void pointer]
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h |    4 ++--
 mm/slab.c            |    8 ++++----
 mm/slub.c            |   44 ++++++++++++++++++++++----------------------
 3 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 5ff9676..5ebb8df 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -225,7 +225,7 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
  * request comes from.
  */
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
+extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc_track_caller(size, flags, __builtin_return_address(0))
 #else
@@ -243,7 +243,7 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
  * allocation request comes from.
  */
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
+extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
 			__builtin_return_address(0))
diff --git a/mm/slab.c b/mm/slab.c
index e76eee4..6a0e633 ...
From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:24 pm

Applied, thanks!
--

From: Randy.Dunlap
Date: Tuesday, August 19, 2008 - 10:51 am

Such as what?

Can marker probes be fixed instead?

After seeing & fixing lots of various warnings in the last few days,
I'm thinking that people don't look at/heed warnings nowadays.  Sad.

-- 
~Randy
--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 10:54 am

Hi,

Check the next patch in the series, it provides the alternate fix.
I favor this approach more because it involves fewer changes and we
don't have to use things like "%zu" (which make data type size less
apparent).


	Cheers,
	Eduard

--

From: Mathieu Desnoyers
Date: Tuesday, August 19, 2008 - 11:16 am

Question :

Is this kmemtrace marker meant to be exposed to userspace ?

I suspect not. In all case, not directly. I expect in-kernel probes to
be connected on these markers which will get the arguments they need,
and maybe access the inner data structures. Anyhow, tracepoints should
be used for that, not markers. You can later put markers in the probes
which are themselves connected to tracepoints.

Tracepoints = in-kernel tracing API.

Markers = Data-formatting tracing API, meant to export the data either
to user-space in text or binary format.

See

http://git.kernel.org/?p=linux/kernel/git/compudj/linux-2.6-lttng.git;a=shortlog

tracepoint-related patches.


-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 11:32 am

I think we're ready to try tracepoints. Pekka, could you merge Mathieu's
tracepoints or otherwise provide a branch where I could submit a
--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:25 pm

Sorry, that's too much of a hassle for me. I'll happily take your 
conversion patch once tracepoints hit mainline. Mathieu, are you aiming 
for 2.6.28?
--

From: Mathieu Desnoyers
Date: Tuesday, August 19, 2008 - 1:23 pm

Probably. it's in -tip right now, and the new ftrace depends on it. So
at least 2.6.28 yes.

Mathieu

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
--

From: Frank Ch. Eigler
Date: Tuesday, August 19, 2008 - 11:47 am

FWIW, that was certainly not the intent of markers.  It was to try to
satisfy both sorts of uses with relative type-safety and a minimum of
code.  Tracepoints may be nice if one needs somewhat (how much?) more
performance, and is willing to burden someone else with the necessary
extra code (such as tracepoint-to-marker conversion modules) to expose
the same data to "userspace" tools like lttng/systemtap.

- FChE
--

From: Randy.Dunlap
Date: Tuesday, August 19, 2008 - 12:32 pm

%zu is regular C language.  I.e., I don't get the data type not being
apparent issue...

Maybe kmemtrace should just make everything be long long... :(

-- 
~Randy
--

From: Eduard - Gabriel Munteanu
Date: Tuesday, August 19, 2008 - 2:37 pm

Yes, they can be fixed, but the probe functions will likely show

Yes, I know. But I feel like using unsigned long is consistent with the
way we handle the call site pointers and gfp_t. Pointers are cast to
unsigned long (in _RET_IP_) and size_t's actual range and size is more
apparent if it's cast to unsigned long as well (since allocation sizes
should scale the same as pointers do, and we know sizeof(unsigned long)

--

From: Pekka Enberg
Date: Tuesday, August 19, 2008 - 12:27 pm

I simply dropped Randy's patch so the revert wasn't needed.
--

Previous thread: rt2500pci: rt2500pci_bbp_read: Error - BBPCSR register busy. Read failed. by Alex Riesen on Tuesday, August 19, 2008 - 10:35 am. (3 messages)

Next thread: [PATCH] SGI UV: hardcode the TLB flush interrupt system vector by Cliff Wickman on Tuesday, August 19, 2008 - 10:51 am. (2 messages)