Ingo,
Please pull the perf/core branch that can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git
perf/core
Thanks,
Frederic
---
Frederic Weisbecker (6):
perf: Drop unappropriate tests on arch callchains
perf: Generalize callchain_store()
perf: Generalize some arch callchain code
perf: Factorize callchain context handling
perf: Fix race in callchains
perf: Humanize the number of contexts
Namhyung Kim (1):
perf, tracing: add missing __percpu markups
arch/arm/kernel/perf_event.c | 62 +--------
arch/powerpc/kernel/perf_callchain.c | 86 ++++--------
arch/sh/kernel/perf_callchain.c | 50 +------
arch/sparc/kernel/perf_event.c | 69 +++------
arch/x86/kernel/cpu/perf_event.c | 82 +++--------
include/linux/ftrace_event.h | 4 +-
include/linux/perf_event.h | 30 +++-
kernel/perf_event.c | 259 ++++++++++++++++++++++++++++++----
kernel/trace/trace_event_perf.c | 21 ++--
kernel/trace/trace_functions_graph.c | 2 +-
10 files changed, 342 insertions(+), 323 deletions(-)
--
Drop the TASK_RUNNING test on user tasks for callchains as this check doesn't seem to make any sense. Also remove the tests for !current that is not supposed to happen and current->pid as this should be handled at the generic level, with exclude_idle attribute. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Tested-by: Will Deacon <will.deacon@arm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Stephane Eranian <eranian@google.com> Cc: David Miller <davem@davemloft.net> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Borislav Petkov <bp@amd64.org> --- arch/arm/kernel/perf_event.c | 6 ------ arch/sh/kernel/perf_callchain.c | 3 --- arch/x86/kernel/cpu/perf_event.c | 3 --- 3 files changed, 0 insertions(+), 12 deletions(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 417c392..fdcb0be 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3107,12 +3107,6 @@ perf_do_callchain(struct pt_regs *regs, is_user = user_mode(regs); - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - if (!is_user) perf_callchain_kernel(regs, entry); diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index a9dd3ab..1d6dbce 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -68,9 +68,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) is_user = user_mode(regs); - if (is_user && current->state != TASK_RUNNING) - return; - /* * Only the kernel side is implemented for now. */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f2da20f..4a4d191 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1703,9 +1703,6 @@ perf_do_callchain(struct pt_regs ...
callchain_store() is the same on every archs, inline it in
perf_event.h and rename it to perf_callchain_store() to avoid
any collision.
This removes repetitive code.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Borislav Petkov <bp@amd64.org>
---
arch/arm/kernel/perf_event.c | 15 +++---------
arch/powerpc/kernel/perf_callchain.c | 40 ++++++++++++----------------------
arch/sh/kernel/perf_callchain.c | 11 ++------
arch/sparc/kernel/perf_event.c | 26 ++++++++-------------
arch/x86/kernel/cpu/perf_event.c | 20 ++++++-----------
include/linux/perf_event.h | 7 ++++++
6 files changed, 45 insertions(+), 74 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index fdcb0be..a07c3b1 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3001,13 +3001,6 @@ arch_initcall(init_hw_perf_events);
/*
* Callchain handling code.
*/
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
- u64 ip)
-{
- if (entry->nr < PERF_MAX_STACK_DEPTH)
- entry->ip[entry->nr++] = ip;
-}
/*
* The registers we're interested in are at the end of the variable
@@ -3039,7 +3032,7 @@ user_backtrace(struct frame_tail *tail,
if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
return NULL;
- callchain_store(entry, buftail.lr);
+ perf_callchain_store(entry, buftail.lr);
/*
* Frame pointers should strictly progress back up the stack
@@ -3057,7 +3050,7 @@ perf_callchain_user(struct pt_regs *regs,
{
struct frame_tail *tail;
- callchain_store(entry, ...Store the kernel and user contexts from the generic layer instead
of archs, this gathers some repetitive code.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Borislav Petkov <bp@amd64.org>
---
arch/arm/kernel/perf_event.c | 2 --
arch/powerpc/kernel/perf_callchain.c | 3 ---
arch/sh/kernel/perf_callchain.c | 1 -
arch/sparc/kernel/perf_event.c | 3 ---
arch/x86/kernel/cpu/perf_event.c | 2 --
kernel/perf_event.c | 5 ++++-
6 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 0e3bbdb..64ca8c3 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3049,7 +3049,6 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct frame_tail *tail;
- perf_callchain_store(entry, PERF_CONTEXT_USER);
tail = (struct frame_tail *)regs->ARM_fp - 1;
@@ -3076,7 +3075,6 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct stackframe fr;
- perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
fr.fp = regs->ARM_fp;
fr.sp = regs->ARM_sp;
fr.lr = regs->ARM_lr;
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index f7a85ed..d05ae42 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -57,7 +57,6 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
lr = regs->link;
sp = regs->gpr[1];
- perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
perf_callchain_store(entry, regs->nip);
if ...Now that software events don't have interrupt disabled anymore in
the event path, callchains can nest on any context. So seperating
nmi and others contexts in two buffers has become racy.
Fix this by providing one buffer per nesting level. Given the size
of the callchain entries (2040 bytes * 4), we now need to allocate
them dynamically.
v2: Fixed put_callchain_entry call after recursion.
Fix the type of the recursion, it must be an array.
v3: Use a manual pr cpu allocation (temporary solution until NMIs
can safely access vmalloc'ed memory).
Do a better separation between callchain reference tracking and
allocation. Make the "put" path lockless for non-release cases.
v4: Protect the callchain buffers with rcu.
v5: Do the cpu buffers allocations node affine.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David Miller <davem@davemloft.net>
Cc: Borislav Petkov <bp@amd64.org>
---
arch/x86/kernel/cpu/perf_event.c | 22 +--
include/linux/perf_event.h | 1 -
kernel/perf_event.c | 298 +++++++++++++++++++++++++++++---------
3 files changed, 238 insertions(+), 83 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a3c9222..8e91cf3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1608,6 +1608,11 @@ static const struct stacktrace_ops backtrace_ops = {
void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return NULL;
+ }
+
perf_callchain_store(entry, regs->ip);
dump_trace(NULL, regs, NULL, ...From: Namhyung Kim <namhyung@gmail.com>
ftrace_event_call->perf_events, perf_trace_buf,
fgraph_data->cpu_data and some local variables are percpu pointers
missing __percpu markups. Add them.
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <1281498479-28551-1-git-send-email-namhyung@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
include/linux/ftrace_event.h | 4 ++--
kernel/trace/trace_event_perf.c | 15 ++++++++-------
kernel/trace/trace_functions_graph.c | 2 +-
3 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 02b8b24..5f8ad7b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -191,8 +191,8 @@ struct ftrace_event_call {
unsigned int flags;
#ifdef CONFIG_PERF_EVENTS
- int perf_refcount;
- struct hlist_head *perf_events;
+ int perf_refcount;
+ struct hlist_head __percpu *perf_events;
#endif
};
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index db2eae2..92f5477 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
#include <linux/kprobes.h>
#include "trace.h"
-static char *perf_trace_buf[PERF_NR_CONTEXTS];
+static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
- struct hlist_head *list;
+ struct hlist_head __percpu *list;
int ret = -ENOMEM;
int cpu;
@@ -42,11 +42,11 @@ ...Instead of hardcoding the number of contexts for the recursions
barriers, define a cpp constant to make the code more
self-explanatory.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
---
include/linux/perf_event.h | 14 ++++++++------
kernel/perf_event.c | 4 ++--
kernel/trace/trace_event_perf.c | 8 ++++----
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d7e8ea6..ae6fa60 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -808,6 +808,12 @@ struct perf_event_context {
struct rcu_head rcu_head;
};
+/*
+ * Number of contexts where an event can trigger:
+ * task, softirq, hardirq, nmi.
+ */
+#define PERF_NR_CONTEXTS 4
+
/**
* struct perf_event_cpu_context - per cpu event context structure
*/
@@ -821,12 +827,8 @@ struct perf_cpu_context {
struct mutex hlist_mutex;
int hlist_refcount;
- /*
- * Recursion avoidance:
- *
- * task, softirq, irq, nmi context
- */
- int recursion[4];
+ /* Recursion avoidance in each contexts */
+ int recursion[PERF_NR_CONTEXTS];
};
struct perf_output_handle {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 75ab8a2..f416aef 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1772,7 +1772,7 @@ struct callchain_cpus_entries {
struct perf_callchain_entry *cpu_entries[0];
};
-static DEFINE_PER_CPU(int, callchain_recursion[4]);
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
struct callchain_cpus_entries *callchain_cpus_entries;
@@ -1828,7 +1828,7 @@ static int alloc_callchain_buffers(void)
if (!entries)
return -ENOMEM;
...- Most archs use one callchain buffer per cpu, except x86 that needs
to deal with NMIs. Provide a default perf_callchain_buffer()
implementation that x86 overrides.
- Centralize all the kernel/user regs handling and invoke new arch
handlers from there: perf_callchain_user() / perf_callchain_kernel()
That avoid all the user_mode(), current->mm checks and so...
- Invert some parameters in perf_callchain_*() helpers: entry to the
left, regs to the right, following the traditional (dst, src).
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Tested-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Borislav Petkov <bp@amd64.org>
---
arch/arm/kernel/perf_event.c | 43 +++---------------------------
arch/powerpc/kernel/perf_callchain.c | 49 +++++++++------------------------
arch/sh/kernel/perf_callchain.c | 37 +------------------------
arch/sparc/kernel/perf_event.c | 46 ++++++++++---------------------
arch/x86/kernel/cpu/perf_event.c | 45 +++++-------------------------
include/linux/perf_event.h | 10 ++++++-
kernel/perf_event.c | 40 ++++++++++++++++++++++++++-
7 files changed, 90 insertions(+), 180 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a07c3b1..0e3bbdb 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3044,17 +3044,13 @@ user_backtrace(struct frame_tail *tail,
return buftail.fp - 1;
}
-static void
-perf_callchain_user(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct frame_tail *tail;
...BTW, I've built tested it on ARM, Sparc64 (thanks Borislav for the pointer), PowerPC and SH. And I've runtime tested it on x86-64, Will has runtime tested it on ARM. Thanks. --
/usr/src/linux-2.6/arch/x86/kernel/cpu/perf_event.c: In function ‘perf_callchain_kernel’:
/usr/src/linux-2.6/arch/x86/kernel/cpu/perf_event.c:1645: warning: ‘return’ with a value, in function returning void
/usr/src/linux-2.6/arch/x86/kernel/cpu/perf_event.c: In function ‘perf_callchain_user’:
/usr/src/linux-2.6/arch/x86/kernel/cpu/perf_event.c:1698: warning: ‘return’ with a value, in function returning void
---
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -1642,7 +1642,7 @@ perf_callchain_kernel(struct perf_callch
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
- return NULL;
+ return;
}
perf_callchain_store(entry, regs->ip);
@@ -1695,7 +1695,7 @@ perf_callchain_user(struct perf_callchai
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
- return NULL;
+ return;
}
fp = (void __user *)regs->bp;
--
Ah sorry. Should I carry this patch (I'd need your Sob) or will you? --
