Re: [PATCH] tracing: Cleanup the convoluted softirq tracepoints

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Steven Rostedt
Date: Tuesday, October 19, 2010 - 1:55 pm

On Tue, 2010-10-19 at 21:49 +0200, Thomas Gleixner wrote:


Sure, this is where it gets fun :-)


Since I don't have your patch yet, I used the original:


void test(struct softirq_action *h)
{
        trace_softirq_entry(h, softirq_vec);
        h->action(h);
}



Well, the one jmpq is nop'd out, and the jmp is unconditional. I've
always thought an unconditional jmp was faster than a conditional one,
since there's no need to go into the branch prediction logic. The CPU
can simply skip to the code at the jump target. Of course, this pollutes
the I$.



Because you do the h - softirq_vec in the tracepoint parameter? I got a
different result:

Here's the diff. I did a cut -c10- to get rid of the line numbers so I
have a better diff. There are still differences due to jump locations, but
those are easy to figure out:

I diffed nojump vs jump. The '-' is with nojump, the '+' is with jumps.

--- /tmp/s2	2010-10-19 16:40:19.000000000 -0400
+++ /tmp/s1	2010-10-19 16:40:23.000000000 -0400
@@ -1,38 +1,33 @@
-00026f0 <test>:
+00027a0 <test>:
 	55                   	push   %rbp
 	48 89 e5             	mov    %rsp,%rbp
-	48 83 ec 10          	sub    $0x10,%rsp
-	48 89 1c 24          	mov    %rbx,(%rsp)
-	4c 89 64 24 08       	mov    %r12,0x8(%rsp)
-	e8 00 00 00 00       	callq  2706 <test+0x16>
+	41 54                	push   %r12
+	53                   	push   %rbx
+	e8 00 00 00 00       	callq  27ac <test+0xc>
 R_X86_64_PC32	mcount-0x4
-	8b 15 00 00 00 00    	mov    0x0(%rip),%edx        # 270c <test+0x1c>
-R_X86_64_PC32	__tracepoint_softirq_entry+0x4
 	48 89 fb             	mov    %rdi,%rbx

vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
-	85 d2                	test   %edx,%edx
-	75 10                	jne    2723 <test+0x33>
+	e9 00 00 00 00       	jmpq   27b4 <test+0x14>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

There's the difference with this code. We replaced a test and conditional
jump with a single jump that will later be nop'd out.


 	48 89 df             	mov    %rbx,%rdi
 	ff 13                	callq  *(%rbx)
-	48 8b 1c 24          	mov    (%rsp),%rbx
-	4c 8b 64 24 08       	mov    0x8(%rsp),%r12
+	5b                   	pop    %rbx
+	41 5c                	pop    %r12
 	c9                   	leaveq 
 	c3                   	retq

^^^^^^^^^^^^^^^^^^^
end of the fast path, below is the code that does the tracepoint.



   
+	66 90                	xchg   %ax,%ax
 	65 48 8b 04 25 00 00 	mov    %gs:0x0,%rax
 	00 00 
 R_X86_64_32S	kernel_stack
 	83 80 44 e0 ff ff 01 	addl   $0x1,-0x1fbc(%rax)
-	e8 00 00 00 00       	callq  2738 <test+0x48>
+	e8 00 00 00 00       	callq  27d5 <test+0x35>
 R_X86_64_PC32	debug_lockdep_rcu_enabled-0x4
 	85 c0                	test   %eax,%eax
-	74 09                	je     2745 <test+0x55>
-	80 3d 00 00 00 00 00 	cmpb   $0x0,0x0(%rip)        # 2743 <test+0x53>
-R_X86_64_PC32	.bss-0x1
-	74 53                	je     2798 <test+0xa8>
-	4c 8b 25 00 00 00 00 	mov    0x0(%rip),%r12        # 274c <test+0x5c>
+	75 57                	jne    2830 <test+0x90>
+	4c 8b 25 00 00 00 00 	mov    0x0(%rip),%r12        # 27e0 <test+0x40>
 R_X86_64_PC32	__tracepoint_softirq_entry+0x1c
 	4d 85 e4             	test   %r12,%r12
-	74 22                	je     2773 <test+0x83>
+	74 29                	je     280e <test+0x6e>
 	49 8b 04 24          	mov    (%r12),%rax
+	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)
 	49 8b 7c 24 08       	mov    0x8(%r12),%rdi
 	49 83 c4 10          	add    $0x10,%r12
 	48 c7 c2 00 00 00 00 	mov    $0x0,%rdx
@@ -41,49 +36,52 @@
 	ff d0                	callq  *%rax
 	49 8b 04 24          	mov    (%r12),%rax
 	48 85 c0             	test   %rax,%rax
-	75 e2                	jne    2755 <test+0x65>
+	75 e2                	jne    27f0 <test+0x50>
 	65 48 8b 04 25 00 00 	mov    %gs:0x0,%rax
 	00 00 
 R_X86_64_32S	kernel_stack
 	83 a8 44 e0 ff ff 01 	subl   $0x1,-0x1fbc(%rax)
 	48 8b 80 38 e0 ff ff 	mov    -0x1fc8(%rax),%rax
 	a8 08                	test   $0x8,%al
-	74 85                	je     2713 <test+0x23>
-	e8 00 00 00 00       	callq  2793 <test+0xa3>
+	74 8b                	je     27b4 <test+0x14>
+	e8 00 00 00 00       	callq  282e <test+0x8e>
 R_X86_64_PC32	preempt_schedule-0x4
-	e9 7b ff ff ff       	jmpq   2713 <test+0x23>
-	0f 1f 84 00 00 00 00 	nopl   0x0(%rax,%rax,1)
-	00 
-	e8 00 00 00 00       	callq  27a5 <test+0xb5>
+	eb 84                	jmp    27b4 <test+0x14>
+	80 3d 00 00 00 00 00 	cmpb   $0x0,0x0(%rip)        # 2837 <test+0x97>
+R_X86_64_PC32	.bss-0x1
+	75 a0                	jne    27d9 <test+0x39>
+	e8 00 00 00 00       	callq  283e <test+0x9e>
 R_X86_64_PC32	debug_lockdep_rcu_enabled-0x4
 	85 c0                	test   %eax,%eax
-	74 9c                	je     2745 <test+0x55>
-	83 3d 00 00 00 00 00 	cmpl   $0x0,0x0(%rip)        # 27b0 <test+0xc0>
-R_X86_64_PC32	debug_locks-0x5
-	75 3f                	jne    27f1 <test+0x101>
+	74 97                	je     27d9 <test+0x39>
+	8b 35 00 00 00 00    	mov    0x0(%rip),%esi        # 2848 <test+0xa8>
+R_X86_64_PC32	debug_locks-0x4
+	85 f6                	test   %esi,%esi
+	75 44                	jne    2890 <test+0xf0>
 	65 48 8b 04 25 00 00 	mov    %gs:0x0,%rax
 	00 00 
 R_X86_64_32S	kernel_stack
-	83 b8 44 e0 ff ff 00 	cmpl   $0x0,-0x1fbc(%rax)
-	75 81                	jne    2745 <test+0x55>
+	8b 88 44 e0 ff ff    	mov    -0x1fbc(%rax),%ecx
+	85 c9                	test   %ecx,%ecx
+	0f 85 76 ff ff ff    	jne    27d9 <test+0x39>
 	ff 14 25 00 00 00 00 	callq  *0x0
 R_X86_64_32S	pv_irq_ops
 	f6 c4 02             	test   $0x2,%ah
-	0f 84 71 ff ff ff    	je     2745 <test+0x55>
+	0f 84 66 ff ff ff    	je     27d9 <test+0x39>
 	be 7c 00 00 00       	mov    $0x7c,%esi
 	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
 R_X86_64_32S	.rodata.str1.1
-	c6 05 00 00 00 00 01 	movb   $0x1,0x0(%rip)        # 27e7 <test+0xf7>
+	c6 05 00 00 00 00 01 	movb   $0x1,0x0(%rip)        # 2886 <test+0xe6>
 R_X86_64_PC32	.bss-0x1
-	e8 00 00 00 00       	callq  27ec <test+0xfc>
+	e8 00 00 00 00       	callq  288b <test+0xeb>
 R_X86_64_PC32	lockdep_rcu_dereference-0x4
-	e9 54 ff ff ff       	jmpq   2745 <test+0x55>
+	e9 49 ff ff ff       	jmpq   27d9 <test+0x39>
 	48 c7 c7 00 00 00 00 	mov    $0x0,%rdi
 R_X86_64_32S	rcu_sched_lock_map
-	e8 00 00 00 00       	callq  27fd <test+0x10d>
+	e8 00 00 00 00       	callq  289c <test+0xfc>
 R_X86_64_PC32	lock_is_held-0x4
 	85 c0                	test   %eax,%eax
-	0f 85 40 ff ff ff    	jne    2745 <test+0x55>
-	eb ab                	jmp    27b2 <test+0xc2>
-	66 0f 1f 84 00 00 00 	nopw   0x0(%rax,%rax,1)
-	00 00 
+	0f 85 35 ff ff ff    	jne    27d9 <test+0x39>
+	eb a6                	jmp    284c <test+0xac>
+	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
+	00 00 00 



Perhaps so, but as Peter Zijlstra has said, compiling with gcc is a
random number generator. Your mileage may vary.


The above looks like what I have.

-- Steve




--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[PATCH v4 0/5] netdev: show a process of packets, Koki Sanagi, (Mon Aug 23, 2:41 am)
[PATCH v4 1/5] irq: add tracepoint to softirq_raise, Koki Sanagi, (Mon Aug 23, 2:42 am)
[PATCH v4 4/5] skb: add tracepoints to freeing skb, Koki Sanagi, (Mon Aug 23, 2:46 am)
Re: [PATCH v4 4/5] skb: add tracepoints to freeing skb, David Miller, (Mon Aug 23, 8:53 pm)
Re: [PATCH v4 0/5] netdev: show a process of packets, Steven Rostedt, (Mon Aug 30, 4:50 pm)
Re: [PATCH v4 0/5] netdev: show a process of packets, Koki Sanagi, (Thu Sep 2, 7:10 pm)
Re: [PATCH v4 0/5] netdev: show a process of packets, David Miller, (Thu Sep 2, 7:17 pm)
Re: [PATCH v4 0/5] netdev: show a process of packets, Koki Sanagi, (Thu Sep 2, 7:55 pm)
Re: [PATCH v4 0/5] netdev: show a process of packets, Frederic Weisbecker, (Thu Sep 2, 9:46 pm)
Re: [PATCH v4 0/5] netdev: show a process of packets, Koki Sanagi, (Thu Sep 2, 10:12 pm)
Re: [PATCH v4 1/5] irq: add tracepoint to softirq_raise, Frederic Weisbecker, (Fri Sep 3, 8:29 am)
Re: [PATCH v4 1/5] irq: add tracepoint to softirq_raise, Steven Rostedt, (Fri Sep 3, 8:39 am)
Re: [PATCH v4 1/5] irq: add tracepoint to softirq_raise, Frederic Weisbecker, (Fri Sep 3, 8:42 am)
Re: [PATCH v4 1/5] irq: add tracepoint to softirq_raise, Steven Rostedt, (Fri Sep 3, 8:43 am)
Re: [PATCH v4 1/5] irq: add tracepoint to softirq_raise, Frederic Weisbecker, (Fri Sep 3, 8:50 am)
Re: [PATCH v4 5/5] perf:add a script shows a process of packet, Frederic Weisbecker, (Tue Sep 7, 9:57 am)
[tip:perf/core] irq: Add tracepoint to softirq_raise, tip-bot for Lai Jian ..., (Wed Sep 8, 1:33 am)
[tip:perf/core] napi: Convert trace_napi_poll to TRACE_EVENT, tip-bot for Neil Horman, (Wed Sep 8, 1:34 am)
[tip:perf/core] skb: Add tracepoints to freeing skb, tip-bot for Koki Sanagi, (Wed Sep 8, 1:35 am)
[tip:perf/core] perf: Add a script to show packets processing, tip-bot for Koki Sanagi, (Wed Sep 8, 1:35 am)
[PATCH] irq: Fix circular headers dependency, Frederic Weisbecker, (Wed Sep 8, 5:26 am)
[tip:perf/core] irq: Fix circular headers dependency, tip-bot for Frederic ..., (Thu Sep 9, 12:54 pm)
[PATCH] tracing: Cleanup the convoluted softirq tracepoints, Thomas Gleixner, (Tue Oct 19, 6:00 am)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Tue Oct 19, 6:22 am)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Tue Oct 19, 7:00 am)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Tue Oct 19, 7:28 am)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Steven Rostedt, (Tue Oct 19, 1:55 pm)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Tue Oct 19, 3:41 pm)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Wed Oct 20, 8:41 am)
[tip:perf/core] tracing: Cleanup the convoluted softirq tr ..., tip-bot for Thomas G ..., (Thu Oct 21, 7:52 am)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Mon Oct 25, 3:01 pm)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Mon Oct 25, 3:55 pm)
Re: [PATCH] tracing: Cleanup the convoluted softirq tracep ..., Mathieu Desnoyers, (Mon Oct 25, 6:14 pm)