[RFC PATCH v2 -tip 0/4] x86: signal handler improvement

Previous thread: SMACK netfilter smacklabel socket match by Tilman Baumann on Thursday, September 25, 2008 - 10:25 am. (30 messages)

Next thread: [RFC PATCH 0/2] QE Pin Multiplexing API by Anton Vorontsov on Thursday, September 25, 2008 - 11:36 am. (6 messages)
From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 11:08 am

This patch series is experimental.
This is a second version of the series. This update is for further testing.

Changes v1->v2
- passing the error variable as a reference on __{put|get}_user_cerr.

========
I noticed there are inefficient codes in x86 signals.
For example, disassembled 32-bit setup_sigcontext();

0000007c <setup_sigcontext>:
  7c:	55                   	push   %ebp
  7d:	89 e5                	mov    %esp,%ebp
  7f:	57                   	push   %edi
  80:	31 ff                	xor    %edi,%edi
  82:	56                   	push   %esi
  83:	89 c6                	mov    %eax,%esi
  85:	53                   	push   %ebx
  86:	83 ec 58             	sub    $0x58,%esp
  89:	89 55 a4             	mov    %edx,-0x5c(%ebp)
  8c:	89 fa                	mov    %edi,%edx
  8e:	8b 41 24             	mov    0x24(%ecx),%eax
  91:	89 46 04             	mov    %eax,0x4(%esi)
  94:	89 55 a8             	mov    %edx,-0x58(%ebp)
...
 184:	8b 5d ac             	mov    -0x54(%ebp),%ebx
 187:	0b 5d a8             	or     -0x58(%ebp),%ebx
 18a:	0b 5d b0             	or     -0x50(%ebp),%ebx
 18d:	0b 5d b4             	or     -0x4c(%ebp),%ebx
 190:	0b 5d b8             	or     -0x48(%ebp),%ebx
 193:	0b 5d bc             	or     -0x44(%ebp),%ebx
 196:	0b 5d c0             	or     -0x40(%ebp),%ebx
 199:	0b 5d c4             	or     -0x3c(%ebp),%ebx
 19c:	0b 5d c8             	or     -0x38(%ebp),%ebx
 19f:	0b 5d cc             	or     -0x34(%ebp),%ebx
 1a2:	0b 5d d0             	or     -0x30(%ebp),%ebx
 1a5:	0b 5d d4             	or     -0x2c(%ebp),%ebx
 1a8:	0b 5d d8             	or     -0x28(%ebp),%ebx
 1ab:	0b 5d dc             	or     -0x24(%ebp),%ebx
 1ae:	0b 5d e0             	or     -0x20(%ebp),%ebx
 1b1:	0b 5d e4             	or     -0x1c(%ebp),%ebx
 1b4:	0b 5d e8             	or     -0x18(%ebp),%ebx
 1b7:	0b 5d ec             	or     -0x14(%ebp),%ebx
 1ba:	0b 5d f0             	or     -0x10(%ebp),%ebx
 1bd:	09 fb                	or     %edi,%ebx
...
 1dc:	09 d8            ...
From: H. Peter Anvin
Date: Thursday, September 25, 2008 - 11:14 am

This is a gcc failure, and should be reported to the gcc people.

	-hpa
--

From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 11:29 am

Does this gcc failure mean unnecessary storing into stack?

thanks,
Hiroshi Shimamoto

--

From: H. Peter Anvin
Date: Thursday, September 25, 2008 - 11:33 am

Yes, it should be able to process this in a register, instead of storing 
to the stack and then merging later.

It's possible it's trying to do that to hide latency, but that's clearly 
a lose in this case.

I'd hate to obfuscate the code, and I'd *really* hate to obfuscate the 
code to work around gcc brokenness, and then not even bothering to tell 
the gcc folks.

	-hpa
--

From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 12:38 pm

OK, I'll test with some gcc versions.
It's necessary to check outputs of the latest gcc, I think.

thanks,
Hiroshi Shimamoto
--

From: Ingo Molnar
Date: Saturday, September 27, 2008 - 10:11 am

assuming it's reported to the gcc folks too, do you have any objections 
against including this in tip/x86/signal? The 1% improvement in lmbench 
is quite nice.

	Ingo
--

From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 11:18 am

From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>

Use same rule to name of __put_user and __get_user.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
 include/asm-x86/uaccess.h |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 414b116..c098dfe 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -186,7 +186,7 @@ extern int __get_user_bad(void);
 
 
 #ifdef CONFIG_X86_32
-#define __put_user_u64(x, addr, err)					\
+#define __put_user_asm_u64(x, addr, err)					\
 	asm volatile("1:	movl %%eax,0(%2)\n"			\
 		     "2:	movl %%edx,4(%2)\n"			\
 		     "3:\n"						\
@@ -203,7 +203,7 @@ extern int __get_user_bad(void);
 	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
 		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 #else
-#define __put_user_u64(x, ptr, retval) \
+#define __put_user_asm_u64(x, ptr, retval) \
 	__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
 #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
 #endif
@@ -279,7 +279,7 @@ do {									\
 		__put_user_asm(x, ptr, retval, "l", "k",  "ir", errret);\
 		break;							\
 	case 8:								\
-		__put_user_u64((__typeof__(*ptr))(x), ptr, retval);	\
+		__put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval);	\
 		break;							\
 	default:							\
 		__put_user_bad();					\
-- 
1.5.6

--

From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 11:18 am

From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>

Introduce __{put|get}_user_asm_eop which receives eop for error opecode.
Define __{put|get}_user_asm as __{put|get}_user_asm_eop with "mov".

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
 include/asm-x86/uaccess.h |   27 +++++++++++++++++++++------
 1 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index c098dfe..84b0600 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -186,12 +186,12 @@ extern int __get_user_bad(void);
 
 
 #ifdef CONFIG_X86_32
-#define __put_user_asm_u64(x, addr, err)					\
+#define __put_user_asm_eop_u64(x, addr, eop, err)			\
 	asm volatile("1:	movl %%eax,0(%2)\n"			\
 		     "2:	movl %%edx,4(%2)\n"			\
 		     "3:\n"						\
 		     ".section .fixup,\"ax\"\n"				\
-		     "4:	movl %3,%0\n"				\
+		     "4:	" eop " %3,%0\n"			\
 		     "	jmp 3b\n"					\
 		     ".previous\n"					\
 		     _ASM_EXTABLE(1b, 4b)				\
@@ -199,12 +199,17 @@ extern int __get_user_bad(void);
 		     : "=r" (err)					\
 		     : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
 
+#define __put_user_asm_u64(x, addr, err)			\
+	__put_user_asm_eop_u64(x, addr, "movl", err)
+
 #define __put_user_x8(x, ptr, __ret_pu)				\
 	asm volatile("call __put_user_8" : "=a" (__ret_pu)	\
 		     : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 #else
 #define __put_user_asm_u64(x, ptr, retval) \
 	__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
+#define __put_user_asm_eop_u64(x, ptr, eop, retval) \
+	__put_user_asm_eop(x, ptr, retval, "q", "", "Zr", eop, -EFAULT)
 #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
 #endif
 
@@ -311,9 +316,12 @@ do {									\
 
 #ifdef CONFIG_X86_32
 #define __get_user_asm_u64(x, ptr, retval, errret)	(x) = __get_user_bad()
+#define __get_user_asm_eop_u64(x, ptr, retval, eop, errret)	(x) = __get_user_bad()
 #else
 #define __get_user_asm_u64(x, ptr, retval, ...
From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 11:18 am

From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>

Introduce __{put|get}_user_cerr for cumulative error handling.
The following 2 lines are same.
  __{put|get}_user_cerr(x, ptr, &err);
  err |= __{put|get}_user(x, ptr);

Introduce __{put|get}_user_size_cerr for internal use from __{put|get}_user_cerr.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
 include/asm-x86/uaccess.h |   74 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 74 insertions(+), 0 deletions(-)

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 84b0600..284c7d3 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -291,6 +291,31 @@ do {									\
 	}								\
 } while (0)
 
+#define __put_user_size_cerr(x, ptr, size, retval, errret)		\
+do {									\
+	__chk_user_ptr(ptr);						\
+	switch (size) {							\
+	case 1:								\
+		__put_user_asm_eop(x, ptr, retval, "b", "b", "iq",	\
+							"or", errret);	\
+		break;							\
+	case 2:								\
+		__put_user_asm_eop(x, ptr, retval, "w", "w", "ir",	\
+							"or", errret);	\
+		break;							\
+	case 4:								\
+		__put_user_asm_eop(x, ptr, retval, "l", "k",  "ir",	\
+							"or", errret);	\
+		break;							\
+	case 8:								\
+		__put_user_asm_eop_u64((__typeof__(*ptr))(x), ptr,	\
+							"or", retval);	\
+		break;							\
+	default:							\
+		__put_user_bad();					\
+	}								\
+} while (0)
+
 #else
 
 #define __put_user_size(x, ptr, size, retval, errret)			\
@@ -302,6 +327,14 @@ do {									\
 		retval = errret;					\
 } while (0)
 
+#define __put_user_size_cerr(x, ptr, size, retval, errret)		\
+do {									\
+	__typeof__(*(ptr))__pus_tmp = x;				\
+									\
+	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0))	\
+		retval |= errret;					\
+} while (0)
+
 #define put_user(x, ptr)					\
 ({								\
 	int __ret_pu;						\
@@ -346,6 +379,30 @@ do {									\
 	}								\
 } while (0)
 
+#define __get_user_size_cerr(x, ptr, size, ...
From: Hiroshi Shimamoto
Date: Thursday, September 25, 2008 - 11:18 am

From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>

Use __{put|get}_user_cerr for cumulative error handling in x86 signal code.
This makes stack usage and code size small.

The line
	err |= __put_user(x, ptr);
is comiled to like this;
  a0:	89 fa                	mov    %edi,%edx
  a2:	8b 41 20             	mov    0x20(%ecx),%eax
  a5:	89 46 08             	mov    %eax,0x8(%esi)
  a8:	89 55 b0             	mov    %edx,-0x50(%ebp)

and the line
	__put_user_cerr(x, ptr, &err);
is comiled to like this;
  92:	8b 41 20             	mov    0x20(%ecx),%eax
  95:	89 47 08             	mov    %eax,0x8(%edi)

and the fixup code on exception looks like this;
00000000 <.fixup>:
   0:	83 ce f2             	or     $0xfffffff2,%esi
   3:	e9 8a 00 00 00       	jmp    92 <.fixup+0x92>

$ size signal_*
   text	   data	    bss	    dec	    hex	filename
   4507	      0	      0	   4507	   119b	signal_32.o.new
   5031	      0	      0	   5031	   13a7	signal_32.o.old
   3827	      0	      0	   3827	    ef3	signal_64.o.new
   4652	      0	      0	   4652	   122c	signal_64.o.old

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
 arch/x86/kernel/signal_32.c |   96 +++++++++++++++++++++---------------------
 arch/x86/kernel/signal_64.c |   86 +++++++++++++++++++-------------------
 2 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 4337cd5..0c72f54 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -126,21 +126,21 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+#define COPY(x)		__get_user_cerr(regs->x, &sc->x, &err)
 
 #define COPY_SEG(seg)							\
 	{ unsigned short tmp;						\
-	  err |= __get_user(tmp, &sc->seg);				\
+	  __get_user_cerr(tmp, &sc->seg, ...
Previous thread: SMACK netfilter smacklabel socket match by Tilman Baumann on Thursday, September 25, 2008 - 10:25 am. (30 messages)

Next thread: [RFC PATCH 0/2] QE Pin Multiplexing API by Anton Vorontsov on Thursday, September 25, 2008 - 11:36 am. (6 messages)