This patch series is experimental.
This is a second version of the series. This update is for further testing.
Changes v1->v2
- passing the error variable as a reference on __{put|get}_user_cerr.
========
I noticed there are inefficient codes in x86 signals.
For example, disassembled 32-bit setup_sigcontext();
0000007c <setup_sigcontext>:
7c: 55 push %ebp
7d: 89 e5 mov %esp,%ebp
7f: 57 push %edi
80: 31 ff xor %edi,%edi
82: 56 push %esi
83: 89 c6 mov %eax,%esi
85: 53 push %ebx
86: 83 ec 58 sub $0x58,%esp
89: 89 55 a4 mov %edx,-0x5c(%ebp)
8c: 89 fa mov %edi,%edx
8e: 8b 41 24 mov 0x24(%ecx),%eax
91: 89 46 04 mov %eax,0x4(%esi)
94: 89 55 a8 mov %edx,-0x58(%ebp)
...
184: 8b 5d ac mov -0x54(%ebp),%ebx
187: 0b 5d a8 or -0x58(%ebp),%ebx
18a: 0b 5d b0 or -0x50(%ebp),%ebx
18d: 0b 5d b4 or -0x4c(%ebp),%ebx
190: 0b 5d b8 or -0x48(%ebp),%ebx
193: 0b 5d bc or -0x44(%ebp),%ebx
196: 0b 5d c0 or -0x40(%ebp),%ebx
199: 0b 5d c4 or -0x3c(%ebp),%ebx
19c: 0b 5d c8 or -0x38(%ebp),%ebx
19f: 0b 5d cc or -0x34(%ebp),%ebx
1a2: 0b 5d d0 or -0x30(%ebp),%ebx
1a5: 0b 5d d4 or -0x2c(%ebp),%ebx
1a8: 0b 5d d8 or -0x28(%ebp),%ebx
1ab: 0b 5d dc or -0x24(%ebp),%ebx
1ae: 0b 5d e0 or -0x20(%ebp),%ebx
1b1: 0b 5d e4 or -0x1c(%ebp),%ebx
1b4: 0b 5d e8 or -0x18(%ebp),%ebx
1b7: 0b 5d ec or -0x14(%ebp),%ebx
1ba: 0b 5d f0 or -0x10(%ebp),%ebx
1bd: 09 fb or %edi,%ebx
...
1dc: 09 d8 ...This is a gcc failure, and should be reported to the gcc people. -hpa --
Does this gcc failure mean unnecessary storing into stack? thanks, Hiroshi Shimamoto --
Yes, it should be able to process this in a register, instead of storing to the stack and then merging later. It's possible it's trying to do that to hide latency, but that's clearly a lose in this case. I'd hate to obfuscate the code, and I'd *really* hate to obfuscate the code to work around gcc brokenness, and then not even bothering to tell the gcc folks. -hpa --
OK, I'll test with some gcc versions. It's necessary to check outputs of the latest gcc, I think. thanks, Hiroshi Shimamoto --
assuming it's reported to the gcc folks too, do you have any objections against including this in tip/x86/signal? The 1% improvement in lmbench is quite nice. Ingo --
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Use same rule to name of __put_user and __get_user.
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
include/asm-x86/uaccess.h | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 414b116..c098dfe 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -186,7 +186,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
-#define __put_user_u64(x, addr, err) \
+#define __put_user_asm_u64(x, addr, err) \
asm volatile("1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
"3:\n" \
@@ -203,7 +203,7 @@ extern int __get_user_bad(void);
asm volatile("call __put_user_8" : "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
-#define __put_user_u64(x, ptr, retval) \
+#define __put_user_asm_u64(x, ptr, retval) \
__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
#endif
@@ -279,7 +279,7 @@ do { \
__put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\
break; \
case 8: \
- __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \
+ __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \
break; \
default: \
__put_user_bad(); \
--
1.5.6
--
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Introduce __{put|get}_user_asm_eop which receives eop for error opecode.
Define __{put|get}_user_asm as __{put|get}_user_asm_eop with "mov".
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
include/asm-x86/uaccess.h | 27 +++++++++++++++++++++------
1 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index c098dfe..84b0600 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -186,12 +186,12 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
-#define __put_user_asm_u64(x, addr, err) \
+#define __put_user_asm_eop_u64(x, addr, eop, err) \
asm volatile("1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
- "4: movl %3,%0\n" \
+ "4: " eop " %3,%0\n" \
" jmp 3b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 4b) \
@@ -199,12 +199,17 @@ extern int __get_user_bad(void);
: "=r" (err) \
: "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+#define __put_user_asm_u64(x, addr, err) \
+ __put_user_asm_eop_u64(x, addr, "movl", err)
+
#define __put_user_x8(x, ptr, __ret_pu) \
asm volatile("call __put_user_8" : "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
#define __put_user_asm_u64(x, ptr, retval) \
__put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
+#define __put_user_asm_eop_u64(x, ptr, eop, retval) \
+ __put_user_asm_eop(x, ptr, retval, "q", "", "Zr", eop, -EFAULT)
#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
#endif
@@ -311,9 +316,12 @@ do { \
#ifdef CONFIG_X86_32
#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
+#define __get_user_asm_eop_u64(x, ptr, retval, eop, errret) (x) = __get_user_bad()
#else
#define __get_user_asm_u64(x, ptr, retval, ...From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Introduce __{put|get}_user_cerr for cumulative error handling.
The following 2 lines are same.
__{put|get}_user_cerr(x, ptr, &err);
err |= __{put|get}_user(x, ptr);
Introduce __{put|get}_user_size_cerr for internal use from __{put|get}_user_cerr.
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
include/asm-x86/uaccess.h | 74 +++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 74 insertions(+), 0 deletions(-)
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 84b0600..284c7d3 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -291,6 +291,31 @@ do { \
} \
} while (0)
+#define __put_user_size_cerr(x, ptr, size, retval, errret) \
+do { \
+ __chk_user_ptr(ptr); \
+ switch (size) { \
+ case 1: \
+ __put_user_asm_eop(x, ptr, retval, "b", "b", "iq", \
+ "or", errret); \
+ break; \
+ case 2: \
+ __put_user_asm_eop(x, ptr, retval, "w", "w", "ir", \
+ "or", errret); \
+ break; \
+ case 4: \
+ __put_user_asm_eop(x, ptr, retval, "l", "k", "ir", \
+ "or", errret); \
+ break; \
+ case 8: \
+ __put_user_asm_eop_u64((__typeof__(*ptr))(x), ptr, \
+ "or", retval); \
+ break; \
+ default: \
+ __put_user_bad(); \
+ } \
+} while (0)
+
#else
#define __put_user_size(x, ptr, size, retval, errret) \
@@ -302,6 +327,14 @@ do { \
retval = errret; \
} while (0)
+#define __put_user_size_cerr(x, ptr, size, retval, errret) \
+do { \
+ __typeof__(*(ptr))__pus_tmp = x; \
+ \
+ if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
+ retval |= errret; \
+} while (0)
+
#define put_user(x, ptr) \
({ \
int __ret_pu; \
@@ -346,6 +379,30 @@ do { \
} \
} while (0)
+#define __get_user_size_cerr(x, ptr, size, ...From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Use __{put|get}_user_cerr for cumulative error handling in x86 signal code.
This makes stack usage and code size small.
The line
err |= __put_user(x, ptr);
is comiled to like this;
a0: 89 fa mov %edi,%edx
a2: 8b 41 20 mov 0x20(%ecx),%eax
a5: 89 46 08 mov %eax,0x8(%esi)
a8: 89 55 b0 mov %edx,-0x50(%ebp)
and the line
__put_user_cerr(x, ptr, &err);
is comiled to like this;
92: 8b 41 20 mov 0x20(%ecx),%eax
95: 89 47 08 mov %eax,0x8(%edi)
and the fixup code on exception looks like this;
00000000 <.fixup>:
0: 83 ce f2 or $0xfffffff2,%esi
3: e9 8a 00 00 00 jmp 92 <.fixup+0x92>
$ size signal_*
text data bss dec hex filename
4507 0 0 4507 119b signal_32.o.new
5031 0 0 5031 13a7 signal_32.o.old
3827 0 0 3827 ef3 signal_64.o.new
4652 0 0 4652 122c signal_64.o.old
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
---
arch/x86/kernel/signal_32.c | 96 +++++++++++++++++++++---------------------
arch/x86/kernel/signal_64.c | 86 +++++++++++++++++++-------------------
2 files changed, 91 insertions(+), 91 deletions(-)
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 4337cd5..0c72f54 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -126,21 +126,21 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#define COPY(x) err |= __get_user(regs->x, &sc->x)
+#define COPY(x) __get_user_cerr(regs->x, &sc->x, &err)
#define COPY_SEG(seg) \
{ unsigned short tmp; \
- err |= __get_user(tmp, &sc->seg); \
+ __get_user_cerr(tmp, &sc->seg, ...