X86_32 before: 00000000 <bitrev32>: 0: 55 push %ebp 1: 89 c1 mov %eax,%ecx 3: 89 e5 mov %esp,%ebp 5: 0f b6 cd movzbl %ch,%ecx 8: 53 push %ebx 9: 89 c3 mov %eax,%ebx b: 0f b6 c0 movzbl %al,%eax e: 0f b6 90 00 00 00 00 movzbl 0x0(%eax),%edx 15: c1 eb 10 shr $0x10,%ebx 18: 66 0f b6 81 00 00 00 movzbw 0x0(%ecx),%ax 1f: 00 20: c1 e2 08 shl $0x8,%edx 23: 09 d0 or %edx,%eax 25: 0f b6 d3 movzbl %bl,%edx 28: 0f b6 8a 00 00 00 00 movzbl 0x0(%edx),%ecx 2f: 0f b6 d7 movzbl %bh,%edx 32: 66 0f b6 92 00 00 00 movzbw 0x0(%edx),%dx 39: 00 3a: c1 e0 10 shl $0x10,%eax 3d: 5b pop %ebx 3e: 5d pop %ebp 3f: c1 e1 08 shl $0x8,%ecx 42: 09 ca or %ecx,%edx 44: 0f b7 d2 movzwl %dx,%edx 47: 09 d0 or %edx,%eax 49: c3 ret X86_32 after: 00000000 <bitrev32>: 0: 55 push %ebp 1: 89 e5 mov %esp,%ebp 3: 83 ec 08 sub $0x8,%esp 6: 89 45 f8 mov %eax,-0x8(%ebp) 9: 0f b6 45 f8 movzbl -0x8(%ebp),%eax d: 8a 80 00 00 00 00 mov 0x0(%eax),%al 13: 88 45 ff mov %al,-0x1(%ebp) 16: 0f b6 45 f9 movzbl -0x7(%ebp),%eax 1a: 8a 80 00 00 00 00 mov 0x0(%eax),%al 20: 88 45 fe mov %al,-0x2(%ebp) 23: 0f b6 45 fa movzbl -0x6(%ebp),%eax 27: 8a 80 00 00 00 00 mov 0x0(%eax),%al 2d: 88 45 fd mov %al,-0x3(%ebp) 30: 0f b6 45 fb movzbl -0x5(%ebp),%eax 34: 8a 80 00 00 00 00 mov 0x0(%eax),%al 3a: 88 45 fc mov %al,-0x4...
Not very good. Why do it in memory, when doing it in a register is much
clearer?
This generates one byte longer code for me, but avoids the partial memory
stalls and looks more obvious. It is also portable, unlike your version
(you need to use a union to avoid aliasing issues - admittedly we disable
the gcc alias code anyway because of other kernel code, but still).
Linus
---
lib/bitrev.c | 10 ++++------
1 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/lib/bitrev.c b/lib/bitrev.c
index 989aff7..e0cb44e 100644
--- a/lib/bitrev.c
+++ b/lib/bitrev.c
@@ -42,17 +42,15 @@ const u8 byte_rev_table[256] = {
};
EXPORT_SYMBOL_GPL(byte_rev_table);
-static __always_inline u16 bitrev16(u16 x)
-{
- return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8);
-}
-
/**
* bitrev32 - reverse the order of bits in a u32 value
* @x: value to be bit-reversed
*/
u32 bitrev32(u32 x)
{
- return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16);
+ return (bitrev8(x) << 24) |
+ (bitrev8(x >> 8) << 16) |
+ (bitrev8(x >> 16) << 8) |
+ (bitrev8(x >> 24));
}
EXPORT_SYMBOL(bitrev32);
--
| David Newall | Re: Slow DOWN, please!!! |
| Renato S. Yamane | Error -71 on device descriptor read/all |
| Greg Kroah-Hartman | [PATCH 004/196] Chinese: add translation of SubmittingPatches |
| Bart Van Assche | Integration of SCST in the mainstream Linux kernel |
git: | |
| Shawn O. Pearce | libgit2 - a true git library |
| Martin Langhoff | Re: pack operation is thrashing my server |
| Aubrey Li | git proxy issue |
| Pierre Habouzit | git send-email improvements |
| Elad Efrat | Integrating securelevel and kauth(9) |
| Hubert Feyrer | Compressed vnd handling tested successfully |
| Matt Thomas | Interrupt, interrupt threads, continuations, and kernel lwps |
| Michael | Re: yamt-km branch |
| Richard Stallman | Real men don't attack straw men |
| Will Maier | cron doesn't run commands in /etc/crontab? |
| askthelist | Packets Per Second Limit? |
| Harald Dunkel | Packet Filter: how to keep device names on hardware failure? |
| Question on swap as ramdisk partition | 2 hours ago | Linux kernel |
| Netfilter kernel module | 13 hours ago | Linux kernel |
| serial driver xmit problem | 16 hours ago | Linux kernel |
| Why Windows is better than Linux | 16 hours ago | Linux general |
| How can I see my kernel messages in vt12? | 22 hours ago | Linux kernel |
| Grub | 1 day ago | Linux general |
| vmalloc_fault handling in x86_64 | 1 day ago | Linux kernel |
| epoll_wait()ing on epoll FD | 1 day ago | Linux kernel |
| Framebuffer in x86_64 causes problems to multiseat | 2 days ago | Linux kernel |
| Difference between 2.4 and 2.6 regarding thread creation | 2 days ago | Linux general |
