powerpc: New copy_4K_page()

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Linux Kernel Mailing List
Date: Wednesday, October 15, 2008 - 12:00 pm

Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=57dda6...
Commit:     57dda6ef5bd5b9e60410477ad29e654097e2cca1
Parent:     2a9294369bd020db89bfdf78b84c3615b39a5c84
Author:     Mark Nelson <markn@au1.ibm.com>
AuthorDate: Fri Aug 22 14:39:00 2008 +1000
Committer:  Paul Mackerras <paulus@samba.org>
CommitDate: Mon Sep 15 11:07:42 2008 -0700

    powerpc: New copy_4K_page()
    
    This new copy_4K_page() function was originally tuned for the best
    performance on the Cell processor, but after testing on more 64bit
    powerpc chips it was found that with a small modification it either
    matched the performance offered by the current mainline version or
    bettered it by a small amount.
    
    It was found that on a Cell-based QS22 blade the amount of system
    time measured when compiling a 2.6.26 pseries_defconfig decreased
    by 4%. Using the same test, a 4-way 970MP machine saw a decrease of
    2% in system time. No noticeable change was seen on Power4, Power5
    or Power6.
    
    The 4096 byte page is copied in thirty-two 128 byte strides. An
    initial setup loop executes dcbt instructions for the whole source
    page and dcbz instructions for the whole destination page. To do
    this, the cache line size is retrieved from ppc64_caches.
    
    A new CPU feature bit, CPU_FTR_CP_USE_DCBTZ, (introduced in the
    previous patch) is used to make the modification to this new copy
    routine - on Power4, 970 and Cell the feature bit is set so the
    setup loop is executed, but on all other 64bit chips the setup
    loop is nop'ed out.
    
    Signed-off-by: Mark Nelson <markn@au1.ibm.com>
    Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/copypage_64.S |  198 +++++++++++++++++++---------------------
 1 files changed, 93 insertions(+), 105 deletions(-)

diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index f9837f4..75f3267 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ * Copyright (C) 2008 Mark Nelson, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -8,112 +8,100 @@
  */
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+        .section        ".toc","aw"
+PPC64_CACHES:
+        .tc             ppc64_caches[TC],ppc64_caches
+        .section        ".text"
+
 
 _GLOBAL(copy_4K_page)
-	std	r31,-8(1)
-	std	r30,-16(1)
-	std	r29,-24(1)
-	std	r28,-32(1)
-	std	r27,-40(1)
-	std	r26,-48(1)
-	std	r25,-56(1)
-	std	r24,-64(1)
-	std	r23,-72(1)
-	std	r22,-80(1)
-	std	r21,-88(1)
-	std	r20,-96(1)
-	li	r5,4096/32 - 1
+	li	r5,4096		/* 4K page size */
+BEGIN_FTR_SECTION
+	ld      r10,PPC64_CACHES@toc(r2)
+	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
+	lwz     r12,DCACHEL1LINESIZE(r10)	/* get cache line size */
+	li	r9,0
+	srd	r8,r5,r11
+
+	mtctr	r8
+setup:
+	dcbt	r9,r4
+	dcbz	r9,r3
+	add	r9,r9,r12
+	bdnz	setup
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
 	addi	r3,r3,-8
-	li	r12,5
-0:	addi	r5,r5,-24
-	mtctr	r12
-	ld	r22,640(4)
-	ld	r21,512(4)
-	ld	r20,384(4)
-	ld	r11,256(4)
-	ld	r9,128(4)
-	ld	r7,0(4)
-	ld	r25,648(4)
-	ld	r24,520(4)
-	ld	r23,392(4)
-	ld	r10,264(4)
-	ld	r8,136(4)
-	ldu	r6,8(4)
-	cmpwi	r5,24
-1:	std	r22,648(3)
-	std	r21,520(3)
-	std	r20,392(3)
-	std	r11,264(3)
-	std	r9,136(3)
-	std	r7,8(3)
-	ld	r28,648(4)
-	ld	r27,520(4)
-	ld	r26,392(4)
-	ld	r31,264(4)
-	ld	r30,136(4)
-	ld	r29,8(4)
-	std	r25,656(3)
-	std	r24,528(3)
-	std	r23,400(3)
-	std	r10,272(3)
-	std	r8,144(3)
-	std	r6,16(3)
-	ld	r22,656(4)
-	ld	r21,528(4)
-	ld	r20,400(4)
-	ld	r11,272(4)
-	ld	r9,144(4)
-	ld	r7,16(4)
-	std	r28,664(3)
-	std	r27,536(3)
-	std	r26,408(3)
-	std	r31,280(3)
-	std	r30,152(3)
-	stdu	r29,24(3)
-	ld	r25,664(4)
-	ld	r24,536(4)
-	ld	r23,408(4)
-	ld	r10,280(4)
-	ld	r8,152(4)
-	ldu	r6,24(4)
+	srdi    r8,r5,7		/* page is copied in 128 byte strides */
+	addi	r8,r8,-1	/* one stride copied outside loop */
+
+	mtctr	r8
+
+	ld	r5,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ldu	r8,24(r4)
+1:	std	r5,8(r3)
+	ld	r9,8(r4)
+	std	r6,16(r3)
+	ld	r10,16(r4)
+	std	r7,24(r3)
+	ld	r11,24(r4)
+	std	r8,32(r3)
+	ld	r12,32(r4)
+	std	r9,40(r3)
+	ld	r5,40(r4)
+	std	r10,48(r3)
+	ld	r6,48(r4)
+	std	r11,56(r3)
+	ld	r7,56(r4)
+	std	r12,64(r3)
+	ld	r8,64(r4)
+	std	r5,72(r3)
+	ld	r9,72(r4)
+	std	r6,80(r3)
+	ld	r10,80(r4)
+	std	r7,88(r3)
+	ld	r11,88(r4)
+	std	r8,96(r3)
+	ld	r12,96(r4)
+	std	r9,104(r3)
+	ld	r5,104(r4)
+	std	r10,112(r3)
+	ld	r6,112(r4)
+	std	r11,120(r3)
+	ld	r7,120(r4)
+	stdu	r12,128(r3)
+	ldu	r8,128(r4)
 	bdnz	1b
-	std	r22,648(3)
-	std	r21,520(3)
-	std	r20,392(3)
-	std	r11,264(3)
-	std	r9,136(3)
-	std	r7,8(3)
-	addi	r4,r4,640
-	addi	r3,r3,648
-	bge	0b
-	mtctr	r5
-	ld	r7,0(4)
-	ld	r8,8(4)
-	ldu	r9,16(4)
-3:	ld	r10,8(4)
-	std	r7,8(3)
-	ld	r7,16(4)
-	std	r8,16(3)
-	ld	r8,24(4)
-	std	r9,24(3)
-	ldu	r9,32(4)
-	stdu	r10,32(3)
-	bdnz	3b
-4:	ld	r10,8(4)
-	std	r7,8(3)
-	std	r8,16(3)
-	std	r9,24(3)
-	std	r10,32(3)
-9:	ld	r20,-96(1)
-	ld	r21,-88(1)
-	ld	r22,-80(1)
-	ld	r23,-72(1)
-	ld	r24,-64(1)
-	ld	r25,-56(1)
-	ld	r26,-48(1)
-	ld	r27,-40(1)
-	ld	r28,-32(1)
-	ld	r29,-24(1)
-	ld	r30,-16(1)
-	ld	r31,-8(1)
+
+	std	r5,8(r3)
+	ld	r9,8(r4)
+	std	r6,16(r3)
+	ld	r10,16(r4)
+	std	r7,24(r3)
+	ld	r11,24(r4)
+	std	r8,32(r3)
+	ld	r12,32(r4)
+	std	r9,40(r3)
+	ld	r5,40(r4)
+	std	r10,48(r3)
+	ld	r6,48(r4)
+	std	r11,56(r3)
+	ld	r7,56(r4)
+	std	r12,64(r3)
+	ld	r8,64(r4)
+	std	r5,72(r3)
+	ld	r9,72(r4)
+	std	r6,80(r3)
+	ld	r10,80(r4)
+	std	r7,88(r3)
+	ld	r11,88(r4)
+	std	r8,96(r3)
+	ld	r12,96(r4)
+	std	r9,104(r3)
+	std	r10,112(r3)
+	std	r11,120(r3)
+	std	r12,128(r3)
 	blr
--
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
powerpc: New copy_4K_page(), Linux Kernel Mailing ..., (Wed Oct 15, 12:00 pm)