[PATCH 3/3] x86/iommu: use __GFP_ZERO instead of memset for GART

Previous thread: none

Next thread: lockdep warning inside ide/bio by Dmitry Baryshkov on Thursday, September 25, 2008 - 3:53 am. (1 message)
From: Joerg Roedel
Date: Thursday, September 25, 2008 - 3:13 am

Hi,

this small patch series contains some cleanups for the K8 GART driver in
the x86 architecture.

diffstat:

 arch/x86/kernel/pci-gart_64.c |   38 ++++++++++++++++++--------------------
 include/asm-x86/gart.h        |    2 ++
 2 files changed, 20 insertions(+), 20 deletions(-)



--

From: Joerg Roedel
Date: Thursday, September 25, 2008 - 3:13 am

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/pci-gart_64.c |   11 ++++-------
 1 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aecea06..84d541f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	info->aper_size = aper_size >> 20;
 
 	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
-	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+	gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(gatt_size));
 	if (!gatt)
 		panic("Cannot allocate GATT table");
 	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
 		panic("Could not set GART PTEs to uncacheable pages");
 
-	memset(gatt, 0, gatt_size);
 	agp_gatt_table = gatt;
 
 	enable_gart_translations();
@@ -788,18 +788,15 @@ void __init gart_iommu_init(void)
 	iommu_size = check_iommu_size(info.aper_base, aper_size);
 	iommu_pages = iommu_size >> PAGE_SHIFT;
 
-	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
 						      get_order(iommu_pages/8));
 	if (!iommu_gart_bitmap)
 		panic("Cannot allocate iommu bitmap\n");
-	memset(iommu_gart_bitmap, 0, iommu_pages/8);
 
 #ifdef CONFIG_IOMMU_LEAK
 	if (leak_trace) {
-		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
 				  get_order(iommu_pages*sizeof(void *)));
-		if (iommu_leak_tab)
-			memset(iommu_leak_tab, 0, iommu_pages * 8);
 		else
 			printk(KERN_DEBUG
 			       "PCI-DMA: Cannot allocate leak trace area\n");
-- 
1.5.6.4


--

From: Ingo Molnar
Date: Thursday, September 25, 2008 - 3:20 am

hm, that looks broken.

	Ingo
--

From: Joerg Roedel
Date: Thursday, September 25, 2008 - 3:42 am

Argh, yes. Please try this one:

=
From c402fd7f5fe930d45a02ef863bf3fb61851e6ad0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 25 Sep 2008 12:39:06 +0200
Subject: [PATCH] x86/iommu: use __GFP_ZERO instead of memset for GART

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/pci-gart_64.c |   13 +++++--------
 1 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aecea06..d077116 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	info->aper_size = aper_size >> 20;
 
 	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
-	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+	gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(gatt_size));
 	if (!gatt)
 		panic("Cannot allocate GATT table");
 	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
 		panic("Could not set GART PTEs to uncacheable pages");
 
-	memset(gatt, 0, gatt_size);
 	agp_gatt_table = gatt;
 
 	enable_gart_translations();
@@ -788,19 +788,16 @@ void __init gart_iommu_init(void)
 	iommu_size = check_iommu_size(info.aper_base, aper_size);
 	iommu_pages = iommu_size >> PAGE_SHIFT;
 
-	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
 						      get_order(iommu_pages/8));
 	if (!iommu_gart_bitmap)
 		panic("Cannot allocate iommu bitmap\n");
-	memset(iommu_gart_bitmap, 0, iommu_pages/8);
 
 #ifdef CONFIG_IOMMU_LEAK
 	if (leak_trace) {
-		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
 				  get_order(iommu_pages*sizeof(void *)));
-		if (iommu_leak_tab)
-			memset(iommu_leak_tab, 0, iommu_pages * 8);
-		else
+		if (!iommu_leak_tab)
 			printk(KERN_DEBUG
 ...
From: Ingo Molnar
Date: Saturday, September 27, 2008 - 11:14 am

applied these patches to tip/x86/iommu:

 0114267: x86/iommu: use __GFP_ZERO instead of memset for GART
 3610f21: x86/iommu: convert GART need_flush to bool
 237a622: x86/iommu: make GART driver checkpatch clean

thanks Joerg!

Fujita, do they look fine to you too?

	Ingo
--

From: Ingo Molnar
Date: Saturday, September 27, 2008 - 11:18 am

another thing:

How hard would it be to add an CONFIG_IOMMU_DEBUG option that forces as 
many DMA requests to go via the IOMMU as possible?

This slows things down of course so it's only for debugging - but it 
also makes sure that we utilize the IOMMU code to the maximum - which is 
not normally the case.

Would be nice to have it .config driven (default-disabled), so that 
-tip's randconfig testing can stumble upon it every now and then. I've 
got GART test-systems - this way we could find certain types of IOMMU 
breakages sooner.

	Ingo
--

From: FUJITA Tomonori
Date: Sunday, September 28, 2008 - 11:06 am

On Sat, 27 Sep 2008 20:18:55 +0200

If you use iommu=force boot option, GART always tries to use the
IOMMU.
--

From: Joerg Roedel
Date: Sunday, September 28, 2008 - 12:34 pm

For AMD IOMMU I disabled the round-robin allocator to stress-test the
code. This means that the address allocation bitmap is always traversed
from the first bit. In consequence the TLB flushing is stressed a lot
(both in hardware and software) because the same DMA addresses are used
again and again. For testing I hardcoded it into the driver but I can
also make it depend on CONFIG_IOMMU_DEBUG.

Joerg

-- 
           |           AMD Saxony Limited Liability Company & Co. KG
 Operating |         Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System    |                  Register Court Dresden: HRA 4896
 Research  |              General Partner authorized to represent:
 Center    |             AMD Saxony LLC (Wilmington, Delaware, US)
           | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

--

From: Ingo Molnar
Date: Monday, September 29, 2008 - 1:56 am

yes - could you please make a new option for it, 
CONFIG_IOMMU_DEBUG_FORCE=y or so - and cover all iommus that support it?

	Ingo
--

From: FUJITA Tomonori
Date: Monday, September 29, 2008 - 7:11 am

On Mon, 29 Sep 2008 10:56:47 +0200

I'm not sure we are talking about the same thing. Surely, I and Joerg
are talking different things.

GART driver doesn't need to use the IOMMU hardware at all times. GART
does a virtual mapping only when necessary (a device needs to handle
an address that it can't access to). But as I wrote, if you use
iommu=force, GART driver always use the IOMMU hardware.


Other x86 IOMMU drivers always use the IOMMU hardware. Except for
Intel VT-D, they manage free virtual I/O space in the round-robin
manner with the bitmap algorithm to avoid frequent IOTLB flush. Joerg
said he tested AMD IOMMU driver with the round-robin manner disabled
so AMD IOMMU driver uses the same virtual I/O space with lots of IOTLB
flush.
--

From: Ingo Molnar
Date: Tuesday, September 30, 2008 - 4:03 am

yes - but iommu=force is not randconfig covered, hence it never gets 
tested by -tip testing. So my suggestion was a really simple patch: a 
new Kconfig entry that makes iommu=force default. 

CONFIG_BOOTPARAM_IOMMU_FORCE=y would be the right name for it. (disabled 

all i'm suggesting is to please expose existing debug capabilities in 
the .config space, so that it can be tested in automated setups.

	Ingo
--

From: H. Peter Anvin
Date: Tuesday, September 30, 2008 - 1:53 pm

Boot configurations, too, can be tested in automated setups, and there 
is benefit to being able to do that without compilation.  However, what 
we'd need is a machine-readable description that one can select a 
configuration from.

	-hpa
--

From: FUJITA Tomonori
Date: Sunday, September 28, 2008 - 7:48 am

On Sat, 27 Sep 2008 20:14:38 +0200

Yeah, all the patches look trivial and fine.

Thanks,
--

From: Joerg Roedel
Date: Thursday, September 25, 2008 - 3:13 am

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/pci-gart_64.c |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aa569db..aecea06 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved;
 AGPEXTERN __u32 *agp_gatt_table;
 
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
-static int need_flush;		/* global flush state. set for each gart wrap */
+static bool need_flush;		/* global flush state. set for each gart wrap */
 
 static unsigned long alloc_iommu(struct device *dev, int size,
 				 unsigned long align_mask)
@@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
 	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
 				  size, base_index, boundary_size, align_mask);
 	if (offset == -1) {
-		need_flush = 1;
+		need_flush = true;
 		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
 					  size, base_index, boundary_size,
 					  align_mask);
@@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size,
 		next_bit = offset+size;
 		if (next_bit >= iommu_pages) {
 			next_bit = 0;
-			need_flush = 1;
+			need_flush = true;
 		}
 	}
 	if (iommu_fullflush)
-		need_flush = 1;
+		need_flush = true;
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 
 	return offset;
@@ -136,7 +136,7 @@ static void flush_gart(void)
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	if (need_flush) {
 		k8_flush_garts();
-		need_flush = 0;
+		need_flush = false;
 	}
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 }
-- 
1.5.6.4


--

From: Joerg Roedel
Date: Thursday, September 25, 2008 - 3:13 am

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/pci-gart_64.c |   17 +++++++++--------
 include/asm-x86/gart.h        |    2 ++
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b85a2f9..aa569db 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -27,8 +27,8 @@
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
 #include <linux/sysdev.h>
+#include <linux/io.h>
 #include <asm/atomic.h>
-#include <asm/io.h>
 #include <asm/mtrr.h>
 #include <asm/pgtable.h>
 #include <asm/proto.h>
@@ -175,7 +175,8 @@ static void dump_leak(void)
 	       iommu_leak_pages);
 	for (i = 0; i < iommu_leak_pages; i += 2) {
 		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
+		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
+				0);
 		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
 	}
 	printk(KERN_DEBUG "\n");
@@ -688,7 +689,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	if (!error)
 		error = sysdev_register(&device_gart);
 	if (error)
-		panic("Could not register gart_sysdev -- would corrupt data on next suspend");
+		panic("Could not register gart_sysdev -- "
+		      "would corrupt data on next suspend");
 
 	flush_gart();
 
@@ -710,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	return -1;
 }
 
-extern int agp_amd64_init(void);
-
 static struct dma_mapping_ops gart_dma_ops = {
 	.map_single			= gart_map_single,
 	.unmap_single			= gart_unmap_single,
@@ -777,8 +777,8 @@ void __init gart_iommu_init(void)
 	    (no_agp && init_k8_gatt(&info) < 0)) {
 		if (max_pfn > MAX_DMA32_PFN) {
 			printk(KERN_WARNING "More than 4GB of memory "
-			       	          "but GART IOMMU not available.\n"
-			       KERN_WARNING "falling back to iommu=soft.\n");
+			       "but GART IOMMU not ...
Previous thread: none

Next thread: lockdep warning inside ide/bio by Dmitry Baryshkov on Thursday, September 25, 2008 - 3:53 am. (1 message)