Hi, this small patch series contains some cleanups for the K8 GART driver in the x86 architecture. diffstat: arch/x86/kernel/pci-gart_64.c | 38 ++++++++++++++++++-------------------- include/asm-x86/gart.h | 2 ++ 2 files changed, 20 insertions(+), 20 deletions(-) --
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kernel/pci-gart_64.c | 11 ++++-------
1 files changed, 4 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aecea06..84d541f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
info->aper_size = aper_size >> 20;
gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
- gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+ gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(gatt_size));
if (!gatt)
panic("Cannot allocate GATT table");
if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
panic("Could not set GART PTEs to uncacheable pages");
- memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
enable_gart_translations();
@@ -788,18 +788,15 @@ void __init gart_iommu_init(void)
iommu_size = check_iommu_size(info.aper_base, aper_size);
iommu_pages = iommu_size >> PAGE_SHIFT;
- iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+ iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(iommu_pages/8));
if (!iommu_gart_bitmap)
panic("Cannot allocate iommu bitmap\n");
- memset(iommu_gart_bitmap, 0, iommu_pages/8);
#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+ iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(iommu_pages*sizeof(void *)));
- if (iommu_leak_tab)
- memset(iommu_leak_tab, 0, iommu_pages * 8);
else
printk(KERN_DEBUG
"PCI-DMA: Cannot allocate leak trace area\n");
--
1.5.6.4
--
Argh, yes. Please try this one:
=
From c402fd7f5fe930d45a02ef863bf3fb61851e6ad0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 25 Sep 2008 12:39:06 +0200
Subject: [PATCH] x86/iommu: use __GFP_ZERO instead of memset for GART
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kernel/pci-gart_64.c | 13 +++++--------
1 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aecea06..d077116 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
info->aper_size = aper_size >> 20;
gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
- gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+ gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(gatt_size));
if (!gatt)
panic("Cannot allocate GATT table");
if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
panic("Could not set GART PTEs to uncacheable pages");
- memset(gatt, 0, gatt_size);
agp_gatt_table = gatt;
enable_gart_translations();
@@ -788,19 +788,16 @@ void __init gart_iommu_init(void)
iommu_size = check_iommu_size(info.aper_base, aper_size);
iommu_pages = iommu_size >> PAGE_SHIFT;
- iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+ iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(iommu_pages/8));
if (!iommu_gart_bitmap)
panic("Cannot allocate iommu bitmap\n");
- memset(iommu_gart_bitmap, 0, iommu_pages/8);
#ifdef CONFIG_IOMMU_LEAK
if (leak_trace) {
- iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+ iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
get_order(iommu_pages*sizeof(void *)));
- if (iommu_leak_tab)
- memset(iommu_leak_tab, 0, iommu_pages * 8);
- else
+ if (!iommu_leak_tab)
printk(KERN_DEBUG
...applied these patches to tip/x86/iommu: 0114267: x86/iommu: use __GFP_ZERO instead of memset for GART 3610f21: x86/iommu: convert GART need_flush to bool 237a622: x86/iommu: make GART driver checkpatch clean thanks Joerg! Fujita, do they look fine to you too? Ingo --
another thing: How hard would it be to add an CONFIG_IOMMU_DEBUG option that forces as many DMA requests to go via the IOMMU as possible? This slows things down of course so it's only for debugging - but it also makes sure that we utilize the IOMMU code to the maximum - which is not normally the case. Would be nice to have it .config driven (default-disabled), so that -tip's randconfig testing can stumble upon it every now and then. I've got GART test-systems - this way we could find certain types of IOMMU breakages sooner. Ingo --
On Sat, 27 Sep 2008 20:18:55 +0200 If you use iommu=force boot option, GART always tries to use the IOMMU. --
For AMD IOMMU I disabled the round-robin allocator to stress-test the
code. This means that the address allocation bitmap is always traversed
from the first bit. In consequence the TLB flushing is stressed a lot
(both in hardware and software) because the same DMA addresses are used
again and again. For testing I hardcoded it into the driver but I can
also make it depend on CONFIG_IOMMU_DEBUG.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
yes - could you please make a new option for it, CONFIG_IOMMU_DEBUG_FORCE=y or so - and cover all iommus that support it? Ingo --
On Mon, 29 Sep 2008 10:56:47 +0200 I'm not sure we are talking about the same thing. Surely, I and Joerg are talking different things. GART driver doesn't need to use the IOMMU hardware at all times. GART does a virtual mapping only when necessary (a device needs to handle an address that it can't access to). But as I wrote, if you use iommu=force, GART driver always use the IOMMU hardware. Other x86 IOMMU drivers always use the IOMMU hardware. Except for Intel VT-D, they manage free virtual I/O space in the round-robin manner with the bitmap algorithm to avoid frequent IOTLB flush. Joerg said he tested AMD IOMMU driver with the round-robin manner disabled so AMD IOMMU driver uses the same virtual I/O space with lots of IOTLB flush. --
yes - but iommu=force is not randconfig covered, hence it never gets tested by -tip testing. So my suggestion was a really simple patch: a new Kconfig entry that makes iommu=force default. CONFIG_BOOTPARAM_IOMMU_FORCE=y would be the right name for it. (disabled all i'm suggesting is to please expose existing debug capabilities in the .config space, so that it can be tested in automated setups. Ingo --
Boot configurations, too, can be tested in automated setups, and there is benefit to being able to do that without compilation. However, what we'd need is a machine-readable description that one can select a configuration from. -hpa --
On Sat, 27 Sep 2008 20:14:38 +0200 Yeah, all the patches look trivial and fine. Thanks, --
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kernel/pci-gart_64.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aa569db..aecea06 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
-static int need_flush; /* global flush state. set for each gart wrap */
+static bool need_flush; /* global flush state. set for each gart wrap */
static unsigned long alloc_iommu(struct device *dev, int size,
unsigned long align_mask)
@@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
size, base_index, boundary_size, align_mask);
if (offset == -1) {
- need_flush = 1;
+ need_flush = true;
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
size, base_index, boundary_size,
align_mask);
@@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size,
next_bit = offset+size;
if (next_bit >= iommu_pages) {
next_bit = 0;
- need_flush = 1;
+ need_flush = true;
}
}
if (iommu_fullflush)
- need_flush = 1;
+ need_flush = true;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
return offset;
@@ -136,7 +136,7 @@ static void flush_gart(void)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
if (need_flush) {
k8_flush_garts();
- need_flush = 0;
+ need_flush = false;
}
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
--
1.5.6.4
--
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kernel/pci-gart_64.c | 17 +++++++++--------
include/asm-x86/gart.h | 2 ++
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index b85a2f9..aa569db 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -27,8 +27,8 @@
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/sysdev.h>
+#include <linux/io.h>
#include <asm/atomic.h>
-#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
@@ -175,7 +175,8 @@ static void dump_leak(void)
iommu_leak_pages);
for (i = 0; i < iommu_leak_pages; i += 2) {
printk(KERN_DEBUG "%lu: ", iommu_pages-i);
- printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
+ printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
+ 0);
printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
}
printk(KERN_DEBUG "\n");
@@ -688,7 +689,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
if (!error)
error = sysdev_register(&device_gart);
if (error)
- panic("Could not register gart_sysdev -- would corrupt data on next suspend");
+ panic("Could not register gart_sysdev -- "
+ "would corrupt data on next suspend");
flush_gart();
@@ -710,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
return -1;
}
-extern int agp_amd64_init(void);
-
static struct dma_mapping_ops gart_dma_ops = {
.map_single = gart_map_single,
.unmap_single = gart_unmap_single,
@@ -777,8 +777,8 @@ void __init gart_iommu_init(void)
(no_agp && init_k8_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
printk(KERN_WARNING "More than 4GB of memory "
- "but GART IOMMU not available.\n"
- KERN_WARNING "falling back to iommu=soft.\n");
+ "but GART IOMMU not ...