Hi, this is version 2 of the patchset which introduces code to debug drivers usage of the DMA-API. Many thanks to all the reviewers and the useful comments on the fist version of this patchset. Tests with hardware IOMMUs have shown several bugs in drivers regarding the usage of that API. Problems were found especially in network card drivers. These bugs often don't show up or have any negative impact if there is no hardware IOMMU in use in the system. But with an hardware IOMMU these bugs turn the hardware unusable or, in the worst case, cause data corruption on devices which are managed by other (good) drivers. With the code these patches introduce driver developers can find several bugs of misusing the DMA-API in their drivers. But be aware, it can not find all possible bugs. If it finds a problem it prints out messages like ------------[ cut here ]------------ WARNING: at /data2/repos/linux.trees.git/lib/dma-debug.c:231 check_unmap+0xab/0x3d9() Hardware name: Toonie bnx2 0000:01:00.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000000011] Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.28 #174 Call Trace: <IRQ> [<ffffffff8105af3a>] warn_slowpath+0xd3/0xf2 [<ffffffff8107c36f>] ? find_usage_backwards+0xe2/0x116 [<ffffffff8107c36f>] ? find_usage_backwards+0xe2/0x116 [<ffffffff812efd16>] ? usb_hcd_link_urb_to_ep+0x94/0xa0 [<ffffffff8107c52b>] ? mark_lock+0x1c/0x364 [<ffffffff8107d8f7>] ? __lock_acquire+0xaec/0xb55 [<ffffffff8107c52b>] ? mark_lock+0x1c/0x364 [<ffffffff811e2b4b>] ? get_hash_bucket+0x28/0x33 [<ffffffff814b25a5>] ? _spin_lock_irqsave+0x69/0x75 [<ffffffff811e2b4b>] ? get_hash_bucket+0x28/0x33 [<ffffffff811e2ff2>] check_unmap+0xab/0x3d9 [<ffffffff8107c9ed>] ? trace_hardirqs_on_caller+0x108/0x14a [<ffffffff8107ca3c>] ? trace_hardirqs_on+0xd/0xf [<ffffffff811e3433>] debug_unmap_single+0x3e/0x40 [<ffffffff8128d2d8>] dma_unmap_single+0x3d/0x60 [<ffffffff8128d335>] ...
Impact: add debug callbacks for dma_sync_single_range_for_* functions
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 26 ++++++++++++++++++++++++++
lib/dma-debug.c | 23 +++++++++++++++++++++++
2 files changed, 49 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index f39c2a8..b9c221a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -54,6 +54,16 @@ extern void debug_sync_single_for_device(struct device *dev,
dma_addr_t dma_handle,
size_t size, int direction);
+extern void debug_sync_single_range_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset, size_t size,
+ int direction);
+
+extern void debug_sync_single_range_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset,
+ size_t size, int direction);
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_init(u32 num_entries)
@@ -104,6 +114,22 @@ static inline void debug_sync_single_for_device(struct device *dev,
{
}
+static inline void debug_sync_single_range_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset,
+ size_t size,
+ int direction)
+{
+}
+
+static inline void debug_sync_single_range_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset,
+ size_t size,
+ int direction)
+{
+}
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index a7f2369..6f73bed 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -669,3 +669,26 @@ void debug_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
}
EXPORT_SYMBOL(debug_sync_single_for_device);
+void debug_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ unsigned ...Impact: add code to initialize dma-debug core data structures
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 14 +++++++++
lib/dma-debug.c | 66 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index ce4ace7..345d538 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -20,6 +20,20 @@
#ifndef __DMA_DEBUG_H
#define __DMA_DEBUG_H
+#include <linux/types.h>
+
struct device;
+#ifdef CONFIG_DMA_API_DEBUG
+
+extern void dma_debug_init(u32 num_entries);
+
+#else /* CONFIG_DMA_API_DEBUG */
+
+static inline void dma_debug_init(u32 num_entries)
+{
+}
+
+#endif /* CONFIG_DMA_API_DEBUG */
+
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index c5444d4..b932f15 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -21,6 +21,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/list.h>
+#include <linux/slab.h>
#define HASH_SIZE 256
#define HASH_FN_SHIFT 20
@@ -195,3 +196,68 @@ static void dma_entry_free(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&free_entries_lock, flags);
}
+/*
+ * DMA-API debugging init code
+ *
+ * The init code does two things:
+ * 1. Initialize core data structures
+ * 2. Preallocate a given number of dma_debug_entry structs
+ */
+
+static int prealloc_memory(u32 num_entries)
+{
+ struct dma_debug_entry *entry, *next_entry;
+ int i;
+
+ for (i = 0; i < num_entries; ++i) {
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL | __GFP_ZERO);
+ if (!entry)
+ goto out_err;
+
+ list_add_tail(&entry->list, &free_entries);
+ }
+
+ num_free_entries = num_entries;
+ min_free_entries = num_entries;
+
+ printk(KERN_INFO "DMA-API: preallocated %d debug entries\n",
+ num_entries);
+
+ return 0;
+
+out_err:
+
+ list_for_each_entry_safe(entry, next_entry, ...On Fri, Jan 09, 2009 at 05:19:19PM +0100, Joerg Roedel (joerg.roedel@amd.com) wrote: kzalloc? -- Evgeniy Polyakov --
True. kzalloc is better. I will change that. Joerg --
Impact: implement necessary functions for the core hash
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
lib/dma-debug.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 101 insertions(+), 0 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d04f8b6..74a0f36 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -18,9 +18,14 @@
*/
#include <linux/dma-debug.h>
+#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/list.h>
+#define HASH_SIZE 256
+#define HASH_FN_SHIFT 20
+#define HASH_FN_MASK 0xffULL
+
enum {
dma_debug_single,
dma_debug_sg,
@@ -37,3 +42,99 @@ struct dma_debug_entry {
int direction;
};
+struct hash_bucket {
+ struct list_head list;
+ spinlock_t lock;
+} ____cacheline_aligned;
+
+/* Hash list to save the allocated dma addresses */
+static struct hash_bucket dma_entry_hash[HASH_SIZE];
+
+/*
+ * Hash related functions
+ *
+ * Every DMA-API request is saved into a struct dma_debug_entry. To
+ * have quick access to these structs they are stored into a hash.
+ */
+static int hash_fn(struct dma_debug_entry *entry)
+{
+ /*
+ * Hash function is based on the dma address.
+ * We use bits 20-27 here as the index into the hash
+ */
+ return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
+}
+
+/*
+ * Request exclusive access to a hash bucket for a given dma_debug_entry.
+ */
+static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
+ unsigned long *flags)
+{
+ int idx = hash_fn(entry);
+ unsigned long __flags;
+
+ spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
+ *flags = __flags;
+ return &dma_entry_hash[idx];
+}
+
+/*
+ * Give up exclusive access to the hash bucket
+ */
+static void put_hash_bucket(struct hash_bucket *bucket,
+ unsigned long *flags)
+{
+ unsigned long __flags = *flags;
+
+ spin_unlock_irqrestore(&bucket->lock, __flags);
+}
+
+/*
+ * Search a given entry in ...Hi Joerg. Do you really need this getting they are called only from single place? -- Evgeniy Polyakov --
Because everything else I thought about here was even more ugly. But maybe you have a better idea? I tried to lock directly in the debug_ Hmm, true. I will inline these functions. Joerg --
I believe that having direct locking in the debug_ functions is not a duplication, anyone will have a direct vision on the locking and hash array dereference, and this will be just one additional line compared to the get_* call and the same number of lines for the put :) -- Evgeniy Polyakov --
Even more additional lines because of the additional variables needed in every function. Anyway, I try it and if it does not look good I will keep that change ;) Joerg --
__cacheline_aligned_in_smp. This all looks like an exotically large amount of code for a debug thingy? --
Perhaps so, but it's a useful debug thingy which lets us debug _very_ common problems in drivers, which many people are unlikely to notice without this 'assistance'. -- David Woodhouse Open Source Technology Centre David.Woodhouse@intel.com Intel Corporation --
this code checks the DMA usage of ~1 million lines of kernel code - all the DMA using drivers. I think Joerg's feature is hugely relevant as DMA scribbles are one of the hardest to debug kernel bugs: they can end up in permanent data corruption or other hard to find bugs. In fact i think his patchset is rather simple and even having 10 times as much debug code would pay for its existence in the long run. Ingo --
Have we previously found bugs by other means which this facility would have detected? I don't recall any... --
this facility found a handful of real bugs already - Joerg, do you have more specifics about that? Also, such a facility would be useful during driver development, when such bugs occur en masse. Faster driver development under Linux is certainly desirable. This facility basically adds a sandbox to all DMA ops and tracks the lifetime and validity of DMA operations. Given how ugly DMA bugs can be in practice and how hard to debug they are, i see this as good step forward. Ingo --
Yes. We've found network driver bugs which only showed up when an IOMMU has been active -- and were painful to diagnose because they involved DMA going wrong. It happens when drivers make mistakes with the DMA mapping APIs. With this debug facility, we can find such problems on _any_ hardware, and get a sane report about what's wrong. -- David Woodhouse Open Source Technology Centre David.Woodhouse@intel.com Intel Corporation --
btw., during the past decade we have had countless very ugly driver DMA bugs in the past that took vendors months and specialized equipment to track down. I cannot give you a number breakdown, only an impression: storage drivers tended to be the hardest hit (due to the severity of the bugs and due to their inherent complexity) - but DMA bugs in networking drivers can be hard to track down too. Plus there's another benefit: if a driver passes this checking and there's still DMA related corruption observed, then the hardware / firmware becomes a stronger suspect. This helps debugging too. Ingo --
On Wed, 14 Jan 2009 18:48:04 +0100 We have had ugly DMA bugs in scsi drivers but I think that this new feature can find very few of them. I can't think of any SCSI driver bugs that this could find. This feature can't find any popular DMA bugs in scsi drivers, such as messing up driver's dma descriptor from a scatter gather list. But this can find some kinds of DMA bugs and it's is just about 1,000 lines. I don't see any problem about merging this. 1,000 lines it too large? Maybe cutting just 1,000 lines up into too many pieces is deceptive. --
See for example this bugfix: http://www.spinics.net/lists/netdev/msg83208.html The bug was found using the first version of this patchset. Joerg -- | Advanced Micro Devices GmbH Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München System | Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München | Registergericht München, HRB Nr. 43632 --
Impact: add documentation about DMA-API debugging to DMA-API.txt Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> --- Documentation/DMA-API.txt | 117 +++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 117 insertions(+), 0 deletions(-) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index b462bb1..e36e85a 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -610,3 +610,120 @@ size is the size (and should be a page-sized multiple). The return value will be either a pointer to the processor virtual address of the memory, or an error (via PTR_ERR()) if any part of the region is occupied. + +Part III - Debug drivers use of the DMA-API +------------------------------------------- + +The DMA-API as described above as some constraints. DMA addresses must be +released with the corresponding function with the same size for example. With +the advent of hardware IOMMUs it becomes more and more important that drivers +do not violate those constraints. In the worst case such a violation can +result in data corruption up to destroyed filesystems. + +To debug drivers and find bugs in the usage of the DMA-API checking code can +be compiled into the kernel which will tell the developer about those +violations. If your architecture supports it you can select the "Enable +debugging of DMA-API usage" option in your kernel configuration. Enabling this +option has a performance impact. Do not enable it in production kernels. + +If you boot the resulting kernel will contain code which does some bookkeeping +about what DMA memory was allocated for which device. If this code detects an +error it prints a warning message with some details into your kernel log. An +example warning message may look like this: + +------------[ cut here ]------------ +WARNING: at /data2/repos/linux.trees.git/lib/dma-debug.c:231 + check_unmap+0xab/0x3d9() +Hardware name: Toonie +bnx2 0000:01:00.0: DMA-API: device driver tries to free DMA + memory it has ...
Impact: add debug callbacks for dma_{un}map_sg
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 15 ++++++++++++
lib/dma-debug.c | 53 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 82ae9ca..b2131f4 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -34,6 +34,11 @@ extern void debug_map_single(struct device *dev, void *ptr, size_t size,
extern void debug_unmap_single(struct device *dev, dma_addr_t addr,
size_t size, int direction);
+extern void debug_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int direction);
+
+extern void debug_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir);
#else /* CONFIG_DMA_API_DEBUG */
@@ -52,6 +57,16 @@ static inline void debug_unmap_single(struct device *dev, dma_addr_t addr,
{
}
+static inline void debug_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int direction)
+{
+}
+
+static inline void debug_unmap_sg(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems, int dir)
+{
+}
#endif /* CONFIG_DMA_API_DEBUG */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d4d14e5..e6d45f9 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/spinlock.h>
@@ -551,3 +552,55 @@ void debug_unmap_single(struct device *dev, dma_addr_t addr,
}
EXPORT_SYMBOL(debug_unmap_single);
+void debug_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int direction)
+{
+ struct dma_debug_entry *entry;
+ struct scatterlist *s;
+ int i;
+
+ if (global_disable)
+ return;
+
+ for_each_sg(sg, s, nents, i) ...Will this print false errors if above map debug failed to add an entry into the list? -- Evgeniy Polyakov --
No. The code disables itself if adding an entry fails. This can only happen when we run out of preallocated dma_debug_entries. Joerg --
Impact: add debugfs interface for configuring DMA-API debugging
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
lib/dma-debug.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 78 insertions(+), 0 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 4e58d09..ca0ccb1 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -19,6 +19,7 @@
#include <linux/dma-debug.h>
#include <linux/spinlock.h>
+#include <linux/debugfs.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -58,12 +59,29 @@ static DEFINE_SPINLOCK(free_entries_lock);
/* Global disable flag - will be set in case of an error */
static bool global_disable __read_mostly;
+/* Global error count */
+static u32 error_count;
+
+/* Global error show enable*/
+static u32 show_all_errors __read_mostly;
+/* Number of errors to show */
+static u32 show_num_errors = 1;
+
static u32 num_free_entries;
static u32 min_free_entries;
/* number of preallocated entries requested by kernel cmdline */
static u32 req_entries;
+/* debugfs dentry's for the stuff above */
+static struct dentry *dma_debug_dent;
+static struct dentry *global_disable_dent;
+static struct dentry *error_count_dent;
+static struct dentry *show_all_errors_dent;
+static struct dentry *show_num_errors_dent;
+static struct dentry *num_free_entries_dent;
+static struct dentry *min_free_entries_dent;
+
/*
* Hash related functions
*
@@ -238,6 +256,58 @@ out_err:
return -ENOMEM;
}
+static int dma_debug_fs_init(void)
+{
+ dma_debug_dent = debugfs_create_dir("dma-api", NULL);
+ if (!dma_debug_dent) {
+ printk(KERN_ERR "DMA-API: can not create debugfs directory\n");
+ return -ENOMEM;
+ }
+
+ global_disable_dent = debugfs_create_bool("disabled", 0444,
+ dma_debug_dent,
+ (u32 *)&global_disable);
+ if (!global_disable_dent)
+ goto out_err;
+
+ error_count_dent = debugfs_create_u32("error_count", 0444,
+ dma_debug_dent, ...Small detail: please use native C types for non-hardware variables - 'unsigned int', 'unsigned long', etc. u32/u64 is typically used for variables where there is a real significance to the precise width of the variable: there's either a hardware or a user-space ABI involved. Ingo --
Hmm, these variables are exported to debugfs. Thats the reason I made them u32. Joerg --
There is some kind of userspace ABI: all these variables are exported
via debugfs. And debugfs relies on these types.
Joerg
--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632
--
Impact: add functions to check on dma unmap and sync
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
lib/dma-debug.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 133 insertions(+), 0 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ca0ccb1..9f730a4 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -17,9 +17,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
+#include <linux/device.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -82,6 +84,22 @@ static struct dentry *show_num_errors_dent;
static struct dentry *num_free_entries_dent;
static struct dentry *min_free_entries_dent;
+static char *type2name[3] = { "single", "scather-gather", "coherent" };
+
+static char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+ "DMA_FROM_DEVICE", "DMA_NONE" };
+
+#define err_printk(dev, format, arg...) do { \
+ error_count += 1; \
+ if (show_all_errors || show_num_errors > 0) { \
+ WARN(1, "%s %s: " format, \
+ dev_driver_string(dev), \
+ dev_name(dev) , ## arg); \
+ } \
+ if (!show_all_errors && show_num_errors > 0) \
+ show_num_errors -= 1; \
+ } while (0);
+
/*
* Hash related functions
*
@@ -377,3 +395,118 @@ static __init int dma_debug_entries_cmdline(char *str)
__setup("dma_debug=", dma_debug_cmdline);
__setup("dma_debug_entries=", dma_debug_entries_cmdline);
+static void check_unmap(struct dma_debug_entry *ref)
+{
+ struct dma_debug_entry *entry;
+ struct hash_bucket *bucket;
+ unsigned long flags;
+
+ if (dma_mapping_error(ref->dev, ref->dev_addr))
+ return;
+
+ bucket = get_hash_bucket(ref, &flags);
+ entry = hash_bucket_find(bucket, ref);
+
+ if (!entry) {
+ err_printk(ref->dev, "DMA-API: device driver tries "
+ "to free DMA memory ...Note that the arithmetics here is SMP-unsafe: we only hold the hash bucket so if two errors hit at once on two CPUs then the error tracking variables can be accessed at once. I'd suggest a simple global lock for this error case (taken inside the hash bucket lock), to be on the safe side. Also, please dont use a macro for this - printk details can be passed in to helper inlines/functions too. Ingo --
Yeah, this is not SMP-safe, I know. But debugfs does not support atomic_t so I made the variables u32. But at least a race condition has not a too bad impact. What may habben is that error_count misses a error or the show_num_errors become negative. But if we really want to avoid this I think its better to add atomic_t support to debugfs. What do you think? Joerg --
Even a global lock will not really help here because show_num_errors and show_all_errors can be set using debugfs. Either we live with the small race (with limited impact) or I add atomic_t support to debugfs. Joerg --
As I wrote in a previous email, a race here is no big deal. I add a
Hmm, how does this look like? There is not WARN variant which can use
va_args as a parameter. And it is important that the error message is
logged in the warning itself. So the driver developer can see it when a
user reports the warning.
Joerg
--
| Advanced Micro Devices GmbH
Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
System |
Research | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
| Registergericht München, HRB Nr. 43632
--
we commonly use global locks in debug exception cases - to serialize console output. But it's certainly no big deal. Ingo --
You must be using Vim and copy & paste messed up? :) Ingo --
Impact: add groundwork for DMA-API debugging Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> --- include/linux/dma-debug.h | 25 +++++++++++++++++++++++++ lib/Makefile | 2 ++ lib/dma-debug.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 0 deletions(-) create mode 100644 include/linux/dma-debug.h create mode 100644 lib/dma-debug.c diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h new file mode 100644 index 0000000..ce4ace7 --- /dev/null +++ b/include/linux/dma-debug.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __DMA_DEBUG_H +#define __DMA_DEBUG_H + +struct device; + +#endif /* __DMA_DEBUG_H */ diff --git a/lib/Makefile b/lib/Makefile index 32b0e64..50b48cf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -84,6 +84,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/dma-debug.c b/lib/dma-debug.c new file mode 100644 index 0000000..d04f8b6 --- /dev/null +++ b/lib/dma-debug.c @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2008 Advanced Micro ...
On Fri, 9 Jan 2009 17:19:16 +0100 I don't think that cpu addresses are appropriate here. --
Impact: make use of DMA-API debugging code in x86
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/dma-mapping.h | 30 ++++++++++++++++++++++++++----
arch/x86/kernel/pci-dma.c | 5 +++++
3 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 862adb9..68a806c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -39,6 +39,7 @@ config X86
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select USER_STACKTRACE_SUPPORT
+ select HAVE_DMA_API_DEBUG
config ARCH_DEFCONFIG
string
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4035357..939d5b3 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -7,6 +7,7 @@
*/
#include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
#include <asm-generic/dma-coherent.h>
@@ -93,9 +94,12 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size,
int direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+ dma_addr_t addr;
BUG_ON(!valid_dma_direction(direction));
- return ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
+ addr = ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
+ debug_map_single(hwdev, ptr, size, direction, addr);
+ return addr;
}
static inline void
@@ -105,6 +109,7 @@ dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
struct dma_mapping_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(direction));
+ debug_unmap_single(dev, addr, size, direction);
if (ops->unmap_single)
ops->unmap_single(dev, addr, size, direction);
}
@@ -114,9 +119,13 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, int direction)
{
struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+ int ret;
...It all looks very nice, i've got one small namespace structure request: could you please name all the callbacks in a consistent way, so that they mirror the method they instrument - with a "debug_" prefix? I.e. the above one would be: debug_dma_unmap_single(). Ingo --
Ah true. The debug_map_single function has to check for No, this can not happen. If the code fails it disables itself so no more warnings will be printed. Joerg --
Impact: add debug callbacks for dma_sync_single_for_* functions
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 20 ++++++++++++++++++++
lib/dma-debug.c | 20 ++++++++++++++++++++
2 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index a28a701..f39c2a8 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -46,6 +46,14 @@ extern void debug_alloc_coherent(struct device *dev, size_t size,
extern void debug_free_coherent(struct device *dev, size_t size,
void *virt, dma_addr_t addr);
+extern void debug_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ int direction);
+
+extern void debug_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction);
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_init(u32 num_entries)
@@ -84,6 +92,18 @@ static inline void debug_free_coherent(struct device *dev, size_t size,
{
}
+static inline void debug_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index a4a5b0f..a7f2369 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -649,3 +649,23 @@ void debug_free_coherent(struct device *dev, size_t size,
}
EXPORT_SYMBOL(debug_free_coherent);
+void debug_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ if (global_disable)
+ return;
+
+ check_sync(dev, dma_handle, size, 0, direction, true);
+}
+EXPORT_SYMBOL(debug_sync_single_for_cpu);
+
+void debug_sync_single_for_device(struct ...Impact: add dma_debug= and dma_debug_entries= kernel parameters
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
Documentation/kernel-parameters.txt | 10 +++++++++
lib/dma-debug.c | 38 +++++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fb84902..4045f76 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -486,6 +486,16 @@ and is between 256 and 4096 characters. It is defined in the file
Range: 0 - 8192
Default: 64
+ dma_debug=off If the kernel is compiled with DMA_API_DEBUG support
+ this option disables the debugging code at boot.
+
+ dma_debug_entries=<number>
+ This option allows to tune the number of preallocated
+ entries for DMA-API debugging code. One entry is
+ required per DMA-API allocation. Use this if the
+ DMA-API debugging code disables itself because the
+ architectural default is too low.
+
hpet= [X86-32,HPET] option to control HPET usage
Format: { enable (default) | disable | force }
disable: disable HPET and use PIT instead
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index b932f15..4e58d09 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -61,6 +61,9 @@ static bool global_disable __read_mostly;
static u32 num_free_entries;
static u32 min_free_entries;
+/* number of preallocated entries requested by kernel cmdline */
+static u32 req_entries;
+
/*
* Hash related functions
*
@@ -250,6 +253,9 @@ void dma_debug_init(u32 num_entries)
dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED;
}
+ if (req_entries)
+ num_entries = req_entries;
+
if (prealloc_memory(num_entries) != 0) {
printk(KERN_ERR "DMA-API: debugging out of memory error "
"- disabled\n");
@@ -261,3 +267,35 @@ void dma_debug_init(u32 num_entries)
printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
}
...Impact: add debug callbacks for dma_[alloc|free]_coherent
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 16 ++++++++++++++++
lib/dma-debug.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index b2131f4..a28a701 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -40,6 +40,12 @@ extern void debug_map_sg(struct device *dev, struct scatterlist *sg,
extern void debug_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, int dir);
+extern void debug_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt);
+
+extern void debug_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr);
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_init(u32 num_entries)
@@ -68,6 +74,16 @@ static inline void debug_unmap_sg(struct device *dev,
{
}
+static inline void debug_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt)
+{
+}
+
+static inline void debug_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr)
+{
+}
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index e6d45f9..a4a5b0f 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -604,3 +604,48 @@ void debug_unmap_sg(struct device *dev, struct scatterlist *sglist,
}
EXPORT_SYMBOL(debug_unmap_sg);
+void debug_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt)
+{
+ struct dma_debug_entry *entry;
+
+ if (global_disable)
+ return;
+
+ if (dma_addr == bad_dma_address)
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->type = dma_debug_coherent;
+ entry->dev = dev;
+ entry->cpu_addr = virt;
+ entry->size ...On Fri, 9 Jan 2009 17:19:25 +0100 Only X86 has 'bad_dma_address' (IA64 also has with some configurations though). You need to use dma_mapping_error, as I pointed out in the previous submission. I recommend you to try this on one non-x86 box (at least). Even if you don't have non x86, you can use cross compiler. --
On Sun, 11 Jan 2009 15:25:42 +0900 Oops, this is for dma_alloc_coherent so you need to check 'virt' instead of dma_mapping_error(). --
Ah, true. Thanks. I will fix that. Joerg --
Impact: add a Kconfig entry for DMA-API debugging Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> --- arch/Kconfig | 2 ++ lib/Kconfig.debug | 11 +++++++++++ 2 files changed, 13 insertions(+), 0 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 2e13aa2..068554c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -103,3 +103,5 @@ config HAVE_CLK The <linux/clk.h> calls support software clock gating and thus are a key power management tool on many systems. +config HAVE_DMA_API_DEBUG + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2e75478..cba5778 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -899,6 +899,17 @@ config DYNAMIC_PRINTK_DEBUG debugging for all modules. This mode can be turned off via the above disable command. +config DMA_API_DEBUG + bool "Enable debugging of DMA-API usage" + depends on HAVE_DMA_API_DEBUG + help + Enable this option to debug the use of the DMA API by device drivers. + With this option you will be able to detect common bugs in device + drivers like double-freeing of DMA mappings or freeing mappings that + were never allocated. + This option causes a performance degredation. Use only if you want + to debug device drivers. If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" -- 1.5.6.4 --
Impact: add debug callbacks for dma_{un}map_single
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 19 +++++++++++++++++++
lib/dma-debug.c | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 60 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 345d538..82ae9ca 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -28,12 +28,31 @@ struct device;
extern void dma_debug_init(u32 num_entries);
+extern void debug_map_single(struct device *dev, void *ptr, size_t size,
+ int direction, dma_addr_t dma_addr);
+
+extern void debug_unmap_single(struct device *dev, dma_addr_t addr,
+ size_t size, int direction);
+
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_init(u32 num_entries)
{
}
+static inline void debug_map_single(struct device *dev, void *ptr,
+ size_t size, int direction,
+ dma_addr_t dma_addr)
+{
+}
+
+static inline void debug_unmap_single(struct device *dev, dma_addr_t addr,
+ size_t size, int direction)
+{
+}
+
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9f730a4..d4d14e5 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -510,3 +510,44 @@ out:
}
+void debug_map_single(struct device *dev, void *ptr, size_t size,
+ int direction, dma_addr_t dma_addr)
+{
+ struct dma_debug_entry *entry;
+
+ if (global_disable)
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->dev = dev;
+ entry->type = dma_debug_single;
+ entry->cpu_addr = ptr;
+ entry->dev_addr = dma_addr;
+ entry->size = size;
+ entry->direction = direction;
+
+ add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_map_single);
+
+void debug_unmap_single(struct device *dev, dma_addr_t addr,
+ size_t size, int direction)
+{
+ struct dma_debug_entry ref ...Impact: add allocator code for struct dma_debug_entry
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
lib/dma-debug.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 57 insertions(+), 0 deletions(-)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 74a0f36..c5444d4 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -49,6 +49,16 @@ struct hash_bucket {
/* Hash list to save the allocated dma addresses */
static struct hash_bucket dma_entry_hash[HASH_SIZE];
+/* List of pre-allocated dma_debug_entry's */
+static LIST_HEAD(free_entries);
+/* Lock for the list above */
+static DEFINE_SPINLOCK(free_entries_lock);
+
+/* Global disable flag - will be set in case of an error */
+static bool global_disable __read_mostly;
+
+static u32 num_free_entries;
+static u32 min_free_entries;
/*
* Hash related functions
@@ -138,3 +148,50 @@ static void add_dma_entry(struct dma_debug_entry *entry)
put_hash_bucket(bucket, &flags);
}
+/* struct dma_entry allocator
+ *
+ * The next two functions implement the allocator for
+ * struct dma_debug_entries.
+ */
+static struct dma_debug_entry *dma_entry_alloc(void)
+{
+ struct dma_debug_entry *entry = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (list_empty(&free_entries)) {
+ printk(KERN_ERR "DMA-API: debugging out of memory "
+ "- disabling\n");
+ global_disable = true;
+ goto out;
+ }
+
+ entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+ list_del(&entry->list);
+ memset(entry, 0, sizeof(*entry));
+
+ num_free_entries -= 1;
+ if (num_free_entries < min_free_entries)
+ min_free_entries = num_free_entries;
+
+out:
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ return entry;
+}
+
+static void dma_entry_free(struct dma_debug_entry *entry)
+{
+ unsigned long flags;
+
+ /*
+ * add to beginning of the list - this way the entries are
+ * more likely cache hot when they are ...btw., i'd suggest to not break kernel messages mid-string, but do "DMA-API: debugging out of memory - disabling\n"); Also, i'd use WARN() - it might be useful to see what callsite depleted unlikely() i guess. it might make sense to cache entries in the buckets - hence reuse the bucket spinlock. This means a somewhat higher effective pool size, but it also avoids a global lock. Ingo --
Impact: add debug callbacks for dma_sync_sg_* functions
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/dma-debug.h | 19 +++++++++++++++++++
lib/dma-debug.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 0 deletions(-)
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index b9c221a..bdba8c8 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -64,6 +64,13 @@ extern void debug_sync_single_range_for_device(struct device *dev,
unsigned long offset,
size_t size, int direction);
+extern void debug_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction);
+
+extern void debug_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction);
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_init(u32 num_entries)
@@ -130,6 +137,18 @@ static inline void debug_sync_single_range_for_device(struct device *dev,
{
}
+static inline void debug_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+}
+
+static inline void debug_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+}
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 6f73bed..e40c88c 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -692,3 +692,35 @@ void debug_sync_single_range_for_device(struct device *dev,
}
EXPORT_SYMBOL(debug_sync_single_range_for_device);
+void debug_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction)
+{
+ struct scatterlist *s;
+ int i;
+
+ if (global_disable)
+ return;
+
+ for_each_sg(sg, s, nelems, i) {
+ check_sync(dev, s->dma_address, s->dma_length, 0,
+ direction, ..."if (unlikely(global_disable))" i suspect? Ingo --
True. I will add unlikely() to all global_disable checks. Joerg --
This was triggered during pci_unmap_page() -> dma_unmap_single() where check_unmap() did not find the entry. The original mapping was done in bnx2 using pci_map_page(). I did not see how the debug entry was added to the hash during the call to pci_map_page() -> dma_map_page(). Did I miss something? Thanks. --
dma_map_page() results in dma_map_single() -> debug_map_single() call on x86. This way the entry would be added. Maybe the error from a double free? Joerg --
Ah, it only calls ops->map_single. Thanks for pointing that out. I will add a call to debug_map_single to dma_map_page too. So the error above may be a false positive. Joerg --
On Fri, 9 Jan 2009 23:37:47 +0100 Then you will find that the interface of debug_map_single is wrong. You can't use cpu address here. I explained this to you before: http://marc.info/?t=123116736500005&r=1&w=2 --
Looks pretty good in general - modulo the few observations i just made in reply to the patches. (Note, the comments apply to all the patches - there's similar small issues in other places as well.) Did you have a chance to look at debugobjects and see whether it could be reused as the dma-debug-entry object management code? One request: could you please git-base it on the changes in tip/core/iommu (i only have looked at the patches in email - i presume the current ones are based on -git)? That way we'll have it on top of Fujita's very nice DMA-mapping consolidation code. Ingo --
Yes, I looked at it. But the code looks like it is used to debug object Yes, I will rebase it for the next post. Joerg --
This adds a function to dump the DMA mappings that the debugging code is
aware of -- either for a single device, or for _all_ devices.
This can be useful for debugging -- sticking a call to it in the DMA
page fault handler, for example, to see if the faulting address _should_
be mapped or not, and hence work out whether it's IOMMU bugs we're
seeing, or driver bugs.
I'd also like to make it answer the question 'should address X be mapped
for device Y', but I'll get to that next.
Do we have a %pX format for printing dma_addr_t yet?
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 8a8aae4..5f4fc9f 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -75,6 +75,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg,
int nelems, int direction);
+extern void debug_dma_dump_mappings(struct device *dev);
+
#else /* CONFIG_DMA_API_DEBUG */
static inline void dma_debug_init(u32 num_entries)
@@ -155,6 +157,10 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev,
{
}
+static inline void debug_dma_dump_mappings(struct device *dev)
+{
+}
+
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 469e5b9..127d108 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -191,6 +191,36 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
}
/*
+ * Dump mapping entries for debugging purposes
+ */
+void debug_dma_dump_mappings(struct device *dev)
+{
+ int idx;
+
+ for (idx = 0; idx < HASH_SIZE; idx++) {
+ struct hash_bucket *bucket = &dma_entry_hash[idx];
+ struct dma_debug_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bucket->lock, flags);
+
+ list_for_each_entry(entry, &bucket->list, list) {
+ if (!dev || dev == entry->dev) {
+ dev_info(entry->dev,
+ "%s idx %d P=%Lx D=%Lx L=%Lx ...extra tab Sample output below (2 of ~2500 faults), pages don't appear to be mapped: (interesting hash_fn spread ;-) [ 21.646796] DMAR:[DMA Write] Request device [03:00.0] fault addr ff9df000 [ 21.646796] DMAR:[fault reason 05] PTE Write access is not set [ 21.646799] iwlagn 0000:03:00.0: single idx 0 P=1347d4000 D=ffe00000 L=2100 DMA_FROM_DEVICE [ 21.646802] iwlagn 0000:03:00.0: single idx 0 P=1325d4000 D=ffc00000 L=2100 DMA_FROM_DEVICE [ 21.646804] iwlagn 0000:03:00.0: coherent idx 0 P=1322a0000 D=ffa00000 L=8000 DMA_BIDIRECTIONAL [ 21.646806] iwlagn 0000:03:00.0: single idx 2 P=1347d0000 D=ffe04000 L=2100 DMA_FROM_DEVICE [ 21.646808] iwlagn 0000:03:00.0: single idx 2 P=1325d0000 D=ffc04000 L=2100 DMA_FROM_DEVICE [ 21.646810] iwlagn 0000:03:00.0: single idx 4 P=1347cc000 D=ffe08000 L=2100 DMA_FROM_DEVICE [ 21.646812] iwlagn 0000:03:00.0: single idx 4 P=1325cc000 D=ffc08000 L=2100 DMA_FROM_DEVICE [ 21.646814] iwlagn 0000:03:00.0: coherent idx 4 P=132258000 D=ffa08000 L=8000 DMA_BIDIRECTIONAL [ 21.646816] iwlagn 0000:03:00.0: single idx 6 P=1347c8000 D=ffe0c000 L=2100 DMA_FROM_DEVICE [ 21.646818] iwlagn 0000:03:00.0: single idx 6 P=1325c8000 D=ffc0c000 L=2100 DMA_FROM_DEVICE [ 21.646820] iwlagn 0000:03:00.0: single idx 8 P=1347c4000 D=ffe10000 L=2100 DMA_FROM_DEVICE [ 21.646822] iwlagn 0000:03:00.0: single idx 8 P=1325c4000 D=ffc10000 L=2100 DMA_FROM_DEVICE [ 21.646824] iwlagn 0000:03:00.0: coherent idx 8 P=132230000 D=ffa10000 L=8000 DMA_BIDIRECTIONAL [ 21.646826] iwlagn 0000:03:00.0: single idx 10 P=1347c0000 D=ffe14000 L=2100 DMA_FROM_DEVICE [ 21.646828] iwlagn 0000:03:00.0: single idx 10 P=1325c0000 D=ffc14000 L=2100 DMA_FROM_DEVICE [ 21.646830] iwlagn 0000:03:00.0: single idx 12 P=1347bc000 D=ffe18000 L=2100 DMA_FROM_DEVICE [ 21.646832] iwlagn 0000:03:00.0: single idx 12 P=1325bc000 D=ffc18000 L=2100 DMA_FROM_DEVICE [ 21.646834] iwlagn 0000:03:00.0: coherent idx 12 P=1321e8000 D=ffa18000 L=8000 DMA_BIDIRECTIONAL [ 21.646836] iwlagn ...
Thanks. Your code to hook it up is better than mine too. I'll steal What machine did you get that on? Yeah, I saw one of those. If could be a driver bug, of course -- it could be unmapping a range before it's actually finished with it. But that's unlikely. An alternative explanation... The DMA is aborted¹, and the device interrupts us to tell us about it at the _same_ time that the IOMMU interrupts us to tell us about the fault. We process the device interrupt first, unmap that buffer. And then we process the IOMMU interrupt... and the buffer is already gone from the list. It might be interesting to make this code also remember and print the Yeah, that's a little suboptimal, isn't it :) -- David Woodhouse Open Source Technology Centre David.Woodhouse@intel.com Intel Corporation ¹ due to the bug we're chasing now. The range _is_ supposed to be mapped. --
One thing I noticed is many of the faults are the page prior to a coherent range 0x8000 in length, which seems to correspond to tfd buffer. No obvious off-by-one error anywhere, and not all faults fit that pattern, so w/out more iwlagn driver knowledge hard to say if that's meaningful That's what I was thinking too. Almost need a flight recorder mode to see if the range was ever mapped/unmapped. thanks, -chris --
[ 362.283661] Device 0000:03:00.0 mapping: 1000@fff1b000 [ 362.283727] DMAR:[DMA Write] Request device [03:00.0] fault addr fff1b000 [ 362.284719] Device 0000:03:00.0 unmapping: 1000@fff1b000 [ 362.426974] Device 0000:03:00.0 mapping: 1000@fff1b000 [ 362.427040] DMAR:[DMA Write] Request device [03:00.0] fault addr fff1b000 [ 362.429092] Device 0000:03:00.0 unmapping: 1000@fff1b000 [ 447.644332] Device 0000:03:00.0 mapping: 1000@fff03000 [ 447.644373] DMAR:[DMA Write] Request device [03:00.0] fault addr fff03000 [ 447.646008] Device 0000:03:00.0 unmapping: 1000@fff03000 [ 483.037641] Device 0000:03:00.0 mapping: 1000@ffc9f000 [ 483.037707] DMAR:[DMA Write] Request device [03:00.0] fault addr ffc9f000 [ 483.038699] Device 0000:03:00.0 unmapping: 1000@ffc9f000 ... Looks like driver is doing the right thing. --
Hmm, thats because all device addresses are 16kb aligned. The hashfn uses bits 13 to 21 as the index for the hash. I can move this window up to bits 18-26 if it improves the hash spread. Joerg --
BTW, here's how I hooked it up:
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c933980..df593d1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1072,6 +1072,34 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
spin_unlock_irqrestore(&iommu->register_lock, flag);
}
+static struct pci_dev *domain_find_pdev(u8 bus, u8 devfn)
+{
+ unsigned long flags;
+ struct pci_dev *pdev = NULL;
+ struct device_domain_info *info;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_for_each_entry(info, &device_domain_list, global) {
+ if (info->bus == bus && info->devfn == devfn) {
+ pdev = info->dev;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+ return pdev;
+}
+
+static void dump_mappings(u16 source_id)
+{
+ struct pci_dev *pdev;
+ u8 bus = source_id >> 8;
+ u8 devfn = source_id & 0xff;
+
+ pdev = domain_find_pdev(bus, devfn);
+ if (pdev)
+ debug_dma_dump_mappings(&pdev->dev);
+}
+
static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
u8 fault_reason, u16 source_id, unsigned long long addr)
{
@@ -1086,6 +1114,8 @@ static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
(type ? "DMA Read" : "DMA Write"),
(source_id >> 8), PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+
+ dump_mappings(source_id);
return 0;
}
--
Great. Thanks. Applied to my dma-api/debug branch. Joerg --
