Re: [PATCH 08/16] dma-debug: add core checking functions

Previous thread: none

Next thread: none
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Hi,

this is version 2 of the patchset which introduces code to debug drivers
usage of the DMA-API. Many thanks to all the reviewers and the useful
comments on the fist version of this patchset. Tests with hardware
IOMMUs have shown several bugs in drivers regarding the usage of that
API.  Problems were found especially in network card drivers.

These bugs often don't show up or have any negative impact if there is
no hardware IOMMU in use in the system. But with an hardware IOMMU these
bugs turn the hardware unusable or, in the worst case, cause data
corruption on devices which are managed by other (good) drivers.

With the code these patches introduce driver developers can find several
bugs of misusing the DMA-API in their drivers. But be aware, it can not
find all possible bugs. If it finds a problem it prints out messages
like

------------[ cut here ]------------
WARNING: at /data2/repos/linux.trees.git/lib/dma-debug.c:231 check_unmap+0xab/0x3d9()
Hardware name: Toonie
bnx2 0000:01:00.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000000011]
Modules linked in:
Pid: 0, comm: swapper Not tainted 2.6.28 #174
Call Trace:
 <IRQ>  [<ffffffff8105af3a>] warn_slowpath+0xd3/0xf2
 [<ffffffff8107c36f>] ? find_usage_backwards+0xe2/0x116
 [<ffffffff8107c36f>] ? find_usage_backwards+0xe2/0x116
 [<ffffffff812efd16>] ? usb_hcd_link_urb_to_ep+0x94/0xa0
 [<ffffffff8107c52b>] ? mark_lock+0x1c/0x364
 [<ffffffff8107d8f7>] ? __lock_acquire+0xaec/0xb55
 [<ffffffff8107c52b>] ? mark_lock+0x1c/0x364
 [<ffffffff811e2b4b>] ? get_hash_bucket+0x28/0x33
 [<ffffffff814b25a5>] ? _spin_lock_irqsave+0x69/0x75
 [<ffffffff811e2b4b>] ? get_hash_bucket+0x28/0x33
 [<ffffffff811e2ff2>] check_unmap+0xab/0x3d9
 [<ffffffff8107c9ed>] ? trace_hardirqs_on_caller+0x108/0x14a
 [<ffffffff8107ca3c>] ? trace_hardirqs_on+0xd/0xf
 [<ffffffff811e3433>] debug_unmap_single+0x3e/0x40
 [<ffffffff8128d2d8>] dma_unmap_single+0x3d/0x60
 [<ffffffff8128d335>] ...
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debug callbacks for dma_sync_single_range_for_* functions

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   26 ++++++++++++++++++++++++++
 lib/dma-debug.c           |   23 +++++++++++++++++++++++
 2 files changed, 49 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index f39c2a8..b9c221a 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -54,6 +54,16 @@ extern void debug_sync_single_for_device(struct device *dev,
 					 dma_addr_t dma_handle,
 					 size_t size, int direction);
 
+extern void debug_sync_single_range_for_cpu(struct device *dev,
+					    dma_addr_t dma_handle,
+					    unsigned long offset, size_t size,
+					    int direction);
+
+extern void debug_sync_single_range_for_device(struct device *dev,
+					       dma_addr_t dma_handle,
+					       unsigned long offset,
+					       size_t size, int direction);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_init(u32 num_entries)
@@ -104,6 +114,22 @@ static inline void debug_sync_single_for_device(struct device *dev,
 {
 }
 
+static inline void debug_sync_single_range_for_cpu(struct device *dev,
+						   dma_addr_t dma_handle,
+						   unsigned long offset,
+						   size_t size,
+						   int direction)
+{
+}
+
+static inline void debug_sync_single_range_for_device(struct device *dev,
+						      dma_addr_t dma_handle,
+						      unsigned long offset,
+						      size_t size,
+						      int direction)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index a7f2369..6f73bed 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -669,3 +669,26 @@ void debug_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
 }
 EXPORT_SYMBOL(debug_sync_single_for_device);
 
+void debug_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				     unsigned ...
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add code to initialize dma-debug core data structures

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   14 +++++++++
 lib/dma-debug.c           |   66 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index ce4ace7..345d538 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -20,6 +20,20 @@
 #ifndef __DMA_DEBUG_H
 #define __DMA_DEBUG_H
 
+#include <linux/types.h>
+
 struct device;
 
+#ifdef CONFIG_DMA_API_DEBUG
+
+extern void dma_debug_init(u32 num_entries);
+
+#else /* CONFIG_DMA_API_DEBUG */
+
+static inline void dma_debug_init(u32 num_entries)
+{
+}
+
+#endif /* CONFIG_DMA_API_DEBUG */
+
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index c5444d4..b932f15 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -21,6 +21,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/slab.h>
 
 #define HASH_SIZE       256
 #define HASH_FN_SHIFT   20
@@ -195,3 +196,68 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 	spin_unlock_irqrestore(&free_entries_lock, flags);
 }
 
+/*
+ * DMA-API debugging init code
+ *
+ * The init code does two things:
+ *   1. Initialize core data structures
+ *   2. Preallocate a given number of dma_debug_entry structs
+ */
+
+static int prealloc_memory(u32 num_entries)
+{
+	struct dma_debug_entry *entry, *next_entry;
+	int i;
+
+	for (i = 0; i < num_entries; ++i) {
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL | __GFP_ZERO);
+		if (!entry)
+			goto out_err;
+
+		list_add_tail(&entry->list, &free_entries);
+	}
+
+	num_free_entries = num_entries;
+	min_free_entries = num_entries;
+
+	printk(KERN_INFO "DMA-API: preallocated %d debug entries\n",
+			num_entries);
+
+	return 0;
+
+out_err:
+
+	list_for_each_entry_safe(entry, next_entry, ...
From: Evgeniy Polyakov
Date: Friday, January 9, 2009 - 10:58 am

On Fri, Jan 09, 2009 at 05:19:19PM +0100, Joerg Roedel (joerg.roedel@amd.com) wrote:

kzalloc?


-- 
	Evgeniy Polyakov
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 11:17 am

True. kzalloc is better. I will change that.

Joerg
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: implement necessary functions for the core hash

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 lib/dma-debug.c |  101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 101 insertions(+), 0 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d04f8b6..74a0f36 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -18,9 +18,14 @@
  */
 
 #include <linux/dma-debug.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/list.h>
 
+#define HASH_SIZE       256
+#define HASH_FN_SHIFT   20
+#define HASH_FN_MASK    0xffULL
+
 enum {
 	dma_debug_single,
 	dma_debug_sg,
@@ -37,3 +42,99 @@ struct dma_debug_entry {
 	int              direction;
 };
 
+struct hash_bucket {
+	struct list_head list;
+	spinlock_t lock;
+} ____cacheline_aligned;
+
+/* Hash list to save the allocated dma addresses */
+static struct hash_bucket dma_entry_hash[HASH_SIZE];
+
+/*
+ * Hash related functions
+ *
+ * Every DMA-API request is saved into a struct dma_debug_entry. To
+ * have quick access to these structs they are stored into a hash.
+ */
+static int hash_fn(struct dma_debug_entry *entry)
+{
+	/*
+	 * Hash function is based on the dma address.
+	 * We use bits 20-27 here as the index into the hash
+	 */
+	return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
+}
+
+/*
+ * Request exclusive access to a hash bucket for a given dma_debug_entry.
+ */
+static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
+					   unsigned long *flags)
+{
+	int idx = hash_fn(entry);
+	unsigned long __flags;
+
+	spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
+	*flags = __flags;
+	return &dma_entry_hash[idx];
+}
+
+/*
+ * Give up exclusive access to the hash bucket
+ */
+static void put_hash_bucket(struct hash_bucket *bucket,
+			    unsigned long *flags)
+{
+	unsigned long __flags = *flags;
+
+	spin_unlock_irqrestore(&bucket->lock, __flags);
+}
+
+/*
+ * Search a given entry in ...
From: Evgeniy Polyakov
Date: Friday, January 9, 2009 - 10:55 am

Hi Joerg.



Do you really need this getting they are called only from single place?

-- 
	Evgeniy Polyakov
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 11:14 am

Because everything else I thought about here was even more ugly. But
maybe you have a better idea? I tried to lock directly in the debug_

Hmm, true. I will inline these functions.

Joerg
--

From: Evgeniy Polyakov
Date: Friday, January 9, 2009 - 11:23 am

I believe that having direct locking in the debug_ functions is not a
duplication, anyone will have a direct vision on the locking and hash
array dereference, and this will be just one additional line compared to
the get_* call and the same number of lines for the put :)

-- 
	Evgeniy Polyakov
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 11:40 am

Even more additional lines because of the additional variables needed in
every function. Anyway, I try it and if it does not look good I will
keep that change ;)

Joerg
--

From: Andrew Morton
Date: Tuesday, January 13, 2009 - 1:51 am

__cacheline_aligned_in_smp.

This all looks like an exotically large amount of code for a debug thingy?
--

From: David Woodhouse
Date: Tuesday, January 13, 2009 - 1:59 am

Perhaps so, but it's a useful debug thingy which lets us debug _very_
common problems in drivers, which many people are unlikely to notice
without this 'assistance'. 

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation

--

From: Ingo Molnar
Date: Wednesday, January 14, 2009 - 4:43 am

this code checks the DMA usage of ~1 million lines of kernel code - all 
the DMA using drivers. I think Joerg's feature is hugely relevant as DMA 
scribbles are one of the hardest to debug kernel bugs: they can end up in 
permanent data corruption or other hard to find bugs. In fact i think his 
patchset is rather simple and even having 10 times as much debug code 
would pay for its existence in the long run.

	Ingo
--

From: Andrew Morton
Date: Wednesday, January 14, 2009 - 10:39 am

Have we previously found bugs by other means which this facility would
have detected?  I don't recall any...

--

From: Ingo Molnar
Date: Wednesday, January 14, 2009 - 10:43 am

this facility found a handful of real bugs already - Joerg, do you have 
more specifics about that?

Also, such a facility would be useful during driver development, when such 
bugs occur en masse. Faster driver development under Linux is certainly 
desirable.

This facility basically adds a sandbox to all DMA ops and tracks the 
lifetime and validity of DMA operations. Given how ugly DMA bugs can be in 
practice and how hard to debug they are, i see this as good step forward.

	Ingo
--

From: David Woodhouse
Date: Wednesday, January 14, 2009 - 10:48 am

Yes. We've found network driver bugs which only showed up when an IOMMU
has been active -- and were painful to diagnose because they involved
DMA going wrong. It happens when drivers make mistakes with the DMA
mapping APIs.

With this debug facility, we can find such problems on _any_ hardware,
and get a sane report about what's wrong.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation

--

From: Ingo Molnar
Date: Wednesday, January 14, 2009 - 10:48 am

btw., during the past decade we have had countless very ugly driver DMA 
bugs in the past that took vendors months and specialized equipment to 
track down.

I cannot give you a number breakdown, only an impression: storage drivers 
tended to be the hardest hit (due to the severity of the bugs and due to 
their inherent complexity) - but DMA bugs in networking drivers can be 
hard to track down too.

Plus there's another benefit: if a driver passes this checking and there's 
still DMA related corruption observed, then the hardware / firmware 
becomes a stronger suspect. This helps debugging too.

	Ingo
--

From: FUJITA Tomonori
Date: Wednesday, January 14, 2009 - 8:44 pm

On Wed, 14 Jan 2009 18:48:04 +0100

We have had ugly DMA bugs in scsi drivers but I think that this new
feature can find very few of them. I can't think of any SCSI driver
bugs that this could find. This feature can't find any popular DMA
bugs in scsi drivers, such as messing up driver's dma descriptor from
a scatter gather list.

But this can find some kinds of DMA bugs and it's is just about 1,000
lines. I don't see any problem about merging this. 1,000 lines it too
large? Maybe cutting just 1,000 lines up into too many pieces is
deceptive.
--

From: Joerg Roedel
Date: Wednesday, January 14, 2009 - 10:51 am

See for example this bugfix:

http://www.spinics.net/lists/netdev/msg83208.html

The bug was found using the first version of this patchset.

Joerg


-- 
           | Advanced Micro Devices GmbH
 Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
 System    | 
 Research  | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
 Center    | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
           | Registergericht München, HRB Nr. 43632

--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add documentation about DMA-API debugging to DMA-API.txt

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/DMA-API.txt |  117 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 117 insertions(+), 0 deletions(-)

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index b462bb1..e36e85a 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -610,3 +610,120 @@ size is the size (and should be a page-sized multiple).
 The return value will be either a pointer to the processor virtual
 address of the memory, or an error (via PTR_ERR()) if any part of the
 region is occupied.
+
+Part III - Debug drivers use of the DMA-API
+-------------------------------------------
+
+The DMA-API as described above as some constraints. DMA addresses must be
+released with the corresponding function with the same size for example. With
+the advent of hardware IOMMUs it becomes more and more important that drivers
+do not violate those constraints. In the worst case such a violation can
+result in data corruption up to destroyed filesystems.
+
+To debug drivers and find bugs in the usage of the DMA-API checking code can
+be compiled into the kernel which will tell the developer about those
+violations. If your architecture supports it you can select the "Enable
+debugging of DMA-API usage" option in your kernel configuration. Enabling this
+option has a performance impact. Do not enable it in production kernels.
+
+If you boot the resulting kernel will contain code which does some bookkeeping
+about what DMA memory was allocated for which device. If this code detects an
+error it prints a warning message with some details into your kernel log. An
+example warning message may look like this:
+
+------------[ cut here ]------------
+WARNING: at /data2/repos/linux.trees.git/lib/dma-debug.c:231
+	check_unmap+0xab/0x3d9()
+Hardware name: Toonie
+bnx2 0000:01:00.0: DMA-API: device driver tries to free DMA
+	memory it has ...
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debug callbacks for dma_{un}map_sg

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   15 ++++++++++++
 lib/dma-debug.c           |   53 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 82ae9ca..b2131f4 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -34,6 +34,11 @@ extern void debug_map_single(struct device *dev, void *ptr, size_t size,
 extern void debug_unmap_single(struct device *dev, dma_addr_t addr,
 			       size_t size, int direction);
 
+extern void debug_map_sg(struct device *dev, struct scatterlist *sg,
+			 int nents, int direction);
+
+extern void debug_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			   int nelems, int dir);
 
 #else /* CONFIG_DMA_API_DEBUG */
 
@@ -52,6 +57,16 @@ static inline void debug_unmap_single(struct device *dev, dma_addr_t addr,
 {
 }
 
+static inline void debug_map_sg(struct device *dev, struct scatterlist *sg,
+				int nents, int direction)
+{
+}
+
+static inline void debug_unmap_sg(struct device *dev,
+				  struct scatterlist *sglist,
+				  int nelems, int dir)
+{
+}
 
 #endif /* CONFIG_DMA_API_DEBUG */
 
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d4d14e5..e6d45f9 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -17,6 +17,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
@@ -551,3 +552,55 @@ void debug_unmap_single(struct device *dev, dma_addr_t addr,
 }
 EXPORT_SYMBOL(debug_unmap_single);
 
+void debug_map_sg(struct device *dev, struct scatterlist *sg,
+		  int nents, int direction)
+{
+	struct dma_debug_entry *entry;
+	struct scatterlist *s;
+	int i;
+
+	if (global_disable)
+		return;
+
+	for_each_sg(sg, s, nents, i) ...
From: Evgeniy Polyakov
Date: Friday, January 9, 2009 - 11:08 am

Will this print false errors if above map debug failed to add an entry
into the list?

-- 
	Evgeniy Polyakov
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 11:11 am

No. The code disables itself if adding an entry fails. This can only
happen when we run out of preallocated dma_debug_entries.

Joerg
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debugfs interface for configuring DMA-API debugging

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 lib/dma-debug.c |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 78 insertions(+), 0 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 4e58d09..ca0ccb1 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -19,6 +19,7 @@
 
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
+#include <linux/debugfs.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -58,12 +59,29 @@ static DEFINE_SPINLOCK(free_entries_lock);
 /* Global disable flag - will be set in case of an error */
 static bool global_disable __read_mostly;
 
+/* Global error count */
+static u32 error_count;
+
+/* Global error show enable*/
+static u32 show_all_errors __read_mostly;
+/* Number of errors to show */
+static u32 show_num_errors = 1;
+
 static u32 num_free_entries;
 static u32 min_free_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
 static u32 req_entries;
 
+/* debugfs dentry's for the stuff above */
+static struct dentry *dma_debug_dent;
+static struct dentry *global_disable_dent;
+static struct dentry *error_count_dent;
+static struct dentry *show_all_errors_dent;
+static struct dentry *show_num_errors_dent;
+static struct dentry *num_free_entries_dent;
+static struct dentry *min_free_entries_dent;
+
 /*
  * Hash related functions
  *
@@ -238,6 +256,58 @@ out_err:
 	return -ENOMEM;
 }
 
+static int dma_debug_fs_init(void)
+{
+	dma_debug_dent = debugfs_create_dir("dma-api", NULL);
+	if (!dma_debug_dent) {
+		printk(KERN_ERR "DMA-API: can not create debugfs directory\n");
+		return -ENOMEM;
+	}
+
+	global_disable_dent = debugfs_create_bool("disabled", 0444,
+			dma_debug_dent,
+			(u32 *)&global_disable);
+	if (!global_disable_dent)
+		goto out_err;
+
+	error_count_dent = debugfs_create_u32("error_count", 0444,
+			dma_debug_dent, ...
From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:08 pm

Small detail: please use native C types for non-hardware variables - 
'unsigned int', 'unsigned long', etc.

u32/u64 is typically used for variables where there is a real significance 
to the precise width of the variable: there's either a hardware or a 
user-space ABI involved.

	Ingo
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 12:52 am

Hmm, these variables are exported to debugfs. Thats the reason I made
them u32.

Joerg
--

From: Joerg Roedel
Date: Wednesday, January 14, 2009 - 8:22 am

There is some kind of userspace ABI: all these variables are exported
via debugfs. And debugfs relies on these types.

Joerg

-- 
           | Advanced Micro Devices GmbH
 Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
 System    | 
 Research  | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
 Center    | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
           | Registergericht München, HRB Nr. 43632

--

From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:15 pm

should all be __read_mostly.

	Ingo
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add functions to check on dma unmap and sync

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 lib/dma-debug.c |  133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 133 insertions(+), 0 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ca0ccb1..9f730a4 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -17,9 +17,11 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
+#include <linux/device.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -82,6 +84,22 @@ static struct dentry *show_num_errors_dent;
 static struct dentry *num_free_entries_dent;
 static struct dentry *min_free_entries_dent;
 
+static char *type2name[3] = { "single", "scather-gather", "coherent" };
+
+static char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+			     "DMA_FROM_DEVICE", "DMA_NONE" };
+
+#define err_printk(dev, format, arg...) do {			\
+		error_count += 1;				\
+		if (show_all_errors || show_num_errors > 0) {	\
+			WARN(1, "%s %s: " format,		\
+			     dev_driver_string(dev),		\
+			     dev_name(dev) , ## arg);		\
+		}						\
+		if (!show_all_errors && show_num_errors > 0)	\
+			show_num_errors -= 1;			\
+	} while (0);
+
 /*
  * Hash related functions
  *
@@ -377,3 +395,118 @@ static __init int dma_debug_entries_cmdline(char *str)
 __setup("dma_debug=", dma_debug_cmdline);
 __setup("dma_debug_entries=", dma_debug_entries_cmdline);
 
+static void check_unmap(struct dma_debug_entry *ref)
+{
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+
+	if (dma_mapping_error(ref->dev, ref->dev_addr))
+		return;
+
+	bucket = get_hash_bucket(ref, &flags);
+	entry = hash_bucket_find(bucket, ref);
+
+	if (!entry) {
+		err_printk(ref->dev, "DMA-API: device driver tries "
+			   "to free DMA memory ...
From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:11 pm

Note that the arithmetics here is SMP-unsafe: we only hold the hash bucket 
so if two errors hit at once on two CPUs then the error tracking variables 
can be accessed at once.

I'd suggest a simple global lock for this error case (taken inside the 
hash bucket lock), to be on the safe side.

Also, please dont use a macro for this - printk details can be passed in 
to helper inlines/functions too.

	Ingo
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 12:57 am

Yeah, this is not SMP-safe, I know. But debugfs does not support
atomic_t so I made the variables u32. But at least a race condition has
not a too bad impact. What may habben is that error_count misses a error
or the show_num_errors become negative.
But if we really want to avoid this I think its better to add atomic_t
support to debugfs. What do you think?

Joerg
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 1:34 am

Even a global lock will not really help here because show_num_errors and
show_all_errors can be set using debugfs. Either we live with the small
race (with limited impact) or I add atomic_t support to debugfs.

Joerg

--

From: Joerg Roedel
Date: Wednesday, January 14, 2009 - 4:44 am

As I wrote in a previous email, a race here is no big deal. I add a

Hmm, how does this look like? There is not WARN variant which can use
va_args as a parameter. And it is important that the error message is
logged in the warning itself. So the driver developer can see it when a
user reports the warning.

Joerg

-- 
           | Advanced Micro Devices GmbH
 Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
 System    | 
 Research  | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
 Center    | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
           | Registergericht München, HRB Nr. 43632

--

From: Ingo Molnar
Date: Wednesday, January 14, 2009 - 4:48 am

we commonly use global locks in debug exception cases - to serialize 
console output. But it's certainly no big deal.

	Ingo
--

From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:12 pm

You must be using Vim and copy & paste messed up? :)

	Ingo
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 12:54 am

Ups, true :) Thanks.

Joerg
--

From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:13 pm

Should be const i guess.

	Ingo
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add groundwork for DMA-API debugging

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   25 +++++++++++++++++++++++++
 lib/Makefile              |    2 ++
 lib/dma-debug.c           |   39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/dma-debug.h
 create mode 100644 lib/dma-debug.c

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
new file mode 100644
index 0000000..ce4ace7
--- /dev/null
+++ b/include/linux/dma-debug.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __DMA_DEBUG_H
+#define __DMA_DEBUG_H
+
+struct device;
+
+#endif /* __DMA_DEBUG_H */
diff --git a/lib/Makefile b/lib/Makefile
index 32b0e64..50b48cf 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -84,6 +84,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 
 obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o
 
+obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
new file mode 100644
index 0000000..d04f8b6
--- /dev/null
+++ b/lib/dma-debug.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008 Advanced Micro ...
From: FUJITA Tomonori
Date: Saturday, January 10, 2009 - 11:25 pm

On Fri, 9 Jan 2009 17:19:16 +0100

I don't think that cpu addresses are appropriate here.
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: make use of DMA-API debugging code in x86

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/Kconfig                   |    1 +
 arch/x86/include/asm/dma-mapping.h |   30 ++++++++++++++++++++++++++----
 arch/x86/kernel/pci-dma.c          |    5 +++++
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 862adb9..68a806c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -39,6 +39,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_DMA_API_DEBUG
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4035357..939d5b3 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -7,6 +7,7 @@
  */
 
 #include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
 #include <asm/io.h>
 #include <asm/swiotlb.h>
 #include <asm-generic/dma-coherent.h>
@@ -93,9 +94,12 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size,
 	       int direction)
 {
 	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+	dma_addr_t addr;
 
 	BUG_ON(!valid_dma_direction(direction));
-	return ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
+	addr = ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
+	debug_map_single(hwdev, ptr, size, direction, addr);
+	return addr;
 }
 
 static inline void
@@ -105,6 +109,7 @@ dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
 	struct dma_mapping_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(direction));
+	debug_unmap_single(dev, addr, size, direction);
 	if (ops->unmap_single)
 		ops->unmap_single(dev, addr, size, direction);
 }
@@ -114,9 +119,13 @@ dma_map_sg(struct device *hwdev, struct scatterlist *sg,
 	   int nents, int direction)
 {
 	struct dma_mapping_ops *ops = get_dma_ops(hwdev);
+	int ret;
 
 ...
From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:04 pm

It all looks very nice, i've got one small namespace structure request: 
could you please name all the callbacks in a consistent way, so that they 
mirror the method they instrument - with a "debug_" prefix?

I.e. the above one would be: debug_dma_unmap_single().

	Ingo
--

From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:48 pm

whitespace damage here.

	Ingo
--

From: FUJITA Tomonori
Date: Saturday, January 10, 2009 - 11:25 pm

[Empty message]
From: Joerg Roedel
Date: Sunday, January 11, 2009 - 1:08 am

Ah true. The debug_map_single function has to check for

No, this can not happen. If the code fails it disables itself so no more
warnings will be printed.

Joerg

--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debug callbacks for dma_sync_single_for_* functions

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   20 ++++++++++++++++++++
 lib/dma-debug.c           |   20 ++++++++++++++++++++
 2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index a28a701..f39c2a8 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -46,6 +46,14 @@ extern void debug_alloc_coherent(struct device *dev, size_t size,
 extern void debug_free_coherent(struct device *dev, size_t size,
 				void *virt, dma_addr_t addr);
 
+extern void debug_sync_single_for_cpu(struct device *dev,
+				      dma_addr_t dma_handle, size_t size,
+				      int direction);
+
+extern void debug_sync_single_for_device(struct device *dev,
+					 dma_addr_t dma_handle,
+					 size_t size, int direction);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_init(u32 num_entries)
@@ -84,6 +92,18 @@ static inline void debug_free_coherent(struct device *dev, size_t size,
 {
 }
 
+static inline void debug_sync_single_for_cpu(struct device *dev,
+					     dma_addr_t dma_handle,
+					     size_t size, int direction)
+{
+}
+
+static inline void debug_sync_single_for_device(struct device *dev,
+						dma_addr_t dma_handle,
+						size_t size, int direction)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index a4a5b0f..a7f2369 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -649,3 +649,23 @@ void debug_free_coherent(struct device *dev, size_t size,
 }
 EXPORT_SYMBOL(debug_free_coherent);
 
+void debug_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			       size_t size, int direction)
+{
+	if (global_disable)
+		return;
+
+	check_sync(dev, dma_handle, size, 0, direction, true);
+}
+EXPORT_SYMBOL(debug_sync_single_for_cpu);
+
+void debug_sync_single_for_device(struct ...
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add dma_debug= and dma_debug_entries= kernel parameters

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/kernel-parameters.txt |   10 +++++++++
 lib/dma-debug.c                     |   38 +++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fb84902..4045f76 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -486,6 +486,16 @@ and is between 256 and 4096 characters. It is defined in the file
 			Range: 0 - 8192
 			Default: 64
 
+	dma_debug=off	If the kernel is compiled with DMA_API_DEBUG support
+			this option disables the debugging code at boot.
+
+	dma_debug_entries=<number>
+			This option allows to tune the number of preallocated
+			entries for DMA-API debugging code. One entry is
+			required per DMA-API allocation. Use this if the
+			DMA-API debugging code disables itself because the
+			architectural default is too low.
+
 	hpet=		[X86-32,HPET] option to control HPET usage
 			Format: { enable (default) | disable | force }
 			disable: disable HPET and use PIT instead
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index b932f15..4e58d09 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -61,6 +61,9 @@ static bool global_disable __read_mostly;
 static u32 num_free_entries;
 static u32 min_free_entries;
 
+/* number of preallocated entries requested by kernel cmdline */
+static u32 req_entries;
+
 /*
  * Hash related functions
  *
@@ -250,6 +253,9 @@ void dma_debug_init(u32 num_entries)
 		dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED;
 	}
 
+	if (req_entries)
+		num_entries = req_entries;
+
 	if (prealloc_memory(num_entries) != 0) {
 		printk(KERN_ERR "DMA-API: debugging out of memory error "
 				"- disabled\n");
@@ -261,3 +267,35 @@ void dma_debug_init(u32 num_entries)
 	printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
 }
 ...
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debug callbacks for dma_[alloc|free]_coherent

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   16 ++++++++++++++++
 lib/dma-debug.c           |   45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index b2131f4..a28a701 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -40,6 +40,12 @@ extern void debug_map_sg(struct device *dev, struct scatterlist *sg,
 extern void debug_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			   int nelems, int dir);
 
+extern void debug_alloc_coherent(struct device *dev, size_t size,
+				 dma_addr_t dma_addr, void *virt);
+
+extern void debug_free_coherent(struct device *dev, size_t size,
+				void *virt, dma_addr_t addr);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_init(u32 num_entries)
@@ -68,6 +74,16 @@ static inline void debug_unmap_sg(struct device *dev,
 {
 }
 
+static inline void debug_alloc_coherent(struct device *dev, size_t size,
+					dma_addr_t dma_addr, void *virt)
+{
+}
+
+static inline void debug_free_coherent(struct device *dev, size_t size,
+				       void *virt, dma_addr_t addr)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index e6d45f9..a4a5b0f 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -604,3 +604,48 @@ void debug_unmap_sg(struct device *dev, struct scatterlist *sglist,
 }
 EXPORT_SYMBOL(debug_unmap_sg);
 
+void debug_alloc_coherent(struct device *dev, size_t size,
+			  dma_addr_t dma_addr, void *virt)
+{
+	struct dma_debug_entry *entry;
+
+	if (global_disable)
+		return;
+
+	if (dma_addr == bad_dma_address)
+		return;
+
+	entry = dma_entry_alloc();
+	if (!entry)
+		return;
+
+	entry->type      = dma_debug_coherent;
+	entry->dev       = dev;
+	entry->cpu_addr  = virt;
+	entry->size     ...
From: FUJITA Tomonori
Date: Saturday, January 10, 2009 - 11:25 pm

On Fri, 9 Jan 2009 17:19:25 +0100

Only X86 has 'bad_dma_address' (IA64 also has with some configurations
though). You need to use dma_mapping_error, as I pointed out in the
previous submission.

I recommend you to try this on one non-x86 box (at least). Even if you
don't have non x86, you can use cross compiler.
--

From: FUJITA Tomonori
Date: Saturday, January 10, 2009 - 11:30 pm

On Sun, 11 Jan 2009 15:25:42 +0900

Oops, this is for dma_alloc_coherent so you need to check 'virt'
instead of dma_mapping_error().
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 12:59 am

Ah, true. Thanks. I will fix that.

Joerg
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add a Kconfig entry for DMA-API debugging

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/Kconfig      |    2 ++
 lib/Kconfig.debug |   11 +++++++++++
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 2e13aa2..068554c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,3 +103,5 @@ config HAVE_CLK
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
 
+config HAVE_DMA_API_DEBUG
+	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e75478..cba5778 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -899,6 +899,17 @@ config DYNAMIC_PRINTK_DEBUG
 	  debugging for all modules. This mode can be turned off via the above
 	  disable command.
 
+config DMA_API_DEBUG
+	bool "Enable debugging of DMA-API usage"
+	depends on HAVE_DMA_API_DEBUG
+	help
+	  Enable this option to debug the use of the DMA API by device drivers.
+	  With this option you will be able to detect common bugs in device
+	  drivers like double-freeing of DMA mappings or freeing mappings that
+	  were never allocated.
+	  This option causes a performance degredation.  Use only if you want
+	  to debug device drivers. If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
-- 
1.5.6.4


--

From: Randy Dunlap
Date: Friday, January 9, 2009 - 1:12 pm

---
~Randy
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debug callbacks for dma_{un}map_single

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   19 +++++++++++++++++++
 lib/dma-debug.c           |   41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 345d538..82ae9ca 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -28,12 +28,31 @@ struct device;
 
 extern void dma_debug_init(u32 num_entries);
 
+extern void debug_map_single(struct device *dev, void *ptr, size_t size,
+			     int direction, dma_addr_t dma_addr);
+
+extern void debug_unmap_single(struct device *dev, dma_addr_t addr,
+			       size_t size, int direction);
+
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_init(u32 num_entries)
 {
 }
 
+static inline void debug_map_single(struct device *dev, void *ptr,
+				    size_t size, int direction,
+				    dma_addr_t dma_addr)
+{
+}
+
+static inline void debug_unmap_single(struct device *dev, dma_addr_t addr,
+				      size_t size, int direction)
+{
+}
+
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 9f730a4..d4d14e5 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -510,3 +510,44 @@ out:
 
 }
 
+void debug_map_single(struct device *dev, void *ptr, size_t size,
+		      int direction, dma_addr_t dma_addr)
+{
+	struct dma_debug_entry *entry;
+
+	if (global_disable)
+		return;
+
+	entry = dma_entry_alloc();
+	if (!entry)
+		return;
+
+	entry->dev       = dev;
+	entry->type      = dma_debug_single;
+	entry->cpu_addr  = ptr;
+	entry->dev_addr  = dma_addr;
+	entry->size      = size;
+	entry->direction = direction;
+
+	add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_map_single);
+
+void debug_unmap_single(struct device *dev, dma_addr_t addr,
+			size_t size, int direction)
+{
+	struct dma_debug_entry ref ...
From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add allocator code for struct dma_debug_entry

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 lib/dma-debug.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 57 insertions(+), 0 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 74a0f36..c5444d4 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -49,6 +49,16 @@ struct hash_bucket {
 
 /* Hash list to save the allocated dma addresses */
 static struct hash_bucket dma_entry_hash[HASH_SIZE];
+/* List of pre-allocated dma_debug_entry's */
+static LIST_HEAD(free_entries);
+/* Lock for the list above */
+static DEFINE_SPINLOCK(free_entries_lock);
+
+/* Global disable flag - will be set in case of an error */
+static bool global_disable __read_mostly;
+
+static u32 num_free_entries;
+static u32 min_free_entries;
 
 /*
  * Hash related functions
@@ -138,3 +148,50 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 	put_hash_bucket(bucket, &flags);
 }
 
+/* struct dma_entry allocator
+ *
+ * The next two functions implement the allocator for
+ * struct dma_debug_entries.
+ */
+static struct dma_debug_entry *dma_entry_alloc(void)
+{
+	struct dma_debug_entry *entry = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&free_entries_lock, flags);
+
+	if (list_empty(&free_entries)) {
+		printk(KERN_ERR "DMA-API: debugging out of memory "
+				"- disabling\n");
+		global_disable = true;
+		goto out;
+	}
+
+	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+	list_del(&entry->list);
+	memset(entry, 0, sizeof(*entry));
+
+	num_free_entries -= 1;
+	if (num_free_entries < min_free_entries)
+		min_free_entries = num_free_entries;
+
+out:
+	spin_unlock_irqrestore(&free_entries_lock, flags);
+
+	return entry;
+}
+
+static void dma_entry_free(struct dma_debug_entry *entry)
+{
+	unsigned long flags;
+
+	/*
+	 * add to beginning of the list - this way the entries are
+	 * more likely cache hot when they are ...
From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:43 pm

btw., i'd suggest to not break kernel messages mid-string, but do 
		 "DMA-API: debugging out of memory - disabling\n");

Also, i'd use WARN() - it might be useful to see what callsite depleted 

unlikely() i guess.


it might make sense to cache entries in the buckets - hence reuse the 
bucket spinlock. This means a somewhat higher effective pool size, but it 
also avoids a global lock.

	Ingo
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 9:19 am

Impact: add debug callbacks for dma_sync_sg_* functions

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/dma-debug.h |   19 +++++++++++++++++++
 lib/dma-debug.c           |   32 ++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index b9c221a..bdba8c8 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -64,6 +64,13 @@ extern void debug_sync_single_range_for_device(struct device *dev,
 					       unsigned long offset,
 					       size_t size, int direction);
 
+extern void debug_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				  int nelems, int direction);
+
+extern void debug_sync_sg_for_device(struct device *dev,
+				     struct scatterlist *sg,
+				     int nelems, int direction);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_init(u32 num_entries)
@@ -130,6 +137,18 @@ static inline void debug_sync_single_range_for_device(struct device *dev,
 {
 }
 
+static inline void debug_sync_sg_for_cpu(struct device *dev,
+					 struct scatterlist *sg,
+					 int nelems, int direction)
+{
+}
+
+static inline void debug_sync_sg_for_device(struct device *dev,
+					    struct scatterlist *sg,
+					    int nelems, int direction)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 6f73bed..e40c88c 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -692,3 +692,35 @@ void debug_sync_single_range_for_device(struct device *dev,
 }
 EXPORT_SYMBOL(debug_sync_single_range_for_device);
 
+void debug_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			   int nelems, int direction)
+{
+	struct scatterlist *s;
+	int i;
+
+	if (global_disable)
+		return;
+
+	for_each_sg(sg, s, nelems, i) {
+		check_sync(dev, s->dma_address, s->dma_length, 0,
+				direction, ...
From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:46 pm

"if (unlikely(global_disable))" i suspect?

	Ingo
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 1:00 am

True. I  will add unlikely() to all global_disable checks.

Joerg
--

From: Michael Chan
Date: Friday, January 9, 2009 - 2:24 pm

This was triggered during pci_unmap_page() -> dma_unmap_single() where
check_unmap() did not find the entry.

The original mapping was done in bnx2 using pci_map_page().  I did not
see how the debug entry was added to the hash during the call to
pci_map_page() -> dma_map_page().  Did I miss something?

Thanks.


--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 3:33 pm

dma_map_page() results in dma_map_single() -> debug_map_single() call on
x86. This way the entry would be added. Maybe the error from a double
free?

Joerg
--

From: Joerg Roedel
Date: Friday, January 9, 2009 - 3:37 pm

Ah, it only calls ops->map_single. Thanks for pointing that out. I will
add a call to debug_map_single to dma_map_page too. So the error above
may be a false positive.

Joerg
--

From: FUJITA Tomonori
Date: Saturday, January 10, 2009 - 11:25 pm

On Fri, 9 Jan 2009 23:37:47 +0100

Then you will find that the interface of debug_map_single is wrong.
You can't use cpu address here. I explained this to you before:

http://marc.info/?t=123116736500005&r=1&w=2
--

From: Ingo Molnar
Date: Saturday, January 10, 2009 - 4:54 pm

Looks pretty good in general - modulo the few observations i just made in 
reply to the patches. (Note, the comments apply to all the patches - 
there's similar small issues in other places as well.)

Did you have a chance to look at debugobjects and see whether it could be 
reused as the dma-debug-entry object management code?

One request: could you please git-base it on the changes in tip/core/iommu 
(i only have looked at the patches in email - i presume the current ones 
are based on -git)?

That way we'll have it on top of Fujita's very nice DMA-mapping 
consolidation code.

	Ingo
--

From: Joerg Roedel
Date: Sunday, January 11, 2009 - 1:11 am

Yes, I looked at it. But the code looks like it is used to debug object

Yes, I will rebase it for the next post.

Joerg

--

From: David Woodhouse
Date: Thursday, February 5, 2009 - 3:52 pm

This adds a function to dump the DMA mappings that the debugging code is
aware of -- either for a single device, or for _all_ devices.

This can be useful for debugging -- sticking a call to it in the DMA
page fault handler, for example, to see if the faulting address _should_
be mapped or not, and hence work out whether it's IOMMU bugs we're
seeing, or driver bugs.

I'd also like to make it answer the question 'should address X be mapped
for device Y', but I'll get to that next.

Do we have a %pX format for printing dma_addr_t yet? 

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 8a8aae4..5f4fc9f 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -75,6 +75,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 					 struct scatterlist *sg,
 					 int nelems, int direction);
 
+extern void debug_dma_dump_mappings(struct device *dev);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_init(u32 num_entries)
@@ -155,6 +157,10 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev,
 {
 }
 
+static inline void debug_dma_dump_mappings(struct device *dev)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 469e5b9..127d108 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -191,6 +191,36 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
 }
 
 /*
+ * Dump mapping entries for debugging purposes
+ */
+void debug_dma_dump_mappings(struct device *dev)
+{
+	int idx;
+
+	for (idx = 0; idx < HASH_SIZE; idx++) {
+		struct hash_bucket *bucket = &dma_entry_hash[idx];
+		struct dma_debug_entry *entry;
+		unsigned long flags;
+
+		spin_lock_irqsave(&bucket->lock, flags);
+
+		list_for_each_entry(entry, &bucket->list, list) {
+			if (!dev || dev == entry->dev) {
+				dev_info(entry->dev,
+					 "%s idx %d P=%Lx D=%Lx L=%Lx ...
From: Chris Wright
Date: Thursday, February 5, 2009 - 7:05 pm

extra tab

Sample output below (2 of ~2500 faults), pages don't appear to be mapped:
(interesting hash_fn spread ;-)

[   21.646796] DMAR:[DMA Write] Request device [03:00.0] fault addr ff9df000 
[   21.646796] DMAR:[fault reason 05] PTE Write access is not set
[   21.646799] iwlagn 0000:03:00.0: single idx 0 P=1347d4000 D=ffe00000 L=2100 DMA_FROM_DEVICE
[   21.646802] iwlagn 0000:03:00.0: single idx 0 P=1325d4000 D=ffc00000 L=2100 DMA_FROM_DEVICE
[   21.646804] iwlagn 0000:03:00.0: coherent idx 0 P=1322a0000 D=ffa00000 L=8000 DMA_BIDIRECTIONAL
[   21.646806] iwlagn 0000:03:00.0: single idx 2 P=1347d0000 D=ffe04000 L=2100 DMA_FROM_DEVICE
[   21.646808] iwlagn 0000:03:00.0: single idx 2 P=1325d0000 D=ffc04000 L=2100 DMA_FROM_DEVICE
[   21.646810] iwlagn 0000:03:00.0: single idx 4 P=1347cc000 D=ffe08000 L=2100 DMA_FROM_DEVICE
[   21.646812] iwlagn 0000:03:00.0: single idx 4 P=1325cc000 D=ffc08000 L=2100 DMA_FROM_DEVICE
[   21.646814] iwlagn 0000:03:00.0: coherent idx 4 P=132258000 D=ffa08000 L=8000 DMA_BIDIRECTIONAL
[   21.646816] iwlagn 0000:03:00.0: single idx 6 P=1347c8000 D=ffe0c000 L=2100 DMA_FROM_DEVICE
[   21.646818] iwlagn 0000:03:00.0: single idx 6 P=1325c8000 D=ffc0c000 L=2100 DMA_FROM_DEVICE
[   21.646820] iwlagn 0000:03:00.0: single idx 8 P=1347c4000 D=ffe10000 L=2100 DMA_FROM_DEVICE
[   21.646822] iwlagn 0000:03:00.0: single idx 8 P=1325c4000 D=ffc10000 L=2100 DMA_FROM_DEVICE
[   21.646824] iwlagn 0000:03:00.0: coherent idx 8 P=132230000 D=ffa10000 L=8000 DMA_BIDIRECTIONAL
[   21.646826] iwlagn 0000:03:00.0: single idx 10 P=1347c0000 D=ffe14000 L=2100 DMA_FROM_DEVICE
[   21.646828] iwlagn 0000:03:00.0: single idx 10 P=1325c0000 D=ffc14000 L=2100 DMA_FROM_DEVICE
[   21.646830] iwlagn 0000:03:00.0: single idx 12 P=1347bc000 D=ffe18000 L=2100 DMA_FROM_DEVICE
[   21.646832] iwlagn 0000:03:00.0: single idx 12 P=1325bc000 D=ffc18000 L=2100 DMA_FROM_DEVICE
[   21.646834] iwlagn 0000:03:00.0: coherent idx 12 P=1321e8000 D=ffa18000 L=8000 DMA_BIDIRECTIONAL
[   21.646836] iwlagn ...
From: David Woodhouse
Date: Friday, February 6, 2009 - 12:56 am

Thanks. Your code to hook it up is better than mine too. I'll steal

What machine did you get that on?

Yeah, I saw one of those. If could be a driver bug, of course -- it
could be unmapping a range before it's actually finished with it. But
that's unlikely.

An alternative explanation... The DMA is aborted¹, and the device
interrupts us to tell us about it at the _same_ time that the IOMMU
interrupts us to tell us about the fault. We process the device
interrupt first, unmap that buffer. And then we process the IOMMU
interrupt... and the buffer is already gone from the list.

It might be interesting to make this code also remember and print the

Yeah, that's a little suboptimal, isn't it :)

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation

¹ due to the bug we're chasing now. The range _is_ supposed to be mapped.

--

From: Chris Wright
Date: Friday, February 6, 2009 - 9:08 am

One thing I noticed is many of the faults are the page prior to a
coherent range 0x8000 in length, which seems to correspond to tfd buffer.
No obvious off-by-one error anywhere, and not all faults fit that pattern,
so w/out more iwlagn driver knowledge hard to say if that's meaningful


That's what I was thinking too.  Almost need a flight recorder mode to
see if the range was ever mapped/unmapped.

thanks,
-chris
--

From: Chris Wright
Date: Friday, February 6, 2009 - 11:20 am

[  362.283661] Device 0000:03:00.0 mapping: 1000@fff1b000
[  362.283727] DMAR:[DMA Write] Request device [03:00.0] fault addr fff1b000 
[  362.284719] Device 0000:03:00.0 unmapping: 1000@fff1b000

[  362.426974] Device 0000:03:00.0 mapping: 1000@fff1b000
[  362.427040] DMAR:[DMA Write] Request device [03:00.0] fault addr fff1b000 
[  362.429092] Device 0000:03:00.0 unmapping: 1000@fff1b000

[  447.644332] Device 0000:03:00.0 mapping: 1000@fff03000
[  447.644373] DMAR:[DMA Write] Request device [03:00.0] fault addr fff03000 
[  447.646008] Device 0000:03:00.0 unmapping: 1000@fff03000

[  483.037641] Device 0000:03:00.0 mapping: 1000@ffc9f000
[  483.037707] DMAR:[DMA Write] Request device [03:00.0] fault addr ffc9f000 
[  483.038699] Device 0000:03:00.0 unmapping: 1000@ffc9f000

...

Looks like driver is doing the right thing.
--

From: Joerg Roedel
Date: Thursday, February 12, 2009 - 7:48 am

Hmm, thats because all device addresses are 16kb aligned. The hashfn
uses bits 13 to 21 as the index for the hash. I can move this window up
to bits 18-26 if it improves the hash spread.

Joerg
--

From: Chris Wright
Date: Thursday, February 5, 2009 - 7:27 pm

BTW, here's how I hooked it up:

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c933980..df593d1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1072,6 +1072,34 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
+static struct pci_dev *domain_find_pdev(u8 bus, u8 devfn)
+{
+	unsigned long flags;
+	struct pci_dev *pdev = NULL;
+	struct device_domain_info *info;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &device_domain_list, global) {
+		if (info->bus == bus && info->devfn == devfn) {
+			pdev = info->dev;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+	return pdev;
+}
+
+static void dump_mappings(u16 source_id)
+{
+	struct pci_dev *pdev;
+	u8 bus = source_id >> 8;
+	u8 devfn = source_id & 0xff;
+
+	pdev = domain_find_pdev(bus, devfn);
+	if (pdev)
+		debug_dma_dump_mappings(&pdev->dev);
+}
+
 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
 		u8 fault_reason, u16 source_id, unsigned long long addr)
 {
@@ -1086,6 +1114,8 @@ static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
 		(type ? "DMA Read" : "DMA Write"),
 		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
 		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+
+	dump_mappings(source_id);
 	return 0;
 }
 
--

From: Joerg Roedel
Date: Thursday, February 12, 2009 - 8:20 am

Great. Thanks. Applied to my dma-api/debug branch.

Joerg
--

Previous thread: none

Next thread: none