On Sat, Feb 23, 2008 at 12:05:17AM -0800, Andrew Morton wrote:
fixed.
fixed.
fixed.
Yup, not needed. Only the single threaded __init path increments this
and after that only read accesses happen.
I just looked, I don't think bitmap.h helps me with my bitmap use in
this module.
Fixed. Was leaking the g_iommus_to_flush on the error path.
fixed.
Its not very, but I'm doing an insert walk of a list and need the locks.
On the bright side, its way better than spin locking and poling the
IOTLB bit for cleared on every unmap.
Fixed.
The only error path to delete the single timer created is at the bottom
of intel_iommu_init. I don't think there is a place for the timer to be
delete and any error clean up paths to put it.
Yes, I could easily create a sysfs or debugfs mechanism for turning it
on / off at run time. I would like input on the preferred way to do this.
sysfs or debugfs?
Thanks again for your review on this. The following is the updated
patch.
--mgross
Signed-off-by: <mgross@linux.intel.com>
Index: linux-2.6.25-rc2-mm1/drivers/pci/intel-iommu.c
===================================================================
--- linux-2.6.25-rc2-mm1.orig/drivers/pci/intel-iommu.c 2008-02-26 11:15:46.000000000 -0800
+++ linux-2.6.25-rc2-mm1/drivers/pci/intel-iommu.c 2008-02-29 14:36:55.000000000 -0800
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/bitmap.h>
+#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
@@ -30,6 +31,7 @@
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
+#include <linux/timer.h>
#include "iova.h"
#include "intel-iommu.h"
#include <asm/proto.h> /* force_iommu in this header in x86-64*/
@@ -50,11 +52,32 @@
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+
+static void flush_unmaps_timeout(unsigned long data);
+
+DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
static void domain_remove_dev_info(struct dmar_domain *domain);
static int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
+static int intel_iommu_strict;
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +96,13 @@
printk(KERN_INFO
"Intel-IOMMU: disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
- printk (KERN_INFO
+ printk(KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+ } else if (!strncmp(str, "strict", 6)) {
+ printk(KERN_INFO
+ "Intel-IOMMU: disable batched IOTLB flush\n");
+ intel_iommu_strict = 1;
}
str += strcspn(str, ",");
@@ -965,17 +992,13 @@
set_bit(0, iommu->domain_ids);
return 0;
}
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+ struct dmar_drhd_unit *drhd)
{
- struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
- iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
- if (!iommu)
- return NULL;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1396,7 +1419,7 @@
int index;
while (dev) {
- for (index = 0; index < cnt; index ++)
+ for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
@@ -1661,7 +1684,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
- int ret, unit = 0;
+ int nlongs, i, ret, unit = 0;
/*
* for each drhd
@@ -1672,7 +1695,35 @@
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
- iommu = alloc_iommu(drhd);
+ g_num_of_iommus++;
+ /*
+ * lock not needed as this is only incremented in the single
+ * threaded kernel __init code path all other access are read
+ * only
+ */
+ }
+
+ nlongs = BITS_TO_LONGS(g_num_of_iommus);
+ g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
+ if (!g_iommus_to_flush) {
+ printk(KERN_ERR "Intel-IOMMU: "
+ "Allocating bitmap array failed\n");
+ return -ENOMEM;
+ }
+
+ g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
+ if (!g_iommus) {
+ kfree(g_iommus_to_flush);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ i = 0;
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = alloc_iommu(&g_iommus[i], drhd);
+ i++;
if (!iommu) {
ret = -ENOMEM;
goto error;
@@ -1705,7 +1756,6 @@
* endfor
*/
for_each_rmrr_units(rmrr) {
- int i;
for (i = 0; i < rmrr->devices_cnt; i++) {
pdev = rmrr->devices[i];
/* some BIOS lists non-exist devices in DMAR table */
@@ -1761,6 +1811,7 @@
iommu = drhd->iommu;
free_iommu(iommu);
}
+ kfree(g_iommus);
return ret;
}
@@ -1909,6 +1960,53 @@
return 0;
}
+static void flush_unmaps(void)
+{
+ struct iova *node, *n;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&async_umap_flush_lock, flags);
+ timer_on = 0;
+
+ /* just flush them all */
+ for (i = 0; i < g_num_of_iommus; i++) {
+ if (test_and_clear_bit(i, g_iommus_to_flush))
+ iommu_flush_iotlb_global(&g_iommus[i], 0);
+ }
+
+ list_for_each_entry_safe(node, n, &unmaps_to_do, list) {
+ /* free iova */
+ list_del(&node->list);
+ __free_iova(&((struct dmar_domain *)node->dmar)->iovad, node);
+
+ }
+ list_size = 0;
+ spin_unlock_irqrestore(&async_umap_flush_lock, flags);
+}
+
+static void flush_unmaps_timeout(unsigned long data)
+{
+ flush_unmaps();
+}
+
+static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&async_umap_flush_lock, flags);
+ iova->dmar = dom;
+ list_add(&iova->list, &unmaps_to_do);
+ set_bit((dom->iommu - g_iommus), g_iommus_to_flush);
+
+ if (!timer_on) {
+ mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
+ timer_on = 1;
+ }
+ list_size++;
+ spin_unlock_irqrestore(&async_umap_flush_lock, flags);
+}
+
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
size_t size, int dir)
{
@@ -1936,13 +2034,21 @@
dma_pte_clear_range(domain, start_addr, start_addr + size);
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
-
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
- size >> PAGE_SHIFT_4K, 0))
- iommu_flush_write_buffer(domain->iommu);
-
- /* free iova */
- __free_iova(&domain->iovad, iova);
+ if (intel_iommu_strict) {
+ if (iommu_flush_iotlb_psi(domain->iommu,
+ domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
+ iommu_flush_write_buffer(domain->iommu);
+ /* free iova */
+ __free_iova(&domain->iovad, iova);
+ } else {
+ add_unmap(domain, iova);
+ /*
+ * queue up the release of the unmap to save the 1/6th of the
+ * cpu used up by the iotlb flush operation...
+ */
+ if (list_size > high_watermark)
+ flush_unmaps();
+ }
}
static void * intel_alloc_coherent(struct device *hwdev, size_t size,
@@ -2266,6 +2372,10 @@
if (dmar_table_init())
return -ENODEV;
+ high_watermark = 250;
+ intel_iommu_debug = debugfs_create_dir("intel_iommu", NULL);
+ debug = debugfs_create_u32("high_watermark", S_IWUGO | S_IRUGO,
+ intel_iommu_debug, &high_watermark);
iommu_init_mempool();
dmar_init_reserved_ranges();
@@ -2281,6 +2391,7 @@
printk(KERN_INFO
"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
+ init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
return 0;
Index: linux-2.6.25-rc2-mm1/drivers/pci/iova.h
===================================================================
--- linux-2.6.25-rc2-mm1.orig/drivers/pci/iova.h 2008-02-26 11:15:46.000000000 -0800
+++ linux-2.6.25-rc2-mm1/drivers/pci/iova.h 2008-02-29 14:11:52.000000000 -0800
@@ -23,6 +23,8 @@
struct rb_node node;
unsigned long pfn_hi; /* IOMMU dish out addr hi */
unsigned long pfn_lo; /* IOMMU dish out addr lo */
+ struct list_head list;
+ void *dmar;
};
/* holds all the iova translations for a domain */
Index: linux-2.6.25-rc2-mm1/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.25-rc2-mm1.orig/Documentation/kernel-parameters.txt 2008-02-26 11:16:01.000000000 -0800
+++ linux-2.6.25-rc2-mm1/Documentation/kernel-parameters.txt 2008-02-26 13:44:06.000000000 -0800
@@ -830,6 +830,10 @@
than 32 bit addressing. The default is to look
for translation below 32 bit and if not available
then look in the higher range.
+ strict [Default Off]
+ With this option on every unmap_single operation will
+ result in a hardware IOTLB flush operation as opposed
+ to batching them for performance.
io_delay= [X86-32,X86-64] I/O delay method
0x80
--