Hi,
I split the patch up into a few pieces, so it can be applied safely.
It builds with allyesconfig on i386 and x86-64, and it's been booted
and tested on both those archs and ppc64 as well.The same patch series can also be applied by pulling
git://git.kernel.dk/linux-2.6-block.git sg
-
This was accidentally introduced by yesterday's change to the SG handling.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
Not sure if this has been reported already, but I needed the trivial fix
to build the ps3rom driver.diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 03f19b8..17b4a7c 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf)req_len = fin = 0;
scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
- kaddr = kmap_atomic(sg_page(sgpnt->page), KM_IRQ0);
+ kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
len = sgpnt->length;
if ((req_len + len) > buflen) {
len = buflen - req_len;
-
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/alpha/kernel/pci_iommu.c | 2 +-
arch/arm/common/dmabounce.c | 2 +-
arch/blackfin/kernel/dma-mapping.c | 3 +--
arch/ia64/hp/common/sba_iommu.c | 2 +-
arch/ia64/hp/sim/simscsi.c | 4 ++--
arch/ia64/sn/pci/pci_dma.c | 2 +-
arch/m68k/kernel/dma.c | 2 +-
arch/mips/mm/dma-default.c | 16 +++++++---------
arch/powerpc/kernel/dma_64.c | 3 +--
arch/powerpc/kernel/ibmebus.c | 3 +--
arch/powerpc/kernel/iommu.c | 2 +-
arch/powerpc/platforms/ps3/system-bus.c | 5 ++---
arch/sparc/kernel/ioport.c | 17 ++++++++---------
arch/sparc/mm/io-unit.c | 2 +-
arch/sparc/mm/iommu.c | 8 ++++----
arch/sparc/mm/sun4c.c | 2 +-
arch/sparc64/kernel/iommu.c | 7 ++-----
arch/sparc64/kernel/iommu_common.c | 13 ++++++-------
arch/sparc64/kernel/ldc.c | 2 +-
arch/sparc64/kernel/pci_sun4v.c | 7 ++-----
arch/x86/kernel/pci-calgary_64.c | 10 ++++++----
arch/x86/kernel/pci-gart_64.c | 4 ++--
arch/x86/kernel/pci-nommu_64.c | 4 ++--
23 files changed, 55 insertions(+), 67 deletions(-)diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index e1c4707..ee07dce 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -465,7 +465,7 @@ EXPORT_SYMBOL(pci_free_consistent);
Write dma_length of each leader with the combined lengths of
the mergable followers. */-#define SG_ENT_VIRT_ADDRESS(SG) (page_address((SG)->page) + (SG)->offset)
+#define SG_ENT_VIRT_ADDRESS(SG) (sg_virt((SG)))
#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG))static void
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 44ab0da..9d371e4 100644
--...
It looks like it could be nice to define and use a helper for
page_address(sg_page(sg)) (although 11 call sites could use it
after this patch)#define sg_pgaddr(sg) page_address(sg_page(sg))
Note that mips sg_{un,}map_sg checked for page_address(sg->page) != 0
before calling __dma_sync(addr + sg->offset, sg->length, direction)
and you changed it to addr = (unsigned long) sg_virt(sg) which
takes sg->offset into account. That said I'm not sure if the original
code was correct for the (page_address(sg->page) == 0 && sg->offset != 0)
case...On Oct. 22, 2007, 20:11 +0200, Jens Axboe <jens.axboe@oracle.com> wrote:
-
I initially thought that may have been a bug, but most of the other arch
iommu code handle it in the same way. I don't think it's a bug, since
they want to clear full page ranges anyway. I should not have changed
this code to take the offset into account - I don't think it's an issue,I think because of a two stage conversion, the page variable addition
predates the sg_virt() helper.--
Jens Axboe-
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
lib/swiotlb.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 752fd95..1a8050a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -35,7 +35,7 @@
#define OFFSET(val,align) ((unsigned long) \
( (val) & ( (align) - 1)))-#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
#define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))/*
--
1.5.3.GIT-
Fix sctp compile
sctp fails to compile with
net/sctp/sm_make_chunk.c: In function 'sctp_pack_cookie':
net/sctp/sm_make_chunk.c:1516: error: implicit declaration of function 'sg_init_table'
net/sctp/sm_make_chunk.c:1517: error: implicit declaration of function 'sg_set_page'use the proper include file.
SCTP maintainers Vlad Yasevich and Sridhar Samudrala are CCed.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
net/sctp/sm_make_chunk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)Index: linux-2.6/net/sctp/sm_make_chunk.c
===================================================================
--- linux-2.6.orig/net/sctp/sm_make_chunk.c
+++ linux-2.6/net/sctp/sm_make_chunk.c
@@ -56,7 +56,7 @@
#include <linux/ipv6.h>
#include <linux/net.h>
#include <linux/inet.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <net/sock.h>-
Looks good, thanks!
--
Jens Axboe-
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/ecryptfs/crypto.c | 16 +++++++++++-----
fs/ecryptfs/keystore.c | 3 +++
fs/nfsd/nfs4recover.c | 8 +++-----
3 files changed, 17 insertions(+), 10 deletions(-)diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 1ae90ef..0a9882e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -283,7 +283,7 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
pg = virt_to_page(addr);
offset = offset_in_page(addr);
if (sg) {
- sg[i].page = pg;
+ sg_set_page(&sg[i], pg);
sg[i].offset = offset;
}
remainder_of_page = PAGE_CACHE_SIZE - offset;
@@ -713,10 +713,13 @@ ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
{
struct scatterlist src_sg, dst_sg;- src_sg.page = src_page;
+ sg_init_table(&src_sg, 1);
+ sg_init_table(&dst_sg, 1);
+
+ sg_set_page(&src_sg, src_page);
src_sg.offset = src_offset;
src_sg.length = size;
- dst_sg.page = dst_page;
+ sg_set_page(&dst_sg, dst_page);
dst_sg.offset = dst_offset;
dst_sg.length = size;
return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
@@ -742,10 +745,13 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
{
struct scatterlist src_sg, dst_sg;- src_sg.page = src_page;
+ sg_init_table(&src_sg, 1);
+ sg_init_table(&dst_sg, 1);
+
+ sg_set_page(&src_sg, src_page);
src_sg.offset = src_offset;
src_sg.length = size;
- dst_sg.page = dst_page;
+ sg_set_page(&dst_sg, dst_page);
dst_sg.offset = dst_offset;
dst_sg.length = size;
return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 89d9710..263fed8 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1040,6 +1040,9 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
};
int rc = 0;+ sg_i...
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/um/drivers/ubd_kern.c | 2 +-
drivers/ata/libata-core.c | 10 ++++----
drivers/ata/libata-scsi.c | 2 +-
drivers/block/DAC960.c | 2 +
drivers/block/cciss.c | 4 +-
drivers/block/cpqarray.c | 3 +-
drivers/block/cryptoloop.c | 12 ++++++---
drivers/block/sunvdc.c | 1 +
drivers/block/ub.c | 11 +++++----
drivers/block/viodasd.c | 2 +
drivers/ide/cris/ide-cris.c | 4 +-
drivers/ide/ide-probe.c | 4 ++-
drivers/ide/ide-taskfile.c | 2 +-
drivers/ide/mips/au1xxx-ide.c | 6 +---
drivers/ieee1394/dma.c | 2 +-
drivers/ieee1394/sbp2.c | 2 +-
drivers/infiniband/core/umem.c | 11 ++++++---
drivers/infiniband/hw/ipath/ipath_dma.c | 4 +-
drivers/infiniband/hw/ipath/ipath_mr.c | 2 +-
drivers/infiniband/hw/mthca/mthca_memfree.c | 24 ++++++++++++-------
drivers/infiniband/ulp/iser/iser_memory.c | 8 +++---
drivers/md/dm-crypt.c | 21 +++++++++--------
drivers/media/common/saa7146_core.c | 3 +-
drivers/media/video/ivtv/ivtv-udma.c | 4 +-
drivers/media/video/videobuf-dma-sg.c | 8 ++++--
drivers/mmc/card/queue.c | 15 ++++++------
drivers/mmc/host/at91_mci.c | 8 +++---
drivers/mmc/host/au1xmmc.c | 11 +++------
drivers/mmc/host/imxmmc.c | 2 +-
drivers/mmc/host/mmc_spi.c | 8 +++---
drivers/mmc/host/omap.c | 4 +-
drivers/mmc/host/sdhci.c | 2 +-
drivers/mmc/host/tifm_sd.c | 8 +++---
drivers/mmc/host/wbsd.c ...
You forgot s390's zfcp driver. But unfortunately the trivial fix below
doesn't work. No more I/O possible. Swen and/or Christof could you
provide a correct fix for this please? Thanks!---
drivers/s390/scsi/zfcp_def.h | 4 ++--
drivers/s390/scsi/zfcp_erp.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)Index: linux-2.6/drivers/s390/scsi/zfcp_def.h
===================================================================
--- linux-2.6.orig/drivers/s390/scsi/zfcp_def.h
+++ linux-2.6/drivers/s390/scsi/zfcp_def.h
@@ -63,7 +63,7 @@
static inline void *
zfcp_sg_to_address(struct scatterlist *list)
{
- return (void *) (page_address(list->page) + list->offset);
+ return (void *) (page_address(sg_page(list) + list->offset));
}/**
@@ -74,7 +74,7 @@ zfcp_sg_to_address(struct scatterlist *l
static inline void
zfcp_address_to_sg(void *address, struct scatterlist *list)
{
- list->page = virt_to_page(address);
+ sg_set_page(list, virt_to_page(address));
list->offset = ((unsigned long) address) & (PAGE_SIZE - 1);
}Index: linux-2.6/drivers/s390/scsi/zfcp_erp.c
===================================================================
--- linux-2.6.orig/drivers/s390/scsi/zfcp_erp.c
+++ linux-2.6/drivers/s390/scsi/zfcp_erp.c
@@ -363,7 +363,7 @@ zfcp_erp_adisc(struct zfcp_port *port)
retval = -ENOMEM;
freemem:
if (address != NULL)
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
if (send_els != NULL) {
kfree(send_els->req);
kfree(send_els->resp);
@@ -437,7 +437,7 @@ zfcp_erp_adisc_handler(unsigned long datout:
zfcp_port_put(port);
- __free_pages(send_els->req->page, 0);
+ __free_pages(sg_page(send_els->req), 0);
kfree(send_els->req);
kfree(send_els->resp);
kfree(send_els);
-
return sg_virt(list); would be better.
I'll fix up the driver, no worries.
--
Jens Axboe-
ok, thanks!
-
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
crypto/digest.c | 2 +-
crypto/hmac.c | 3 ++-
crypto/scatterwalk.c | 2 +-
crypto/scatterwalk.h | 6 +++---
crypto/tcrypt.c | 4 ++--
crypto/xcbc.c | 2 +-
6 files changed, 10 insertions(+), 9 deletions(-)diff --git a/crypto/digest.c b/crypto/digest.c
index e56de67..8871dec 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -41,7 +41,7 @@ static int update2(struct hash_desc *desc,
return 0;for (;;) {
- struct page *pg = sg->page;
+ struct page *pg = sg_page(sg);
unsigned int offset = sg->offset;
unsigned int l = sg->length;diff --git a/crypto/hmac.c b/crypto/hmac.c
index 8802fb6..e4eb6ac 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -159,7 +159,8 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;sg_set_buf(sg1, ipad, bs);
- sg1[1].page = (void *)sg;
+
+ sg_set_page(&sg[1], (void *) sg);
sg1[1].length = 0;
sg_set_buf(sg2, opad, bs + ds);diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index d6852c3..b9bbda0 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -54,7 +54,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
if (out) {
struct page *page;- page = walk->sg->page + ((walk->offset - 1) >> PAGE_SHIFT);
+ page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT);
flush_dcache_page(page);
}diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index 9c73e37..87ed681 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -22,13 +22,13 @@static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg)
{
- return (++sg)->length ? sg : (void *)sg->page;
+ return (++sg)->length ? sg : (void *) sg_page(sg);
}static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,...
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
block/ll_rw_blk.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 8025d64..61c2e39 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1354,8 +1354,9 @@ new_segment:
else
sg = sg_next(sg);- memset(sg, 0, sizeof(*sg));
- sg->page = bvec->bv_page;
+ sg_dma_len(sg) = 0;
+ sg_dma_address(sg) = 0;
+ sg_set_page(sg, bvec->bv_page);
sg->length = nbytes;
sg->offset = bvec->bv_offset;
nsegs++;
@@ -1363,6 +1364,9 @@ new_segment:
bvprv = bvec;
} /* segments in rq */+ if (sg)
+ __sg_mark_end(sg);
+
return nsegs;
}--
1.5.3.GIT-
>>>>> "Jens" == Jens Axboe <jens.axboe@oracle.com> writes:
Jens> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Jens> ---
Jens> block/ll_rw_blk.c | 8 ++++++--
Jens> 1 files changed, 6 insertions(+), 2 deletions(-)Jens> diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
Jens> index 8025d64..61c2e39 100644
Jens> --- a/block/ll_rw_blk.c
Jens> +++ b/block/ll_rw_blk.c
Jens> @@ -1354,8 +1354,9 @@ new_segment:
Jens> else
Jens> sg = sg_next(sg);Jens> - memset(sg, 0, sizeof(*sg));
Jens> - sg->page = bvec->bv_page;
Jens> + sg_dma_len(sg) = 0;
Jens> + sg_dma_address(sg) = 0;Why don't you call these something like sg_set_dma_len() and
sg_set_dma_address() instead, to make it clear these set the values
and don't read them?Jens> + sg_set_page(sg, bvec->bv_page);
Esp since you have the sg_set_page() right below it.
sg-> length = nbytes;
sg-> offset = bvec->bv_offset;
Jens> nsegs++;
Jens> @@ -1363,6 +1364,9 @@ new_segment:
Jens> bvprv = bvec;
Jens> } /* segments in rq */Jens> + if (sg)
Jens> + __sg_mark_end(sg);
Jens> +
Jens> return nsegs;
Jens> }Jens> --
Jens> 1.5.3.GITJens> -
Jens> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Jens> the body of a message to majordomo@vger.kernel.org
Jens> More majordomo info at http://vger.kernel.org/majordomo-info.html
Jens> Please read the FAQ at http://www.tux.org/lkml/Jens> !DSPAM:471ce899205391598813817!
-
Hmm.... this breaks s390:
CC block/ll_rw_blk.o
block/ll_rw_blk.c: In function 'blk_rq_map_sg':
block/ll_rw_blk.c:1357: error: implicit declaration of function 'sg_dma_len'
block/ll_rw_blk.c:1357: error: lvalue required as left operand of assignment
block/ll_rw_blk.c:1358: error: implicit declaration of function 'sg_dma_address'
block/ll_rw_blk.c:1358: error: lvalue required as left operand of assignment
make[1]: *** [block/ll_rw_blk.o] Error 1Missing macros and no appropriate members in struct scatterlist since we
don't have DMA. How to fix?
-
From: Heiko Carstens <heiko.carstens@de.ibm.com>
net/xfrm/xfrm_algo.c: In function 'skb_icv_walk':
net/xfrm/xfrm_algo.c:555: error: implicit declaration of function 'sg_set_page'
make[2]: *** [net/xfrm/xfrm_algo.o] Error 1Cc: David Miller <davem@davemloft.net>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
net/xfrm/xfrm_algo.c | 1 +
1 file changed, 1 insertion(+)Index: linux-2.6/net/xfrm/xfrm_algo.c
===================================================================
--- linux-2.6.orig/net/xfrm/xfrm_algo.c
+++ linux-2.6/net/xfrm/xfrm_algo.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
#include <linux/crypto.h>
+#include <linux/scatterlist.h>
#include <net/xfrm.h>
#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
#include <net/ah.h>
-
Thanks, arch fallout... Applied.
--
Jens Axboe-
From: Heiko Carstens <heiko.carstens@de.ibm.com>
CC block/ll_rw_blk.o
block/ll_rw_blk.c: In function 'blk_rq_map_sg':
block/ll_rw_blk.c:1357: error: implicit declaration of function 'sg_dma_len'
block/ll_rw_blk.c:1357: error: lvalue required as left operand of assignment
block/ll_rw_blk.c:1358: error: implicit declaration of function 'sg_dma_address'
block/ll_rw_blk.c:1358: error: lvalue required as left operand of assignment
make[1]: *** [block/ll_rw_blk.o] Error 1
make: *** [block] Error 2Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
block/ll_rw_blk.c | 2 --
1 file changed, 2 deletions(-)Index: linux-2.6/block/ll_rw_blk.c
===================================================================
--- linux-2.6.orig/block/ll_rw_blk.c
+++ linux-2.6/block/ll_rw_blk.c
@@ -1354,8 +1354,6 @@ new_segment:
else
sg = sg_next(sg);- sg_dma_len(sg) = 0;
- sg_dma_address(sg) = 0;
sg_set_page(sg, bvec->bv_page);
sg->length = nbytes;
sg->offset = bvec->bv_offset;
-
We can then transition drivers without changing the generated code.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
include/linux/scatterlist.h | 112 +++++++++++++++++++++++++++++++++++++++---
1 files changed, 104 insertions(+), 8 deletions(-)diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 2dc7464..1645795 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -2,24 +2,37 @@
#define _LINUX_SCATTERLIST_H#include <asm/scatterlist.h>
+#include <asm/io.h>
#include <linux/mm.h>
#include <linux/string.h>+/**
+ * sg_set_page - Set sg entry to point at given page
+ * @sg: SG entry
+ * @page: The page
+ *
+ * Description:
+ * Use this function to set an sg entry pointing at a page, never assign
+ * the page directly. We encode sg table information in the lower bits
+ * of the page pointer. See sg_page() for looking up the page belonging
+ * to an sg entry.
+ *
+ **/
+static inline void sg_set_page(struct scatterlist *sg, struct page *page)
+{
+ sg->page = page;
+}
+
+#define sg_page(sg) ((sg)->page)
+
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
unsigned int buflen)
{
- sg->page = virt_to_page(buf);
+ sg_set_page(sg, virt_to_page(buf));
sg->offset = offset_in_page(buf);
sg->length = buflen;
}-static inline void sg_init_one(struct scatterlist *sg, const void *buf,
- unsigned int buflen)
-{
- memset(sg, 0, sizeof(*sg));
- sg_set_buf(sg, buf, buflen);
-}
-
/*
* We overload the LSB of the page pointer to indicate whether it's
* a valid sg entry, or whether it points to the start of a new scatterlist.
@@ -104,4 +117,87 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
prv[prv_nents - 1].page = (struct page *) ((unsigned long) sgl | 0x01);
}+/**
+ * sg_mark_end - Mark the end of the scatterlist
+ * @sgl: Scatterlist
+ * @nents: Number of entri...
Add a Kconfig entry which will toggle some sanity checks on the sg
entry and tables.Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
include/asm-alpha/scatterlist.h | 3 +++
include/asm-arm/scatterlist.h | 3 +++
include/asm-avr32/scatterlist.h | 3 +++
include/asm-blackfin/scatterlist.h | 3 +++
include/asm-cris/scatterlist.h | 3 +++
include/asm-frv/scatterlist.h | 3 +++
include/asm-h8300/scatterlist.h | 3 +++
include/asm-ia64/scatterlist.h | 3 +++
include/asm-m32r/scatterlist.h | 3 +++
include/asm-m68k/scatterlist.h | 3 +++
include/asm-m68knommu/scatterlist.h | 3 +++
include/asm-mips/scatterlist.h | 3 +++
include/asm-parisc/scatterlist.h | 3 +++
include/asm-powerpc/scatterlist.h | 3 +++
include/asm-s390/scatterlist.h | 3 +++
include/asm-sh/scatterlist.h | 3 +++
include/asm-sh64/scatterlist.h | 3 +++
include/asm-sparc/scatterlist.h | 3 +++
include/asm-sparc64/scatterlist.h | 3 +++
include/asm-v850/scatterlist.h | 3 +++
include/asm-x86/scatterlist_32.h | 3 +++
include/asm-x86/scatterlist_64.h | 3 +++
include/asm-xtensa/scatterlist.h | 3 +++
include/linux/scatterlist.h | 22 ++++++++++++++++++++++
lib/Kconfig.debug | 10 ++++++++++
25 files changed, 101 insertions(+), 0 deletions(-)diff --git a/include/asm-alpha/scatterlist.h b/include/asm-alpha/scatterlist.h
index b764706..440747c 100644
--- a/include/asm-alpha/scatterlist.h
+++ b/include/asm-alpha/scatterlist.h
@@ -5,6 +5,9 @@
#include <asm/types.h>struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+ unsigned long sg_magic;
+#endif
unsigned long page_link;
unsigned int offset;diff --git a/include/asm-arm/scatterlist.h b/include/asm-arm/scatterlist.h
index ab1d85d..ca0a37d 100644
--- a/include/asm-arm/scatterlist.h
+++ b/include/asm-arm/scatterlist.h
@@ -5,6 +5,9 @@
#include &...
Change the page member of the scatterlist structure to be an unsigned
long, and encode more stuff in the lower bits:- Bits 0 and 1 zero: this is a normal sg entry. Next sg entry is located
at sg + 1.
- Bit 0 set: this is a chain entry, the next real entry is at ->page_link
with the two low bits masked off.
- Bit 1 set: this is the final entry in the sg entry. sg_next() will return
NULL when passed such an entry.It's thus important that sg table users use the proper accessors to get
and set the page member.Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
include/asm-alpha/scatterlist.h | 2 +-
include/asm-arm/scatterlist.h | 2 +-
include/asm-avr32/scatterlist.h | 2 +-
include/asm-blackfin/scatterlist.h | 2 +-
include/asm-cris/scatterlist.h | 2 +-
include/asm-frv/scatterlist.h | 2 +-
include/asm-h8300/scatterlist.h | 2 +-
include/asm-ia64/scatterlist.h | 2 +-
include/asm-m32r/scatterlist.h | 2 +-
include/asm-m68k/scatterlist.h | 2 +-
include/asm-m68knommu/scatterlist.h | 2 +-
include/asm-mips/scatterlist.h | 2 +-
include/asm-parisc/scatterlist.h | 2 +-
include/asm-powerpc/scatterlist.h | 2 +-
include/asm-s390/scatterlist.h | 2 +-
include/asm-sh/scatterlist.h | 2 +-
include/asm-sh64/scatterlist.h | 2 +-
include/asm-sparc/scatterlist.h | 2 +-
include/asm-sparc64/scatterlist.h | 2 +-
include/asm-v850/scatterlist.h | 2 +-
include/asm-x86/dma-mapping_32.h | 4 +-
include/asm-x86/scatterlist_32.h | 2 +-
include/asm-x86/scatterlist_64.h | 2 +-
include/asm-xtensa/scatterlist.h | 2 +-
include/linux/scatterlist.h | 78 ++++++++++++++++++++++++-----------
25 files changed, 79 insertions(+), 49 deletions(-)diff --git a/include/asm-alpha/scatterlist.h b/include/asm-alpha/scatterlist.h
index 9173654..b764706 100644
--- a/include/asm-alpha/scatterlist.h
+++ b/in...
You might want to put a BUG_ON(page & 0x3); Make sure
<snip>Boaz
-
That's a really good idea, thanks Boaz! I'll add that.
--
Jens Axboe-
It would be even better if you replaced all the magic numbers with defines
or better accessors.-Andi
-
All? There are two numbers, and all are confined to scatterlist.h
privately. Except the one in blk_rq_map_sg(), which was done on purpose
since I don't want to export that knowledge to others. So we definitely
don't want accessors, I can name the two bit values but don't see much
point in doing it.--
Jens Axboe-
Maybe no point for you, but it would be helpful for any poor soul
who has to read/debug/change the code later.Even if they're limited right now that doesn't mean it'll stay
that way anyways.-Andi
-
I understand, but if you look in the include file that uses the magic
numbers, then there's a big comment block in the beginning describingSince it's reusing lower page bits, we can't go beyond 2 bits anyway. So
I'll be surprised if they expand :-)--
Jens Axboe-
Better safe than sorry...
Is it possible that a chain entry pointer has bit 1 set on architectures
(e.g. m68k) where the natural alignment of 32-bit quantities is _2_ bytes,
not 4?Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.orgIn personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
-
Better make sure that such alignment never happens... But no, I don't
think it will, since these things would generally always have to be
allocated with an allocator, and the *allocator* won't return 2-byte
aligned data structures.Linus
-
On Mon, 22 Oct 2007 12:49:40 -0700 (PDT)
No - but a structure which has other objects in it before the object
being written out may well be 2 byte aligned on M68K and some of the
other externally 16bit platforms - ditto local dynamic objects.Why can't we just make the list one item longer than the entry count and
stick a NULL on the end of it like normal people ? Then you need one bit
which ought to be safe for everyone (and if the bit is a macro any CPU
warped enough to have byte alignment is surely going to have top bits
spare...)Alan
-
Well, quite frankly, equally easy is to just add a
__attribute__((aligned(4)))
or whatever the gcc syntax for that is today.. That guarantees that gcc
lays things out properly.Linus
-
On Mon, 22 Oct 2007 13:44:43 -0700 (PDT)
For structures, not array elements or stack objects. Does gcc now get
aligned correct as an attribute on a stack object ?Still doesn't answer the rather more important question - why not just
stick a NULL on the end instead of all the nutty hacks ?
-
You still do need one bit for the discontiguous case, so it's not like you
can avoid the hacks anyway (unless you just blow up the structure
entirely) and make it a separate member). So once you have that
bit+pointer, using a separate NULL entry isn't exactly prettier.Especially as we actally want to see the difference between
"end-of-allocation" and "not yet filled in", so you shouldn't use NULL
anyway, you should probably use something like "all-ones".Linus
-
Every one is so hysterical about this sg-chaining problem. And massive
patches produced, that when a simple none intrusive solution is proposed
it is totally ignored because every one thinks, "I can not be that stupid".
Well Einstein said: "Simplicity is the ultimate sophistication". So no one
need to feel bad.I'm talking about Benney's Proposition of a while back.
(I'm including it below, cause I can't bother with the
stupid Archives broken search)What Benny was proposing is that the scatterlist pointer
might not have the complete information about sizes and
allocations, but the surrounding code always has, So why
not just pass this information to the decision maker -
sg_next() - so it can make the right choice, before a suicide.
So sg_next() becomes:static inline struct scatterlist *sg_next(struct scatterlist *sg,
int next, int nr)
{
return next < nr ? sg_next_unsafe(sg) : NULL;
}Where sg_next_unsafe(sg) is what the original sg_next() used to be.
and a user code like for_each_sg() becomes:
/*
* Loop over each sg element, following the pointer to a new list if necessary
*/
#define for_each_sg(sglist, sg, nr, __i) \
for (__i = 0, sg = (sglist); sg; sg = sg_next(sg, ++__i, nr))In his patch he shows examples of other uses of sg_next they all fit.
The sg_next usage is new and in few places. Not like the sg->page
all over the kernel.I know he has a patch for the complete kernel, (I know I helped a bit)
and it is a fraction of the size of all the patches that where submitted
after that. And it does not have all the problems that we still have now
with slob allocators, stack, and such.OK Just my $0.2
Boaz Harrosh-----
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 2dc7464..3a27e03 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -30,7 +30,7 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
...
It's all about the end goal - having maintainable and resilient code.
And I think the sg code will be better once we get past the next day or
so, and it'll be more robust. That is what matters to me, not the
simplicity of the patch itself.--
Jens Axboe-
Linus' latest tree, which has your SG-list enhancements included,
certainly works fine here and does not have the problems of the first
iteration.Ingo
-
That's good to hear :-)
I have a series of pending patches where I've collected fallout patches
from people and some from myself here:http://git.kernel.dk/?p=linux-2.6-block.git;a=shortlog;h=sg
or pullable from
git://git.kernel.dk/inux-2.6-block.git sg
As far as I can tell, all archs should now compile and work. I've tested
(compile tested) alpha/arm/ia64/m68k/mips/sh/sparc this morning and
fixed whatever showed up. It's all just a missing linux/scatterlist.h
include or ->page usage.--
Jens Axboe-
i've attached your fixes as a diff against linus-latest below - for
those who'd like to have it in patch form.Ingo
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index ee07dce..2d00a08 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -7,6 +7,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
+#include <linux/scatterlist.h>
#include <linux/log2.h>#include <asm/io.h>
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 9d371e4..52fc6a8 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -29,6 +29,7 @@
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/list.h>
+#include <linux/scatterlist.h>#include <asm/cacheflush.h>
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index b0b034c..b1b4052 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/scatterlist.h>#include <asm/cache.h>
#include <asm/io.h>
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 41f8e32..9448d4e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/scatterlist.h>#include <asm/cacheflush.h>
#include <asm/dma.h> /* for DMA_CHUNK_SIZE */
diff --git a/arch/sparc64/kernel/iommu_common.c b/arch/sparc64/kernel/iommu_common.c
index 78e8277..b70324e 100644
--- a/arch/sparc64/kernel/iommu_common.c
+++ b/arch/sparc64/kernel/iommu_common.c
@@ -233,6 +233,11 @@ unsigned long prepare_sg(struct scatterlist *sg, int nents)
dma_sg->dma_address = dent_addr;
d...
The below are still needed for m68k
---m68k: sg fallout
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
arch/m68k/kernel/dma.c | 2 +-
drivers/scsi/atari_NCR5380.c | 5 ++---
drivers/scsi/sun3x_esp.c | 4 ++--
net/ieee80211/ieee80211_crypt_tkip.c | 2 +-
net/ieee80211/ieee80211_crypt_wep.c | 2 +-
net/mac80211/wep.c | 2 +-
6 files changed, 8 insertions(+), 9 deletions(-)--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -9,10 +9,10 @@
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/scatterlist.h>
#include <linux/vmalloc.h>#include <asm/pgalloc.h>
-#include <asm/scatterlist.h>void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t flag)
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c
@@ -477,10 +477,9 @@ static void merge_contiguous_buffers(Scsfor (endaddr = virt_to_phys(cmd->SCp.ptr + cmd->SCp.this_residual - 1) + 1;
cmd->SCp.buffers_residual &&
- virt_to_phys(page_address(cmd->SCp.buffer[1].page) +
- cmd->SCp.buffer[1].offset) == endaddr;) {
+ virt_to_phys(sg_virt(&cmd->SCp.buffer[1])) == endaddr;) {
MER_PRINTK("VTOP(%p) == %08lx -> merging\n",
- page_address(cmd->SCp.buffer[1].page), endaddr);
+ page_address(sg_page(&cmd->SCp.buffer[1])), endaddr);
#if (NDEBUG & NDEBUG_MERGING)
++cnt;
#endif
--- a/drivers/scsi/sun3x_esp.c
+++ b/drivers/scsi/sun3x_esp.c
@@ -332,8 +332,8 @@ static void dma_mmu_get_scsi_sgl (struct
struct scatterlist *sg = sp->SCp.buffer;while (sz >= 0) {
- sg[sz].dma_address = dvma_map((unsigned long)page_address(sg[sz].page) +
- sg[sz].offset, sg[sz].length);
+ sg[sz].dma_address = dvma_map((unsigned long)sg_virt(&sg[sz]),
+ sg[sz].length);
...
The wep.c was already applied, I applied the rest of them. Thanks!
--
Jens Axboe-
OK, added. Thanks!
--
Jens Axboe-
But that is exactly what his patch is. Much more robust. Because you do not
relay on sglist content but on outside information, that you already have.
Have you had an hard look at his solution? It just simply falls into place.
Please try it out for yourself. I did, and it works.Boaz
-
Sure, I looked at it, it's not exactly rocket science, I do understand
what it achieves. I don't think the patch is bad as such, I'm merely
trying to state that I think the end code AND interface will be much
nicer with the current direction that the sg helpers are moving.It does rely on outside context, because you need to pass in the sglist
number. In my opinion, this patch would be a bandaid for the original
chain code until we got around to fixing the PAGEALLOC crash. Which we
did, it's now merged. The patch doesn't make the code cleaner, it makes
it uglier. It'll work, but that still doesn't mean I have to agree it's
a nice design.--
Jens Axboe-
A nice design is to have an struct like BIO. That holds a pointer to the
array of scatterlists, size, ..., and a next and prev pointers to the next
chunks. Than have all kernel code that now accepts scatterlist* and size
accept a pointer to such structure. And all is clear and defined.But since we do not do that, and every single API in the kernel that
receives a scatterlist pointer also receives an sg_count parameter,
than I do not see what is so hacky about giving that sg_count parameter
to the one that needs it the most. sg_next();OK I guess this is all a matter of taste so there is no point arguing
about it any more. I can see your view, and the work has been done so
I guess there is no point going back. If it all works than it's for the
best.Thanks Jens for doing all this, The performance gain is substantial
and we will all enjoy it.Boaz
-
Well, I'd personally actually prefer to *not* have the count be passed
down explicitly, because it's just too error prone. So I'd much rather see
the count implicit in the list: whether it's in an explicit header
structure (that is the *only* thing passed down) or whether it's embedded
in the list itself is not important.Since the list itself has to have the "next pointer" for chaining, and
thus already has "embedded information" in it, it actually does make sense
in my opinion to just embed the end-of-list information too. And the end
result right now is pretty simple, with "sg_next()" being really simple to
use, and there being no way to screw things up by getting the count and
the sg pointer out of sync.My biggest complaint right now is that a lot of users of the sg *filling*
functions were mindlessly converted, so we have code likecryptoloop.c: sg_set_page(&sg_in, in_page);
cryptoloop.c: sg_in.offset = in_offs;
cryptoloop.c: sg_in.length = sz;which is just really stupid, and we should have a function for that. But
worse is code like this:ub.c: sg_set_page(sg, virt_to_page(sc->top_sense));
ub.c: sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
ub.c: sg->length = UB_SENSE_SIZE;which again was converted "line by line" and we actually *do* have a
function to do the above three lines assg_set_buf(sg, sc->top_sense, UB_SENSE_SIZE);
where that *single* line is just tons shorter but more importantly, more
readable, than the mess that is a brute-force conversion.So I think the SG stuff looks ok now, but I think we have a lot of "fix up
the rough edges" to go!(The above is not the only case. Just grep for "sg_set_page", and you'll
see several examples of this kind of hard-to-read code. Basically, I don't
think it's ever a good idea to initialize the SG entries one by one, and
even when we have a hard page/offset/size thing, we should not set them
...
Well, the duplication is bad, but walking lists to find the length is
inefficient so you pass around the length as well.What irritates me more is that scatterlists aren't quite generically useful.
The virtio code wants to join a scatterlist created by blk_rq_map_sg() with
two others, yet it won't work because sg_chain() doesn't remove the end
marker from the first entry.If this patch weren't already included, I'd be strongly arguing for the bio
idea: I find the chained sg code tricksy and ugly (sorry Jens).To be constructive, how's this (BTW, why @arg@, I thought it was simply
"@arg"?)===
Make sg_chain() a little more genericAllow chaining when the first chain already has an end marker, and
change it to a slightly clearer semantic (the number of used entries
in the array, not that number plus one).Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 61fdaf0..24bbc92 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -778,7 +778,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
* ended up doing another loop.
*/
if (prev)
- sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+ sg_chain(prev, SCSI_MAX_SG_SEGMENTS-1, sgl);/*
* if we have nothing left, mark the last segment as
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index df7ddce..c1ef145 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -147,12 +147,11 @@ static inline struct scatterlist *sg_last(struct scatterlist *sgl,
/**
* sg_chain - Chain two sglists together
* @prv: First scatterlist
- * @prv_nents: Number of entries in prv
+ * @prv_nents: Number of entries used in prv
* @sgl: Second scatterlist
*
* Description:
- * Links @prv@ and @sgl@ together, to form a longer scatterlist.
- *
+ * @prv[@prv_nents] is used as a link to join @prv to @sgl.
**/
static inline void sg_chain(struct...
Nobody should *ever* walk the list to find the length. Does anybody really
do that? Yes, we pass the thing down, but do people *need* it?[ Side note: some of the users of that length currently would seem to be
buggy in the presense of continuation entries, and seem to assume that
the "list" is just a contiguous array. In fatc, that's almost the only
valid use for the "count" thing, since any other use _has_ to walk it
entry by entry anyway, no? ]The thing is, nobody should care. You walk the list to fill things in, or
to write it out to some HW-specific DMA table, you should never care about
the length. However, you *do* care about the "where does it end" part: to
be able to detect overflows (which should never happen, but from a
debugging standpoint it needs to be detectable rather than just silently
use or corrupt memory).But if people really want/need the length, then we damn well should have a
"header" thing, not two independent "list + length" parameters.Linus
-
Yes, I need it for devices that use the macintosh DBDMA
(descriptor-based DMA) hardware. The DBDMA hardware reads an array of
descriptors from system RAM, so I need to allocate an array and fill
it in with DBDMA command blocks (and then dma-map it and point theMaybe the drivers for devices that use DBDMA are now buggy. Certainly
filling in the array of DBDMA command blocks involves walking the
list, but it would extremely useful to know how much to allocate
before we start filling them in. So we at least need an upper bound
on the number of "real" entries, even if we don't have the exact
number.Paul.
-
Yes, for allocation purposes you'd need the size ahead of time, agreed.
Hmm. Depending on where you do this, and if this is some block-layer
specific driver/code (rather than necessarily a generic SG thing), you do
have the req->nr_phys_segments thing which should be that for you (ie the
SG list may have _fewer_ requests in it in case some of those entries got
squashed together due to be contiguous).But yeah, I don't think it would be wrong at all to have a
struct scatterlist_head {
unsigned int entries;
unsigned int flags; /* ? */
struct scatterlist *sg;
};which would be passed down at higher levels.
Linus
-
Hi all,
This patch implements a header for a linked list of scatterlist
arrays, rather than using an extra entry and low pointer bits to chain them
together. I've tested that it's sane for virtio (which uses struct
scatterlist).Features:
1) Neatens code by including length in structure.
2) Avoids end ambiguity by including maximum length too.
3) Works fine with old "sg is an array" interfaces.
4) Kinda icky for stack declaration, so hence a helper is created.
5) Lacks magic.I reverted (most of?) the scatterlist chaining changes to create these
patches, so it won't apply to your kernels. The reversion patch isn't
interesting, so I haven't posted it.Thanks,
Rusty.diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 4efbd9c..ce7e581 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -5,6 +5,51 @@
#include <linux/mm.h>
#include <linux/string.h>+/**
+ * struct sg_ring - a ring of scatterlists
+ * @list: the list_head chaining them together
+ * @num: the number of valid sg entries
+ * @max: the maximum number of sg entries (size of the sg array).
+ * @sg: the array of scatterlist entries.
+ *
+ * This provides a convenient encapsulation of one or more scatter gather
+ * arrays. */
+struct sg_ring
+{
+ struct list_head list;
+ unsigned int num, max;
+ struct scatterlist sg[0];
+};
+
+/* This helper declares an sg ring on the stack or in a struct. */
+#define DECLARE_SG(name, max) \
+ struct { \
+ struct sg_ring ring; \
+ struct scatterlist sg[max]; \
+ } name
+
+/**
+ * sg_ring_init - initialize a scatterlist ring.
+ * @sg: the sg_ring.
+ * @max: the size of the trailing sg array.
+ *
+ * After initialization sg is alone in the ring. */
+static inline void sg_ring_init(struct sg_ring *sg, unsigned int max)
+{
+ INIT_LIST_HEAD(&sg->list);
+ sg->max = max;
+}
+
+/**
+ * sg_ring_next - next array in a scatterlist ring.
+ * @sg: the sg_ring.
+ *
+ * After init...
Hi Rusty,
I don't know where these patches are going, but please put the
trailing */ in all of these kernel-doc descriptions (nice ones :)---
~Randy
-
Example using virtio.
The interface actually improves because we don't need to hand two lengths to
add_buf (it needs an input and output sg[], so we used to hand one pointer
and a counter of how many were in and how many out, now we can neatly hand
two separate sgs).diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index a901eee..6a9b54d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -23,7 +23,9 @@ struct virtio_blk
mempool_t *pool;/* Scatterlist: can be too big for stack. */
- struct scatterlist sg[3+MAX_PHYS_SEGMENTS];
+ DECLARE_SG(out, 1);
+ DECLARE_SG(in, 1);
+ DECLARE_SG(sg, MAX_PHYS_SEGMENTS);
};struct virtblk_req
@@ -69,8 +71,8 @@ static bool blk_done(struct virtqueue *vq)
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
struct request *req)
{
- unsigned long num, out, in;
struct virtblk_req *vbr;
+ struct sg_ring *in;vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
if (!vbr)
@@ -94,23 +96,24 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
if (blk_barrier_rq(vbr->req))
vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;- /* We have to zero this, otherwise blk_rq_map_sg gets upset. */
- memset(vblk->sg, 0, sizeof(vblk->sg));
- sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
- num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
- sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr));
+ sg_init_single(&vblk->out.ring, &vbr->out_hdr, sizeof(vbr->out_hdr));
+ sg_ring_init(&vblk->sg.ring, ARRAY_SIZE(vblk->sg.sg));
+ vblk->sg.ring.num = blk_rq_map_sg(q, vbr->req, vblk->sg.sg);
+ sg_init_single(&vblk->in.ring, &vbr->in_hdr, sizeof(vbr->in_hdr));if (rq_data_dir(vbr->req) == WRITE) {
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
- out = 1 + num;
- in = 1;
+ /* Chain write request onto output buffers. */
+ list_add...
Do you really allocate a fresh table for every command, or just a max
That'd be fine with me as well, but I really don't think that a lot of
people really do need the sg count when you can just loop over the table
until it returns NULL.--
Jens Axboe-
There are a number of indicators that need to be kept in sync, depending
on the usage. The number of entries is set when it is allocated and is
currently needed to free it up (note that in the sgtable "sketch" James
proposed we saved the sg pool index in the sgtable header and used it to
free each chunk to the right pool). The number of entries is then used in
many places to scan the list, however, after the sg list is dma mapped, the
dma mapping may be shorter than the original sg list when multiple pages are
coalesced and there we need to defer to use the dma_length (plus the number
of entries) to determine the end of the list.IMO I think that the byte count can be used authoritatively to scan
the contents of the sg list either before or after dma mapping
while the number of entries is relevant to walking the list in a context free-
What is the bio idea? A bio works in essentially the same way, the only
difference is having a specific next pointer. It's still just a linkedWe definitely should clear any other markers, that makes sense.
--
Jens Axboe-
Well currently sg_chain() only joins "incomplete" (ie. unterminated) sg
chains. That works great for you, but it feels more like a special purposeIt was suggested by analogy earlier in this thread, to use a two-level
structure.In this case I would have first renamed struct scatterlist to struct
scatterelem. Then struct scatterlist looks like:struct scatterlist {
unsigned int num;
struct scatterelem elems[0];
};We'd want a nice macro to declare them for the stack case:
#define DEFINE_SCATTERLIST(name, elems) \
struct { \
struct scatterlist sg; \
struct scatterelem elems[num]; \
} nameNow we've tied the number and array together, we can introduce:
struct sg_multilist
{
unsigned int num_scatterlists;
struct scatterlist *sg_array[0];
};And, of course, a common way to represent a one-sglist array:
#define DEFINE_SG_MULTI(name, num) \
struct { \
struct sg_multilist ml; \
struct scatterlist *sg_array; \
struct scatterlist sg; \
struct scatterelem elems[num]; \
} name = { .ml = { 1 }, .sg_array = &name.sg }Now simply replace all the places which expect a "struct scatterlist"
with "struct sg_multilist" and we're done.Using dangling structures is not as neat as using pointers, but it's very
I changed the sg_chain() function not to take one off the argument. It made
more sense when I wrote the virtblk code (here it's natural, since the numAgreed, and it was the use of "prv_nents - 2" in that code which made me think
the arg should be "num used" not "one past the num used".Cheers,
Rusty.
-
To correct my own thoughts, it'd be better to just put a "struct list_head
list;" in there for chaining. That's more along standard kernel lines, and
neatly handles the single-scatterlist case.Cheers,
Rusty.
-
I modified sg_set_page() to take a length and offset argument, and
converted these silly sg_set_page(.., virt_to_page(foo)) to just use
sg_set_buf() instead:30 files changed, 93 insertions(+), 152 deletions(-)
and it's definitely a win. Added to the pending bits...
--
Jens Axboe-
As it no longer sets the page only, perhaps it's a good idea to rename
sg_set_page() to sg_set()?Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.orgIn personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
-
sg_set_buf() also sets length and offset, sg_set_page() is just a mirror
of that. So I'd prefer to keep the naming.--
Jens Axboe-
I agree. And it's not like you can get it wrong, since if you only give
the "page" argument, the preprocessor will complain loudly.I think "sg_set_x()" is now rather logical - we fill in the SG entry
(entirely), and the only question is _how_ we do it (which is what "x"
says - using a page, or a kernel buffer).Linus
-
Hmmm, sg_set_phys/sg_set_virt to be more symmetrical to
sg_phys/sg_virt?OG.
-
(please don't drop cc lists)
That doesn't make any sense. Both sg_set_buf() and sg_set_page() set the
same thing in the sg entry, the input is just different. It has nothing
to do with setting the physical value, for instance.--
Jens Axboe-
Ok. I misunderstood the sg_virt/sg_phys difference I guess. No
problem.OG.
-
Not all paths need to know the exact number though, and with the changes
you could legitimately pass in just the header and iteration would workYes agreed, debating taste is usually not very interesting as we wont
My pleasure, I just wish it could have been a little less painful. But
in a day or two, it should all be behind us and we can move forward with
making good use of it.--
Jens Axboe-
The stack pointer must be even (i.e. 2 byte-alignment).
But it looks like current gcc always allocates multiples of 4 bytes on
the stack, probably for performance reasons.Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.orgIn personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
-
From: Linus Torvalds <torvalds@linux-foundation.org>
Indeed that's the crux of the matter, we need to express a trinary
state "end of scatterlist, next entry is linear, next entry is
indirect" plus a pointer for the indirect case.Generally, Jens was doing a good job cooking up the patch that
implemented this fully and I took care of making sure tricky
ports like sparc64 built cleanly etc.He went away for a few days, but when he gets back we should seriously
work on integrating his work.I fully recognize Alan's m68k on-stack alignment concern. The on-stack
cases are troublesome in other ways as well, and I think therefore the
way to move forward is to convert those to some kind of dynamic scheme.
Usually such code is working in a locked context on some object
(crypto instance, for example) and thus the scatterlist chunk can
be embedded into that object for ensured alignment.
-
Certainly seems safer than the current "let's run off the end of the
list if anything bad happens" setup... And I do not think allocating
n+1 scatterlist entries will have much of a negative impact.Jeff
-
It'll mean m-1 scatterlists fit on a slab.
--
Mathematics is the supreme nostalgia of our time.
-
On Mon, 22 Oct 2007 16:47:07 -0500
Is that really a credible space issue ?
-
Yes. Especially if m is 2 or 1. A scatterlist on 64-bit x86 looks like
it takes 32 bytes, which means 128 elements fit on a page. One more
spills - ouch!But maybe chaining means this doesn't matter any more. Maybe we can
even pick a nice moderate sg size and reduce the number of mempools we
need for these things.--
Mathematics is the supreme nostalgia of our time.
-
...and its trivial to reduce that number to 127 without noticeable
effect, really.Jeff
-
Alternatively, I proposed to check for end of list in sg_next
by calling it with the next iterator value and number of list elements.
We tried that patch here and it seems like a reasonable alternative.-
Also, the current version of SLOB will return objects aligned at 2 bytes if the
I'm guessing the extra entry makes slab-like allocators unhappy.
--
Mathematics is the supreme nostalgia of our time.
-
How about stack allocations?
--
Jens Axboe-
Fix fallout from 18dabf473e15850c0dbc8ff13ac1e2806d542c15:
In file included from include/linux/dma-mapping.h:52,
from drivers/base/dma-mapping.c:10:
include/asm/dma-mapping.h: In function 'dma_map_sg':
include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:288: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:289: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h:290: error: 'struct scatterlist' has no member named 'page'
include/asm/dma-mapping.h: In function 'dma_sync_sg_for_cpu':
include/asm/dma-mapping.h:331: error: 'struct scatterlist' has no member named 'page'drivers/scsi/ps3rom.c: In function 'fetch_to_dev_buffer':
drivers/scsi/ps3rom.c:150: error: 'struct scatterlist' has no member named 'page'Signed-off-by: Olof Johansson <olof@lixom.net>
diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index 65be95d..ff52013 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -285,9 +285,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
BUG_ON(direction == DMA_NONE);for_each_sg(sgl, sg, nents, i) {
- BUG_ON(!sg->page);
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
- sg->dma_address = page_to_bus(sg->page) + sg->offset;
+ BUG_ON(!sg_page(sg));
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+ sg->dma_address = page_to_bus(sg_page(sg)) + sg->offset;
}return nents;
@@ -328,7 +328,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev,
BUG_ON(direction == DMA_NONE);for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
}stat...
Applied.
--
Jens Axboe-
More fallout from sg_page changes, found with powerpc allyesconfig:
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'Signed-off-by: Olof Johansson <olof@lixom.net>
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738..a3037f3 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -1776,7 +1776,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(chunk->page_list[i].page)
+ pgaddr = page_to_pfn(sg_page(chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = phys_to_abs(pgaddr +
(pginfo->next_hwpage *
@@ -1832,7 +1832,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
{
int t;
for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ u64 pgaddr = page_to_pfn(sg_page(page_list[t])) << PAGE_SHIFT;
ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
*(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1867,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
if (nr_k...
Thanks a lot Olof, applied both fixups!
--
Jens Axboe-
More fallout from sg_page changes:
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user1':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1779: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_check_kpages_per_ate':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1835: error: 'struct scatterlist' has no member named 'page'
drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf_user2':
drivers/infiniband/hw/ehca/ehca_mrmw.c:1870: error: 'struct scatterlist' has no member named 'page'Signed-off-by: Olof Johansson <olof@lixom.net>
---
I messed up the second fix. :( please replace with this.
-Olof
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738..ead7230 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -1776,7 +1776,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
list_for_each_entry_continue(
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(chunk->page_list[i].page)
+ pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
<< PAGE_SHIFT ;
*kpage = phys_to_abs(pgaddr +
(pginfo->next_hwpage *
@@ -1832,7 +1832,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
{
int t;
for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
*(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1867,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
for (i = pginf...
No problem, applied.
--
Jens Axboe-
| Greg Kroah-Hartman | [PATCH 006/196] Chinese: add translation of oops-tracing.txt |
| Andrew Morton | Re: -mm merge plans for 2.6.23 -- sys_fallocate |
| Eric W. Biederman | [PATCH] nfs lockd reclaimer: Convert to kthread API |
| James Bottomley | Re: Integration of SCST in the mainstream Linux kernel |
git: | |
| David Miller | [GIT]: Networking |
| Gerrit Renker | [PATCH 03/37] dccp: List management for new feature negotiation |
| David Miller | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
