>From 6de5c54194ff2f36b4f7e34bdf118f52f6a57239 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Wed, 27 Feb 2008 18:02:13 +0200
Subject: [PATCH] IB/core: Add creation flags to QPs
This will allow a kernel verbs consumer to create a QP
and pass special flags to the hw layer. This patch also
defines one such flag for LSO support.
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
---
drivers/infiniband/core/uverbs_cmd.c | 1 +
include/rdma/ib_verbs.h | 5 +++++
2 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 495c803..9e98cec 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1065,6 +1065,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.srq = srq;
attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
attr.qp_type = cmd.qp_type;
+ attr.create_flags = 0;
attr.cap.max_send_wr = cmd.max_send_wr;
attr.cap.max_recv_wr = cmd.max_recv_wr;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 40ff512..7ee0077 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -495,6 +495,10 @@ enum ib_qp_type {
IB_QPT_RAW_ETY
};
+enum qp_create_flags {
+ QP_CREATE_LSO = 1 << 0,
+};
+
struct ib_qp_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
@@ -505,6 +509,7 @@ struct ib_qp_init_attr {
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
u8 port_num; /* special QP types only */
+ enum qp_create_flags create_flags;
};
enum ib_rnr_timeout {
--
1.5.4.4
_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> @@ -505,6 +509,7 @@ struct ib_qp_init_attr {
> enum ib_sig_type sq_sig_type;
> enum ib_qp_type qp_type;
> u8 port_num; /* special QP types only */
> + enum qp_create_flags create_flags;
> };
I'm dubious about this. It seems to me like everything (including the
mlx4 low-level driver changes for LSO) would be simpler to implement
and use if we just extend the qp_type to include IB_QPT_UD_LSO. For
example it eliminates the need to test if someone tries to create an
LSO QP for a non-UD transport and return an error (although I don't
see that test in your code anyway).
As I recall, the XRC patches want this flags field too. But does it
work there if we just add another XRC QP type too?
- R.
_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
I could do with defining a new QP type but the point is that you can still post LSO WRs even if you did not create an LSO QP - it's just that in some cases the post may fail due to insufficient buffer space. Specifying the creation flag tells the driver to reserve memory for LSO needs and so to ensure that any WR with any number of gather entries _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Not as simple. XRC has 2 QP types -- a regular XRC QP, and a receive-only
XRC QP (created only by a userspace app for receiving into XRC SRQs only,
but owned by the kernel -- so that the creating app can terminate without
the XRC RCV QP being destroyed).
There are many places where I test for IB_QPT_XRC -- and I would need to test for all the
XRC qp types in these places. Just doing a grep on the kernel_patches/fixes for the mlx4 driver yields:
..../ofed_kernel/kernel_patches/fixes> grep IB_QPT_XRC mlx4*
mlx4_0070_xrc.patch:+ if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) {
mlx4_0070_xrc.patch:+ if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) {
mlx4_0070_xrc.patch:+ if (init_attr->qp_type == IB_QPT_XRC)
mlx4_0070_xrc.patch:+ if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC)
mlx4_0070_xrc.patch:+ if (!pd->uobject && !init_attr->srq && init_attr->qp_type != IB_QPT_XRC)
mlx4_0070_xrc.patch:+ if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC)
mlx4_0070_xrc.patch:+ if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC)
mlx4_0070_xrc.patch:+ case IB_QPT_XRC:
mlx4_0070_xrc.patch:+ if (init_attr->qp_type == IB_QPT_XRC)
mlx4_0070_xrc.patch:+ case IB_QPT_XRC: return MLX4_QP_ST_XRC;
mlx4_0070_xrc.patch:+ if (ibqp->qp_type == IB_QPT_XRC)
mlx4_0070_xrc.patch:+ if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC &&
mlx4_0070_xrc.patch:+ if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC)
mlx4_0120_xrc_kernel.patch:+ case IB_QPT_XRC:
mlx4_0125_xrc_kernel_missed.patch:mlx4: fix some missed spots for IB_QPT_XRC qps.
mlx4_0125_xrc_kernel_missed.patch:+ case IB_QPT_XRC:
mlx4_0125_xrc_kernel_missed.patch:+static const int mlx4_ib_qp_attr_mask_table[IB_QPT_XRC + 1] = {
mlx4_0125_xrc_kernel_missed.patch:+ [IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
mlx4_0125_xrc_kernel_missed.patch:+ qp->ibqp.qp_type == IB_QPT_XRC) {
mlx4_0170_shrinking_wqe.patch: ...Our ehca2 supports sort of low-latency QP for UD and RC. It would be great if we can e.g. enhance above enum like this QP_CREATE_LL = 1 << 1 If you agree with, I'll create a patch. For kernel space the changes within ehca should not be that much. Thx Nam _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> Our ehca2 supports sort of low-latency QP for UD and RC. It would be great > if we can e.g. enhance above enum like this > QP_CREATE_LL = 1 << 1 OK, that actually convinces me that this is useful infrastructure. However, please use 1 << 2 for your flag since 0 is LSO and 1 is used by XRC already. - R. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Please use QP_CREATE_LL 1 << 2
I'm already using 1<<1 for XRC receive QPs:
enum qp_create_flags {
- QP_CREATE_LSO = 1 << 0,
+ QP_CREATE_LSO = 1 << 0,
+ QP_CREATE_XRC_RCV = 1 << 1,
};
(from OFA 1.3, file kernel_patches/fixes/core_0110_xrc_rcv.patch)
Thanks!
_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Roland, All, How about making the qp_type field a bit mask, such that in the ipoib LSO use case it would be (UD | LSO) and in the ipoib ehca case (UD | LL), etc. The bit mask change would also be propagated up to libibverbs and be defined in a way such that it preserves backward compatibility re the qp_type field for users of libibverbs that did not changed their code. I don't think it would make the XRC merge harder, and it would be very helpful in deploying the "block loopback" feature of the connectx, which in ofed 1.3 was implemented system wide (set or unset, see the patch below) and now can be set per app per qp, so IPoIB would create its qp _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
You can't have your cake and eat it too. You can't both use a bitmap
and preserve backwards compatibility.
The current QP types are:
enum ib_qp_type {
/*
* IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
* here (and in that order) since the MAD layer uses them as
* indices into a 2-entry table.
*/
IB_QPT_SMI,
IB_QPT_GSI,
IB_QPT_RC,
IB_QPT_UC,
IB_QPT_UD,
IB_QPT_XRC,
IB_QPT_RAW_IPV6,
IB_QPT_RAW_ETY
};
This means that we are using the 3 LSBs in an enumerated fashion (integers 0..7), so
we can't use a bitmap here. This leads us to a mixed field anyway --
3 bits for existing QP types (integers 0..6), and 29 bits of bit-map.
Why not just keep things separate, and use the enumeration as is for the existing types,
and keep the bitmap as a separate "flags" field -- this is much cleaner.
- Jack
_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Basically, my motivation is to find a way to integrate the block-loopback feature without breaking the libibverbs (kernel/user) and (lib/app) ABI. So in that sense I do hope there's a way to eat the cake in a way which is not notable by libibverbs consumers which are compiled and/or linked in any static/dynamic manner against an older version of the library. I assume that space wise (enum == unsigned int == u32) for the compiler. So with that assumption at hand, the above enum has the values 0...7 occupied, what would be the problem to replace in the kernel .h file, the enum qp_type field with u32 qp_type_bits field where the above types getting bits 0...7 respectively and the new "types" LSO, LL, BL getting bits 8,9,10 . Later change the libibverbs .h file to match this?! Or. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
This *does* break the ABI. An older libibverbs will not interpret the qp_type in the same way as the new kernel will, so you will get a mish-mash. - Jack _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
This breakage is related to ---forward-- compatibility - that is have an app built against a new version of libibverbs working with --older-- version of libibverbs. I wanted to see that we have backward compatibility. Is libibverbs protected against such breakage all over the place? Or. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
No, it isn't. If the customer has an old libibverbs running against a new kernel (which uses your bitmap suggestion of bits 0..7), all his apps will fail to run (the qp_type passed by the old libibverbs to the new kernel will be misinterpreted). - Jack _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Ok, Jack, I see your point now, sorry for being slow to catch up, so Yes, so way we can go the somehow not very elegant way, that is have the uverbs code examine the u32 value and if the only non zero bits are among the 3 LS ones, uses the values from the old enum and if any of the remaining 29 bits are set consider their values under the bit field and this means breaking the ABI, correct. If such a breakage is going to happen anyway for the merge of XRC into the mainline kernel and the official libibverbs as maintained by Roland, I guess this is fine. I see now that the device capabilities enum is managed in bit field fashion so at least there the kernel can advertise easily the block-loopback support as you did for XRC (below) why have you left 6 unused bits _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
BTW: qp_type in struct ib_uverbs_create_qp is a __u8:
struct ib_uverbs_create_qp {
__u64 response;
__u64 user_handle;
__u32 pd_handle;
__u32 send_cq_handle;
__u32 recv_cq_handle;
__u32 srq_handle;
__u32 max_send_wr;
__u32 max_recv_wr;
__u32 max_send_sge;
__u32 max_recv_sge;
__u32 max_inline_data;
__u8 sq_sig_all;
__u8 qp_type;
__u8 is_srq;
__u8 qp_create_flags;
__u64 driver_data[0];
};
Nam
_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
mmm, we can still scale up to 5 features on top of the existing types. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Oops. - Jack _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
I see this as a reserved field in OFED 1.3 . Is this something that you added? - Jack _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
added? Right, omm, my fault. Was testing my private stuff. qp_create_flags should read reserved. Thx for pointing that out. Nam _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Actually, we did not break the ABI for XRC (we had to do cartwheels to avoid this, but what the heck). The create_flags field was added ONLY to struct ib_qp_init_attr (kernel only), and was not exported upwards to the userspace API (so as not to break the ABI). I got around the create_flags problem by adding a new verb to userspace (ibv_create_xrc_rcv_qp() ) with its own ABI to kernel space. Since the kernel-space function (added to uverbs_cmd: ib_uverbs_create_xrc_rcv_qp() ) "knew" that it was creating an XRC_RCV qp, it set the flag in ib_qp_init_attr appropriately. Additionally, Eli did not need user-space involvement for his LSO flag -- so we escaped needing to increment the ABI version number. If we need to use the create_flags field in userspace, we WILL need to break the ABI. You can avoid doing this in one of 2 ways: Either: create a new QP type Or: add new verbs to the core. - Jack _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Jack - OK, got it, and thanks a lot! for the coaching. Roland - in the case of the connectx block loopback feature, it seems that the simplest way to go would for user space is to create a new QP type IB_QPT_UD_BL[block-loopback] which is translated by the uverbs layer to what ever scheme we decide on for the kernel (eg create_flags, qp_types, bit fields, etc). As for IPoIB, it would create a UD/LSO/BL QP, thoughts? Or. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
What is your recommendation wrt/ encoding scheme for qp_type and create_flags? For ehca we need user space support for LL QP. And we can implement that as a flag (my favorite) or as two additional qp_types (IB_QPT_UD_LL and IB_QPT_RC_LL). Anyway I'm flexible in whatever approach you choose. PS: See also this thread http://lists.openfabrics.org/pipermail/general/2008-March/048445.html Thanks! Nam _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> What is your recommendation wrt/ encoding scheme for qp_type and > create_flags? I don't think I know enough to make a pronouncement yet. Maybe someone can summarize the possibilities and see how they work for XRC, ehca LL, block-loopback, etc? Bumping ABI is painful but on the other hand an explosion of new verbs is ugly. So it's all going to be a tradeoff. - R. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Jack, I am trying to figure out what would be the least painful way to allow for specifi Looking on the libibverbs - XRC instance (git://git.openfabrics.org/ofed_1_3/libibverbs.git) and re-reading your email, my understanding is that the way you went was 1. add a new field at the end of struct ibv_qp_init_attr 2. add bunch of new XRC verbs packed in struct ibv_xrc_ops 3. both create_qp & create_xrc_rcv_qp get ibv_qp_init_attr, the latter looks on the xrc field 4. add ibv_xrc_ops at the end of the ibv_context I have pasted below the relevant code from verbs.h, as I can't point on one patch that does this all, since there were some fixes/changes since the initial commit. So if we want to have qp creation verb that gets creation flags, we can can A. add create_flags field to the end of ibv_qp_init_attr B. introduce struct ibv_qp * (*create_qp_ext)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) C. enhance struct ibv_context similarily to what was done for xrc This seems to bring to minimum the breakage from the perspective of libibverbs consumers. As for taking this down to uverbs, I am fine with anything you suggest. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
I don't think it is a good idea to mix enumerated types with bitmasks since bitmasks are wasting too much of the size of the enum (which depends on the CPU architecture). I like creation flags more also because it allows more freedom in choosing the semantics of the flag and not worrying if it fits in the category as a kind of "qp type". _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Eli and Jack, Rethought about this and agree that keeping those enums separately is cleaner. @Roland (and all), depending on this outcome I'll create my patch appropriately. Thx Nam _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
code. I personally prefer this because we want to providde LL to user space. Using qp_type's bit fields does not require me to change e.g. ib_uverbs_create_qp to reflect create_flags from user space, which will break previous libxyz - currently it's hardcoded to zero in uverbs_cmd.c, see also http://lists.openfabrics.org/pipermail/general/2008-March/047960.html Nam _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Thanks, I applied this with some extra code in all the low-level drivers to make sure that the create_flags are passed in as 0. Does From: Eli Cohen <eli@dev.mellanox.co.il> Date: Mon, 17 Mar 2008 17:23:47 +0200 Subject: [PATCH] IB/core: Add creation flags to struct ib_qp_init_attr Add a create_flags member to struct ib_qp_init_attr that will allow a kernel verbs consumer to create a pass special flags when creating a QP. Add a flag value for telling low-level drivers that a QP will be used for IPoIB UD LSO. The create_flags member will also be useful for XRC and ehca low-latency QP support. Since no create_flags handling is implemented yet, add code to all low-level drivers to return -EINVAL if create_flags is non-zero. Signed-off-by: Eli Cohen <eli@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/core/uverbs_cmd.c | 1 + drivers/infiniband/hw/amso1100/c2_provider.c | 3 +++ drivers/infiniband/hw/ehca/ehca_qp.c | 3 +++ drivers/infiniband/hw/ipath/ipath_qp.c | 5 +++++ drivers/infiniband/hw/mlx4/qp.c | 3 +++ drivers/infiniband/hw/mthca/mthca_provider.c | 3 +++ drivers/infiniband/hw/nes/nes_verbs.c | 3 +++ include/rdma/ib_verbs.h | 5 +++++ 8 files changed, 26 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 238680c..8767192 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1065,6 +1065,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.srq = srq; attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; + attr.create_flags = 0; attr.cap.max_send_wr = cmd.max_send_wr; attr.cap.max_recv_wr = cmd.max_recv_wr; diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index ...
As far as I could see, all kernel consumers clear struct ib_qp_init_attr before calling ib_create_qp() and for user space we clear this too. The only hole is that rdma_create_qp() is exported and someone may be calling this function without clearing struct ib_qp_init_attr. But I think your approach is safer and should be used. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Roland, can you please comment what is the approach you prefer to see for --user space-- implementation of features such as the ehca low latency and the mlx4 block loopback QP "types"? do you want to go the XRC way of not breaking the ABI by introducing a new create-qp verb per If this is what you prefer to see, does it makes sense to have one new verb that can be used for xrc, ehca-ll, mlx4-block loopback and what ever new features we want to add for user space QPs in the future? Or. _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Below changes make sense to me as I would have to check the flags when introducing LL QP flag for ehca later. BTW: If you have some minutes, please let us agree on the encoding scheme for qp_types and create_flags as discussed in this thread. Thanks! _______________________________________________ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
