RDMA/core: remove use of dma_virt_ops
[ Upstream commit 5a7a9e0 ]

Use the ib_dma_* helpers to skip the DMA translation instead.  This
removes the last user of dma_virt_ops and keeps the weird layering
violation inside the RDMA core instead of burdening the DMA mapping
subsystem with it.  This also means the software RDMA drivers now don't
have to mess with DMA parameters that are not relevant to them at all, and
that in the future we can use PCI P2P transfers even for software RDMA, as
there is no first fake layer of DMA mapping that the P2P DMA support would
have to bypass.

Link: https://lore.kernel.org/r/20201106181941.1878556-8-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Christoph Hellwig authored and gregkh committed Jan 9, 2021
1 parent 2a54ad3 commit 404fa09
Showing 13 changed files with 81 additions and 76 deletions.
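A minimal sketch of the resulting behaviour, before the hunks themselves: for an ib_device registered without a dma_device, the reworked ib_dma_* helpers below bypass the DMA API and stash the kernel virtual address instead. The demo_virt_dma, ibdev, buf and len names are illustrative only, not taken from this diff.

#include <rdma/ib_verbs.h>

/* Sketch: assumes CONFIG_INFINIBAND_VIRT_DMA is enabled and ibdev was
 * registered with a NULL dma_device, so ib_uses_virt_dma(ibdev) is true. */
static int demo_virt_dma(struct ib_device *ibdev, void *buf, size_t len)
{
	u64 addr = ib_dma_map_single(ibdev, buf, len, DMA_TO_DEVICE);

	if (ib_dma_mapping_error(ibdev, addr))	/* always 0 on the virt-DMA path */
		return -ENOMEM;

	/* addr == (uintptr_t)buf; no dma_map_single() call was made. */
	ib_dma_unmap_single(ibdev, addr, len, DMA_TO_DEVICE);	/* no-op here */
	return 0;
}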
43 changes: 23 additions & 20 deletions drivers/infiniband/core/device.c
@@ -1177,25 +1177,6 @@ static int assign_name(struct ib_device *device, const char *name)
return ret;
}

static void setup_dma_device(struct ib_device *device,
struct device *dma_device)
{
/*
* If the caller does not provide a DMA capable device then the IB
* device will be used. In this case the caller should fully setup the
* ibdev for DMA. This usually means using dma_virt_ops.
*/
#ifdef CONFIG_DMA_VIRT_OPS
if (!dma_device) {
device->dev.dma_ops = &dma_virt_ops;
dma_device = &device->dev;
}
#endif
WARN_ON(!dma_device);
device->dma_device = dma_device;
WARN_ON(!device->dma_device->dma_parms);
}

/*
* setup_device() allocates memory and sets up data that requires calling the
* device ops, this is the only reason these actions are not done during
@@ -1341,7 +1322,14 @@ int ib_register_device(struct ib_device *device, const char *name,
if (ret)
return ret;

setup_dma_device(device, dma_device);
/*
* If the caller does not provide a DMA capable device then the IB core
* will set up ib_sge and scatterlist structures that stash the kernel
* virtual address into the address field.
*/
WARN_ON(dma_device && !dma_device->dma_parms);
device->dma_device = dma_device;

ret = setup_device(device);
if (ret)
return ret;
@@ -2676,6 +2664,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
}
EXPORT_SYMBOL(ib_set_device_ops);

#ifdef CONFIG_INFINIBAND_VIRT_DMA
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
{
struct scatterlist *s;
int i;

for_each_sg(sg, s, nents, i) {
sg_dma_address(s) = (uintptr_t)sg_virt(s);
sg_dma_len(s) = s->length;
}
return nents;
}
EXPORT_SYMBOL(ib_dma_virt_map_sg);
#endif /* CONFIG_INFINIBAND_VIRT_DMA */

static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
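The ib_dma_virt_map_sg() helper exported above is the scatterlist side of the same scheme: ib_dma_map_sg_attrs() (changed in the ib_verbs.h hunks further down) routes to it for virt-DMA devices, so each entry carries its own kernel virtual address. A minimal usage sketch, assuming a kernel-memory sg_table on such a device; demo_map_sgl, ibdev and sgt are illustrative names, not from this diff.

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Sketch: on a virt-DMA device the map call resolves to ib_dma_virt_map_sg(),
 * so sg_dma_address(s) == (uintptr_t)sg_virt(s), sg_dma_len(s) == s->length,
 * and the return value equals the nents passed in. */
static int demo_map_sgl(struct ib_device *ibdev, struct sg_table *sgt)
{
	struct scatterlist *s;
	size_t total = 0;
	int i, mapped;

	mapped = ib_dma_map_sg(ibdev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
	if (!mapped)
		return -EIO;

	for_each_sg(sgt->sgl, s, mapped, i)
		total += sg_dma_len(s);		/* == s->length on this path */
	pr_debug("mapped %d entries, %zu bytes\n", mapped, total);

	ib_dma_unmap_sg(ibdev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); /* no-op */
	return 0;
}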
5 changes: 4 additions & 1 deletion drivers/infiniband/core/rw.c
@@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
u32 sg_cnt, enum dma_data_direction dir)
{
if (is_pci_p2pdma_page(sg_page(sg)))
if (is_pci_p2pdma_page(sg_page(sg))) {
if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
return 0;
return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
}
return ib_dma_map_sg(dev, sg, sg_cnt, dir);
}

1 change: 0 additions & 1 deletion drivers/infiniband/sw/rdmavt/Kconfig
@@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT
depends on INFINIBAND_VIRT_DMA
depends on X86_64
depends on PCI
select DMA_VIRT_OPS
help
This is a common software verbs provider for RDMA networks.
6 changes: 2 additions & 4 deletions drivers/infiniband/sw/rdmavt/mr.c
@@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr)
* @acc: access flags
*
* Return: the memory region on success, otherwise returns an errno.
* Note that all DMA addresses should be created via the functions in
* struct dma_virt_ops.
*/
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
@@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,

/*
* We use LKEY == zero for kernel virtual addresses
* (see rvt_get_dma_mr() and dma_virt_ops).
* (see rvt_get_dma_mr()).
*/
if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
@@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,

/*
* We use RKEY == zero for kernel virtual addresses
* (see rvt_get_dma_mr() and dma_virt_ops).
* (see rvt_get_dma_mr()).
*/
rcu_read_lock();
if (rkey == 0) {
8 changes: 0 additions & 8 deletions drivers/infiniband/sw/rdmavt/vt.c
@@ -524,7 +524,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
int rvt_register_device(struct rvt_dev_info *rdi)
{
int ret = 0, i;
u64 dma_mask;

if (!rdi)
return -EINVAL;
@@ -579,13 +578,6 @@ int rvt_register_device(struct rvt_dev_info *rdi)
/* Completion queues */
spin_lock_init(&rdi->n_cqs_lock);

/* DMA Operations */
rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask);
if (ret)
goto bail_wss;

/* Protection Domain */
spin_lock_init(&rdi->n_pds_lock);
rdi->n_pds_allocated = 0;
1 change: 0 additions & 1 deletion drivers/infiniband/sw/rxe/Kconfig
@@ -5,7 +5,6 @@ config RDMA_RXE
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
select CRYPTO_CRC32
select DMA_VIRT_OPS
help
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
7 changes: 0 additions & 7 deletions drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1118,7 +1118,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
int err;
struct ib_device *dev = &rxe->ib_dev;
struct crypto_shash *tfm;
u64 dma_mask;

strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

@@ -1129,12 +1128,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
dev->local_dma_lkey = 0;
addrconf_addr_eui48((unsigned char *)&dev->node_guid,
rxe->ndev->dev_addr);
dev->dev.dma_parms = &rxe->dma_parms;
dma_set_max_seg_size(&dev->dev, UINT_MAX);
dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask);
if (err)
return err;

dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
| BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
1 change: 0 additions & 1 deletion drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -352,7 +352,6 @@ struct rxe_port {
struct rxe_dev {
struct ib_device ib_dev;
struct ib_device_attr attr;
struct device_dma_parameters dma_parms;
int max_ucontext;
int max_inline_data;
struct mutex usdev_lock;
1 change: 0 additions & 1 deletion drivers/infiniband/sw/siw/Kconfig
@@ -2,7 +2,6 @@ config RDMA_SIW
tristate "Software RDMA over TCP/IP (iWARP) driver"
depends on INET && INFINIBAND && LIBCRC32C
depends on INFINIBAND_VIRT_DMA
select DMA_VIRT_OPS
help
This driver implements the iWARP RDMA transport over
the Linux TCP/IP network stack. It enables a system with a
1 change: 0 additions & 1 deletion drivers/infiniband/sw/siw/siw.h
@@ -69,7 +69,6 @@ struct siw_pd {

struct siw_device {
struct ib_device base_dev;
struct device_dma_parameters dma_parms;
struct net_device *netdev;
struct siw_dev_cap attrs;

7 changes: 0 additions & 7 deletions drivers/infiniband/sw/siw/siw_main.c
@@ -306,7 +306,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
struct siw_device *sdev = NULL;
struct ib_device *base_dev;
struct device *parent = netdev->dev.parent;
u64 dma_mask;
int rv;

if (!parent) {
@@ -383,12 +382,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
*/
base_dev->phys_port_cnt = 1;
base_dev->dev.parent = parent;
base_dev->dev.dma_parms = &sdev->dma_parms;
dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask))
goto error;

base_dev->num_comp_vectors = num_possible_cpus();

xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
3 changes: 2 additions & 1 deletion drivers/nvme/target/rdma.c
@@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
goto out_free_rsp;

r->req.p2p_client = &ndev->device->dev;
if (!ib_uses_virt_dma(ndev->device))
r->req.p2p_client = &ndev->device->dev;
r->send_sge.length = sizeof(*r->req.cqe);
r->send_sge.lkey = ndev->pd->local_dma_lkey;

73 changes: 50 additions & 23 deletions include/rdma/ib_verbs.h
@@ -3943,13 +3943,25 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
-ENOSYS;
}

/*
* Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to
* NULL. This causes the ib_dma* helpers to just stash the kernel virtual
* address into the dma address.
*/
static inline bool ib_uses_virt_dma(struct ib_device *dev)
{
return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
}

/**
* ib_dma_mapping_error - check a DMA addr for error
* @dev: The device for which the dma_addr was created
* @dma_addr: The DMA address to check
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
if (ib_uses_virt_dma(dev))
return 0;
return dma_mapping_error(dev->dma_device, dma_addr);
}

@@ -3964,6 +3976,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
if (ib_uses_virt_dma(dev))
return (uintptr_t)cpu_addr;
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}

@@ -3978,7 +3992,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
dma_unmap_single(dev->dma_device, addr, size, direction);
if (!ib_uses_virt_dma(dev))
dma_unmap_single(dev->dma_device, addr, size, direction);
}

/**
@@ -3995,6 +4010,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
size_t size,
enum dma_data_direction direction)
{
if (ib_uses_virt_dma(dev))
return (uintptr_t)(page_address(page) + offset);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}

@@ -4009,7 +4026,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
dma_unmap_page(dev->dma_device, addr, size, direction);
if (!ib_uses_virt_dma(dev))
dma_unmap_page(dev->dma_device, addr, size, direction);
}

int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
if (ib_uses_virt_dma(dev))
return ib_dma_virt_map_sg(dev, sg, nents);
return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
dma_attrs);
}

static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
if (!ib_uses_virt_dma(dev))
dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
dma_attrs);
}

/**
Expand All @@ -4023,7 +4063,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
return dma_map_sg(dev->dma_device, sg, nents, direction);
return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
}

/**
Expand All @@ -4037,24 +4077,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
dma_unmap_sg(dev->dma_device, sg, nents, direction);
}

static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
dma_attrs);
}

static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
}

/**
Expand All @@ -4065,6 +4088,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
*/
static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
if (ib_uses_virt_dma(dev))
return UINT_MAX;
return dma_get_max_seg_size(dev->dma_device);
}

Expand All @@ -4080,7 +4105,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
if (!ib_uses_virt_dma(dev))
dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}

/**
Expand All @@ -4095,7 +4121,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
dma_sync_single_for_device(dev->dma_device, addr, size, dir);
if (!ib_uses_virt_dma(dev))
dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}

/**
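On the provider side nothing is left to configure: as the new ib_verbs.h comment above says, a software driver opts in simply by registering without a dma_device. A minimal sketch of such a registration; struct my_sw_dev and the "mysw%d" name format are assumptions for illustration and do not appear in the drivers touched by this commit.

#include <rdma/ib_verbs.h>

struct my_sw_dev {
	struct ib_device ib_dev;
	/* provider-private state would follow */
};

/* Sketch: passing NULL as the dma_device argument is what makes
 * ib_uses_virt_dma() return true for this device (given
 * CONFIG_INFINIBAND_VIRT_DMA), so every ib_dma_* helper above skips the
 * DMA API for it. */
static int my_sw_register(struct my_sw_dev *sdev)
{
	return ib_register_device(&sdev->ib_dev, "mysw%d", NULL);
}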