Permalink
Browse files

Add driver-assisted striping for upcoming Intel NVMe controllers that…

… can

benefit from it.

Sponsored by:	Intel
Reviewed by:	kib (earlier version), carl
Approved by:	re (hrs)
MFC after:	1 week
  • Loading branch information...
1 parent 4e3872a commit 509a7951936691b0bf8b3257febf8fa9a25dbdc0 @jimharris jimharris committed Oct 8, 2013
Showing with 225 additions and 12 deletions.
  1. +0 −11 sys/dev/nvd/nvd.c
  2. +1 −1 sys/dev/nvme/nvme.h
  3. +223 −0 sys/dev/nvme/nvme_ns.c
  4. +1 −0 sys/dev/nvme/nvme_private.h
View
11 sys/dev/nvd/nvd.c
@@ -187,17 +187,6 @@ nvd_done(void *arg, const struct nvme_completion *cpl)
atomic_add_int(&ndisk->cur_depth, -1);
- /*
- * TODO: add more extensive translation of NVMe status codes
- * to different bio error codes (i.e. EIO, EINVAL, etc.)
- */
- if (nvme_completion_is_error(cpl)) {
- bp->bio_error = EIO;
- bp->bio_flags |= BIO_ERROR;
- bp->bio_resid = bp->bio_bcount;
- } else
- bp->bio_resid = 0;
-
biodone(bp);
}
View
2 sys/dev/nvme/nvme.h
@@ -535,7 +535,7 @@ struct nvme_controller_data {
uint8_t reserved6[1024];
/* bytes 3072-4095: vendor specific */
- uint8_t reserved7[1024];
+ uint8_t vs[1024];
} __packed __aligned(4);
struct nvme_namespace_data {
View
223 sys/dev/nvme/nvme_ns.c
@@ -34,13 +34,31 @@ __FBSDID("$FreeBSD$");
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
+#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <dev/pci/pcivar.h>
+#include <geom/geom.h>
+
#include "nvme_private.h"
+static void nvme_bio_child_inbed(struct bio *parent, int bio_error);
+static void nvme_bio_child_done(void *arg,
+ const struct nvme_completion *cpl);
+static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size,
+ uint32_t alignment);
+static void nvme_free_child_bios(int num_bios,
+ struct bio **child_bios);
+static struct bio ** nvme_allocate_child_bios(int num_bios);
+static struct bio ** nvme_construct_child_bios(struct bio *bp,
+ uint32_t alignment,
+ int *num_bios);
+static int nvme_ns_split_bio(struct nvme_namespace *ns,
+ struct bio *bp,
+ uint32_t alignment);
+
static int
nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
struct thread *td)
@@ -202,18 +220,218 @@ nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
if (bp->bio_driver2)
free(bp->bio_driver2, M_NVME);
+ if (nvme_completion_is_error(status)) {
+ bp->bio_flags |= BIO_ERROR;
+ if (bp->bio_error == 0)
+ bp->bio_error = EIO;
+ }
+
+ if ((bp->bio_flags & BIO_ERROR) == 0)
+ bp->bio_resid = 0;
+ else
+ bp->bio_resid = bp->bio_bcount;
+
bp_cb_fn(bp, status);
}
+static void
+nvme_bio_child_inbed(struct bio *parent, int bio_error)
+{
+ struct nvme_completion parent_cpl;
+ int inbed;
+
+ if (bio_error != 0) {
+ parent->bio_flags |= BIO_ERROR;
+ parent->bio_error = bio_error;
+ }
+
+ /*
+ * atomic_fetchadd will return value before adding 1, so we still
+ * must add 1 to get the updated inbed number.
+ */
+ inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
+ if (inbed == parent->bio_children) {
+ bzero(&parent_cpl, sizeof(parent_cpl));
+ if (parent->bio_flags & BIO_ERROR)
+ parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
+ nvme_ns_bio_done(parent, &parent_cpl);
+ }
+}
+
+static void
+nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
+{
+ struct bio *child = arg;
+ struct bio *parent;
+ int bio_error;
+
+ parent = child->bio_parent;
+ g_destroy_bio(child);
+ bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
+ nvme_bio_child_inbed(parent, bio_error);
+}
+
+static uint32_t
+nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
+{
+ uint32_t num_segs, offset, remainder;
+
+ if (align == 0)
+ return (1);
+
+ KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));
+
+ num_segs = size / align;
+ remainder = size & (align - 1);
+ offset = addr & (align - 1);
+ if (remainder > 0 || offset > 0)
+ num_segs += 1 + (remainder + offset - 1) / align;
+ return (num_segs);
+}
+
+static void
+nvme_free_child_bios(int num_bios, struct bio **child_bios)
+{
+ int i;
+
+ for (i = 0; i < num_bios; i++) {
+ if (child_bios[i] != NULL)
+ g_destroy_bio(child_bios[i]);
+ }
+
+ free(child_bios, M_NVME);
+}
+
+static struct bio **
+nvme_allocate_child_bios(int num_bios)
+{
+ struct bio **child_bios;
+ int err = 0, i;
+
+ child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
+ if (child_bios == NULL)
+ return (NULL);
+
+ for (i = 0; i < num_bios; i++) {
+ child_bios[i] = g_new_bio();
+ if (child_bios[i] == NULL)
+ err = ENOMEM;
+ }
+
+ if (err == ENOMEM) {
+ nvme_free_child_bios(num_bios, child_bios);
+ return (NULL);
+ }
+
+ return (child_bios);
+}
+
+static struct bio **
+nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
+{
+ struct bio **child_bios;
+ struct bio *child;
+ uint64_t cur_offset;
+ caddr_t data;
+ uint32_t rem_bcount;
+ int i;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ struct vm_page **ma;
+ uint32_t ma_offset;
+#endif
+
+ *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
+ alignment);
+ child_bios = nvme_allocate_child_bios(*num_bios);
+ if (child_bios == NULL)
+ return (NULL);
+
+ bp->bio_children = *num_bios;
+ bp->bio_inbed = 0;
+ cur_offset = bp->bio_offset;
+ rem_bcount = bp->bio_bcount;
+ data = bp->bio_data;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ ma_offset = bp->bio_ma_offset;
+ ma = bp->bio_ma;
+#endif
+
+ for (i = 0; i < *num_bios; i++) {
+ child = child_bios[i];
+ child->bio_parent = bp;
+ child->bio_cmd = bp->bio_cmd;
+ child->bio_offset = cur_offset;
+ child->bio_bcount = min(rem_bcount,
+ alignment - (cur_offset & (alignment - 1)));
+ child->bio_flags = bp->bio_flags;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+ if (bp->bio_flags & BIO_UNMAPPED) {
+ child->bio_ma_offset = ma_offset;
+ child->bio_ma = ma;
+ child->bio_ma_n =
+ nvme_get_num_segments(child->bio_ma_offset,
+ child->bio_bcount, PAGE_SIZE);
+ ma_offset = (ma_offset + child->bio_bcount) &
+ PAGE_MASK;
+ ma += child->bio_ma_n;
+ if (ma_offset != 0)
+ ma -= 1;
+ } else
+#endif
+ {
+ child->bio_data = data;
+ data += child->bio_bcount;
+ }
+ cur_offset += child->bio_bcount;
+ rem_bcount -= child->bio_bcount;
+ }
+
+ return (child_bios);
+}
+
+static int
+nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
+ uint32_t alignment)
+{
+ struct bio *child;
+ struct bio **child_bios;
+ int err, i, num_bios;
+
+ child_bios = nvme_construct_child_bios(bp, alignment, &num_bios);
+ if (child_bios == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < num_bios; i++) {
+ child = child_bios[i];
+ err = nvme_ns_bio_process(ns, child, nvme_bio_child_done);
+ if (err != 0) {
+ nvme_bio_child_inbed(bp, err);
+ g_destroy_bio(child);
+ }
+ }
+
+ free(child_bios, M_NVME);
+ return (0);
+}
+
int
nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
nvme_cb_fn_t cb_fn)
{
struct nvme_dsm_range *dsm_range;
+ uint32_t num_bios;
int err;
bp->bio_driver1 = cb_fn;
+ if (ns->stripesize > 0 &&
+ (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+ num_bios = nvme_get_num_segments(bp->bio_offset,
+ bp->bio_bcount, ns->stripesize);
+ if (num_bios > 1)
+ return (nvme_ns_split_bio(ns, bp, ns->stripesize));
+ }
+
switch (bp->bio_cmd) {
case BIO_READ:
err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
@@ -276,6 +494,11 @@ nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
ns->ctrlr = ctrlr;
ns->id = id;
+ ns->stripesize = 0;
+
+ if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0)
+ ns->stripesize =
+ (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
/*
* Namespaces are reconstructed after a controller reset, so check
View
1 sys/dev/nvme/nvme_private.h
@@ -238,6 +238,7 @@ struct nvme_namespace {
uint16_t flags;
struct cdev *cdev;
void *cons_cookie[NVME_MAX_CONSUMERS];
+ uint32_t stripesize;
struct mtx lock;
};

0 comments on commit 509a795

Please sign in to comment.