Skip to content

Commit

Permalink
Add support for discard to tapdisk
Browse files Browse the repository at this point in the history
With this commit, tapdisk is able to understand and pass-through discard
request to tapdisk drivers which support it.

Each discard messages on the xen blkif is handled as follows:
1. xenio_blkif_get_request() gets discard requests from the ring. It decodes
   the request depending on the blkif protocol type and converts
   them into generic blkif_request_discard_t using blkif_get_req_discard.
2. tapdisk_xenblkif_make_vbd_request() iterates the message counter
   blkback_stats.st_ds_req for discards.
3. tapdisk_xenblkif_parse_request_discard() converts the discard request into
   a td_vbd_request with a start sector (sec) and a length
   (discard_nr_sectors).
3. The td_vbd_request is encapsulated into a td_request_t and is sanity
   checked in tapdisk_image_check_td_request, tapdisk_image_check_request and
   the new td_queue_discard method.
4. Ultimately the request is handled in td_queue_discard. If the tapdisk
   driver implements td_queue_discard, the request is passed through to that.
   If not, the request is failed with -EOPNOTSUPP.

This commit has been dev-tested using:
* v8 Windows PV drivers that include XenDisk and thereby implement discard
* Linux xen-blkfront that implements discard

Signed-off-by: Robert Breker <robert.breker@citrix.com>
  • Loading branch information
Robert Breker committed Oct 7, 2015
1 parent 8ab7496 commit 1a51656
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 10 deletions.
11 changes: 8 additions & 3 deletions drivers/tapdisk-image.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,17 @@ tapdisk_image_check_td_request(td_image_t *image, td_request_t treq)
info = &image->info;
rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY);

if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE)
if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE &&
treq.op != TD_OP_DISCARD)
goto fail;

if (treq.op == TD_OP_WRITE && rdonly) {
if ((treq.op == TD_OP_WRITE || treq.op == TD_OP_DISCARD) && rdonly) {
err = -EPERM;
goto fail;
}

if (treq.secs <= 0 || treq.sec + treq.secs > info->size)
if ((treq.secs <= 0 || treq.sec + treq.secs > info->size) &&
treq.op != TD_OP_DISCARD)
goto fail;

return 0;
Expand Down Expand Up @@ -140,6 +142,9 @@ tapdisk_image_check_request(td_image_t *image, td_vbd_request_t *vreq)
secs += vreq->iov[i].secs;

switch (vreq->op) {
case TD_OP_DISCARD:
secs = vreq->discard_nr_sectors;
/* fall through */
case TD_OP_WRITE:
if (rdonly) {
err = -EPERM;
Expand Down
36 changes: 36 additions & 0 deletions drivers/tapdisk-interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,42 @@ td_queue_read(td_image_t *image, td_request_t treq)
td_complete_request(treq, err);
}


void
td_queue_discard(td_image_t *image, td_request_t treq)
{
int err;
td_driver_t *driver;

driver = image->driver;
if (!driver) {
err = -ENODEV;
goto fail;
}

if (!td_flag_test(driver->state, TD_DRIVER_OPEN)) {
err = -EBADF;
goto fail;
}

if (!driver->ops->td_queue_discard) {
err = -EOPNOTSUPP;
goto fail;
}

err = tapdisk_image_check_td_request(image, treq);
if (err)
goto fail;

driver->ops->td_queue_discard(driver, treq);

return;

fail:
td_complete_request(treq, err);
}


void
td_forward_request(td_request_t treq)
{
Expand Down
1 change: 1 addition & 0 deletions drivers/tapdisk-interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ int td_set_quantum(td_image_t *, int);

void td_queue_write(td_image_t *, td_request_t);
void td_queue_read(td_image_t *, td_request_t);
void td_queue_discard(td_image_t *, td_request_t);
void td_forward_request(td_request_t);
void td_complete_request(td_request_t, int);

Expand Down
22 changes: 22 additions & 0 deletions drivers/tapdisk-vbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1363,6 +1363,10 @@ __tapdisk_vbd_reissue_td_request(td_vbd_t *vbd,
case TD_OP_READ:
td_queue_read(parent, treq);
break;

case TD_OP_DISCARD:
td_queue_discard(parent, treq);
break;
}

done:
Expand Down Expand Up @@ -1485,6 +1489,19 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
goto fail;
}

if(vreq->op==TD_OP_DISCARD) {
treq.sidx = 1;
treq.sec = sec;
treq.image = image;
treq.cb = tapdisk_vbd_complete_td_request;
treq.cb_data = NULL;
treq.vreq = vreq;
treq.op = TD_OP_DISCARD;
td_queue_discard(treq.image, treq);
err = 0;
goto out;
}

for (i = 0; i < vreq->iovcnt; i++) {
struct td_iovec *iov = &vreq->iov[i];

Expand Down Expand Up @@ -1529,6 +1546,11 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
vbd->vdi_stats.stats->read_reqs_submitted++;
td_queue_read(treq.image, treq);
break;

case TD_OP_DISCARD:
treq.op = TD_OP_DISCARD;
td_queue_discard(treq.image, treq);
break;
}

DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08"PRIx64" secs 0x%04x "
Expand Down
6 changes: 6 additions & 0 deletions drivers/tapdisk.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

#include <time.h>
#include <stdint.h>
#include <stdbool.h>

#include "list.h"
#include "compiler.h"
Expand All @@ -74,6 +75,7 @@ extern unsigned int PAGE_SHIFT;

#define TD_OP_READ 0
#define TD_OP_WRITE 1
#define TD_OP_DISCARD 2

#define TD_OPEN_QUIET 0x00001
#define TD_OPEN_QUERY 0x00002
Expand Down Expand Up @@ -126,6 +128,7 @@ struct td_disk_info {
td_sector_t size;
long sector_size;
uint32_t info;
bool discard_supported;
};

struct td_iovec {
Expand Down Expand Up @@ -155,6 +158,8 @@ struct td_vbd_request {
td_vbd_t *vbd;
struct list_head next;
struct list_head *list_head;

uint64_t discard_nr_sectors;
};

struct td_request {
Expand Down Expand Up @@ -188,6 +193,7 @@ struct tap_disk {
int (*td_validate_parent) (td_driver_t *, td_driver_t *, td_flag_t);
void (*td_queue_read) (td_driver_t *, td_request_t);
void (*td_queue_write) (td_driver_t *, td_request_t);
void (*td_queue_discard) (td_driver_t *, td_request_t);
void (*td_debug) (td_driver_t *);
void (*td_stats) (td_driver_t *, td_stats_t *);

Expand Down
28 changes: 26 additions & 2 deletions drivers/td-ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,17 @@ xenio_pending_blkif(struct td_xenio_ctx * const ctx)
dst->seg[i] = src->seg[i]; \
}

#define blkif_get_req_discard(dst, discard_src) \
{ \
/* assert(sizeof(blkif_request_discard_t)<sizeof(blkif_request_t)) */ \
blkif_request_discard_t *discard_dst = (blkif_request_discard_t *) dst; \
discard_dst->operation = src->operation; \
discard_dst->flag = discard_src->flag; \
discard_dst->id = discard_src->id; \
discard_dst->sector_number = discard_src->sector_number; \
discard_dst->nr_sectors = discard_src->nr_sectors; \
}

/**
* Utility function that retrieves a request using @idx as the ring index,
* copying it to the @dst in a H/W independent way.
Expand All @@ -149,6 +160,7 @@ xenio_blkif_get_request(struct td_xenblkif * const blkif,
{
blkif_request_t *src;
src = RING_GET_REQUEST(&rings->native, idx);
// sizeof(blkif_request_t)>sizeof(blkif_request_discard_t)
memcpy(dst, src, sizeof(blkif_request_t));
break;
}
Expand All @@ -157,15 +169,27 @@ xenio_blkif_get_request(struct td_xenblkif * const blkif,
{
blkif_x86_32_request_t *src;
src = RING_GET_REQUEST(&rings->x86_32, idx);
blkif_get_req(dst, src);
if (src->operation==BLKIF_OP_DISCARD) {
blkif_x86_32_request_discard_t * discard_src;
discard_src = (blkif_x86_32_request_discard_t *) src;
blkif_get_req_discard(dst, discard_src);
} else {
blkif_get_req(dst, src);
}
break;
}

case BLKIF_PROTOCOL_X86_64:
{
blkif_x86_64_request_t *src;
src = RING_GET_REQUEST(&rings->x86_64, idx);
blkif_get_req(dst, src);
if (src->operation==BLKIF_OP_DISCARD) {
blkif_x86_64_request_discard_t * discard_src;
discard_src = (blkif_x86_64_request_discard_t *) src;
blkif_get_req_discard(dst, discard_src);
} else {
blkif_get_req(dst, src);
}
break;
}

Expand Down
46 changes: 43 additions & 3 deletions drivers/td-req.c
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,37 @@ tapdisk_xenblkif_parse_request(struct td_xenblkif * const blkif,
return err;
}

static inline int
tapdisk_xenblkif_parse_request_discard(struct td_xenblkif * const blkif,
struct td_xenblkif_req * const req)
{
int err = 0;
td_vbd_request_t *vreq;
blkif_request_discard_t * request_discard_msg;

vreq = &req->vreq;
ASSERT(vreq);

vreq->iov = 0;
vreq->iovcnt = 0;
vreq->sec = 0;

request_discard_msg = (blkif_request_discard_t*)&req->msg;
vreq->discard_nr_sectors = request_discard_msg->nr_sectors;
vreq->sec = request_discard_msg->sector_number;

/*
* TODO Isn't this kind of expensive to do for each requests? Why does
* the tapdisk need this in the first place?
*/
snprintf(req->name, sizeof(req->name), "xenvbd-%d-%d.%"SCNx64"",
blkif->domid, blkif->devid, request_discard_msg->id);
vreq->name = req->name;
vreq->token = blkif;
vreq->cb = __tapdisk_xenblkif_request_cb;

return err;
}

/**
* Initialises the standard tapdisk request (td_vbd_request_t) from the
Expand Down Expand Up @@ -710,6 +741,11 @@ tapdisk_xenblkif_make_vbd_request(struct td_xenblkif * const blkif,
tapreq->prot = PROT_READ;
vreq->op = TD_OP_WRITE;
break;
case BLKIF_OP_DISCARD:
blkif->stats.xenvbd->st_ds_req++;
tapreq->prot = PROT_WRITE;
vreq->op = TD_OP_DISCARD;
break;
default:
RING_ERR(blkif, "req %lu: invalid request type %d\n",
tapreq->msg.id, tapreq->msg.operation);
Expand All @@ -723,15 +759,18 @@ tapdisk_xenblkif_make_vbd_request(struct td_xenblkif * const blkif,
* Check that the number of segments is sane.
*/
if (unlikely((tapreq->msg.nr_segments == 0 &&
tapreq->msg.operation != BLKIF_OP_WRITE_BARRIER) ||
tapreq->msg.operation != BLKIF_OP_WRITE_BARRIER &&
tapreq->msg.operation != BLKIF_OP_DISCARD) ||
tapreq->msg.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
RING_ERR(blkif, "req %lu: bad number of segments in request (%d)\n",
tapreq->msg.id, tapreq->msg.nr_segments);
err = EINVAL;
goto out;
}

if (likely(tapreq->msg.nr_segments))
if (unlikely(tapreq->msg.operation == BLKIF_OP_DISCARD))
err = tapdisk_xenblkif_parse_request_discard(blkif, tapreq);
else if (likely(tapreq->msg.nr_segments))
err = tapdisk_xenblkif_parse_request(blkif, tapreq);
/*
* If we only got one request from the ring and that was a barrier one,
Expand Down Expand Up @@ -781,7 +820,8 @@ tapdisk_xenblkif_queue_request(struct td_xenblkif * const blkif,
return err;
}

if (likely(tapreq->msg.nr_segments)) {
if (likely(tapreq->msg.nr_segments ||
tapreq->msg.operation == BLKIF_OP_DISCARD )) {
err = tapdisk_vbd_queue_request(blkif->vbd, &tapreq->vreq);
if (unlikely(err)) {
/* TODO log error */
Expand Down
3 changes: 1 addition & 2 deletions include/blktap3.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@
*/
struct blkback_stats {
/**
* BLKIF_OP_DISCARD, not currently supported in blktap3, should always
* be zero
* Received BLKIF_OP_DISCARD requests.
*/
unsigned long long st_ds_req;

Expand Down
19 changes: 19 additions & 0 deletions include/xen_blkif.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,21 @@ struct blkif_x86_32_request {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_32_request_discard {
uint8_t operation; /* BLKIF_OP_DISCARD */
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
blkif_vdev_t handle; /* was "handle" for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
uint64_t nr_sectors;
};
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
typedef struct blkif_x86_32_request blkif_x86_32_request_t;
typedef struct blkif_x86_32_request_discard blkif_x86_32_request_discard_t;
typedef struct blkif_x86_32_response blkif_x86_32_response_t;
#pragma pack(pop)

Expand All @@ -44,12 +53,22 @@ struct blkif_x86_64_request {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
struct blkif_x86_64_request_discard {
uint8_t operation; /* BLKIF_OP_DISCARD */
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
blkif_vdev_t _pad1; /* was "handle" for read/write requests */
uint32_t _pad2;
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
uint64_t nr_sectors;
};
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
typedef struct blkif_x86_64_request blkif_x86_64_request_t;
typedef struct blkif_x86_64_request_discard blkif_x86_64_request_discard_t;
typedef struct blkif_x86_64_response blkif_x86_64_response_t;

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
Expand Down

0 comments on commit 1a51656

Please sign in to comment.