From a449817cb649edef012c3f988a379aa609e05d50 Mon Sep 17 00:00:00 2001 From: Optane_KHJ Date: Mon, 23 Aug 2021 18:32:00 +0900 Subject: [PATCH] Modify how new write entries are added, only add write entries for duplicate data page, use the original target entry after the deduplication --- fs/nova/balloc.c | 1886 ++++++++++++++++++++++++-------------------- fs/nova/dedup.c | 1238 ++++++++++++++--------------- fs/nova/dedup.h | 33 +- fs/nova/nova_def.h | 2 +- fs/nova/super.c | 5 +- fs/nova/super.h | 9 +- 6 files changed, 1631 insertions(+), 1542 deletions(-) diff --git a/fs/nova/balloc.c b/fs/nova/balloc.c index 5a0ca2cabc4a..ee1113e1f097 100644 --- a/fs/nova/balloc.c +++ b/fs/nova/balloc.c @@ -31,1036 +31,1168 @@ int nova_alloc_block_free_lists(struct super_block *sb) { - struct nova_sb_info *sbi = NOVA_SB(sb); - struct free_list *free_list; - int i; + struct nova_sb_info *sbi = NOVA_SB(sb); + struct free_list *free_list; + int i; - sbi->free_lists = kcalloc(sbi->cpus, sizeof(struct free_list), - GFP_KERNEL); + sbi->free_lists = kcalloc(sbi->cpus, sizeof(struct free_list), + GFP_KERNEL); - if (!sbi->free_lists) - return -ENOMEM; + if (!sbi->free_lists) + return -ENOMEM; - for (i = 0; i < sbi->cpus; i++) { - free_list = nova_get_free_list(sb, i); - free_list->block_free_tree = RB_ROOT; - spin_lock_init(&free_list->s_lock); - free_list->index = i; - } + for (i = 0; i < sbi->cpus; i++) { + free_list = nova_get_free_list(sb, i); + free_list->block_free_tree = RB_ROOT; + spin_lock_init(&free_list->s_lock); + free_list->index = i; + } - return 0; + return 0; } void nova_delete_free_lists(struct super_block *sb) { - struct nova_sb_info *sbi = NOVA_SB(sb); + struct nova_sb_info *sbi = NOVA_SB(sb); - /* Each tree is freed in save_blocknode_mappings */ - kfree(sbi->free_lists); - sbi->free_lists = NULL; + /* Each tree is freed in save_blocknode_mappings */ + kfree(sbi->free_lists); + sbi->free_lists = NULL; } static int nova_data_csum_init_free_list(struct super_block *sb, - struct free_list *free_list) + struct free_list *free_list) { - struct nova_sb_info *sbi = NOVA_SB(sb); - unsigned long data_csum_blocks; - - /* Allocate pages to hold data checksums. We store one checksum for - * each stripe for each page. We replicate the checksums at the - * beginning and end of per-cpu region that holds the data they cover. - */ - data_csum_blocks = ((sbi->initsize >> NOVA_STRIPE_SHIFT) - * NOVA_DATA_CSUM_LEN) >> PAGE_SHIFT; - free_list->csum_start = free_list->block_start; - free_list->block_start += data_csum_blocks / sbi->cpus; - if (data_csum_blocks % sbi->cpus) - free_list->block_start++; - - free_list->num_csum_blocks = - free_list->block_start - free_list->csum_start; - - free_list->replica_csum_start = free_list->block_end + 1 - - free_list->num_csum_blocks; - free_list->block_end -= free_list->num_csum_blocks; - - return 0; + struct nova_sb_info *sbi = NOVA_SB(sb); + unsigned long data_csum_blocks; + + /* Allocate pages to hold data checksums. We store one checksum for + * each stripe for each page. We replicate the checksums at the + * beginning and end of per-cpu region that holds the data they cover. 
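+	 * (A worked example with illustrative numbers, not taken from this
+	 * patch: with initsize = 64 GiB, 512-byte stripes and 4 KiB pages,
+	 * there are 2^36 >> 9 = 2^27 stripes; at 4 bytes of checksum per
+	 * stripe that is 2^29 bytes, so data_csum_blocks = 2^29 >> 12 =
+	 * 131072 pages, split across the per-CPU regions below.)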
+ */ + data_csum_blocks = ((sbi->initsize >> NOVA_STRIPE_SHIFT) + * NOVA_DATA_CSUM_LEN) >> PAGE_SHIFT; + free_list->csum_start = free_list->block_start; + free_list->block_start += data_csum_blocks / sbi->cpus; + if (data_csum_blocks % sbi->cpus) + free_list->block_start++; + + free_list->num_csum_blocks = + free_list->block_start - free_list->csum_start; + + free_list->replica_csum_start = free_list->block_end + 1 - + free_list->num_csum_blocks; + free_list->block_end -= free_list->num_csum_blocks; + + return 0; } static int nova_data_parity_init_free_list(struct super_block *sb, - struct free_list *free_list) + struct free_list *free_list) { - struct nova_sb_info *sbi = NOVA_SB(sb); - unsigned long blocksize, total_blocks, parity_blocks; - - /* Allocate blocks to store data block parity stripes. - * Always reserve in case user turns it off at init mount but later - * turns it on. - */ - blocksize = sb->s_blocksize; - total_blocks = sbi->initsize / blocksize; - parity_blocks = total_blocks / (blocksize / NOVA_STRIPE_SIZE + 1); - if (total_blocks % (blocksize / NOVA_STRIPE_SIZE + 1)) - parity_blocks++; - - free_list->parity_start = free_list->block_start; - free_list->block_start += parity_blocks / sbi->cpus; - if (parity_blocks % sbi->cpus) - free_list->block_start++; - - free_list->num_parity_blocks = - free_list->block_start - free_list->parity_start; - - free_list->replica_parity_start = free_list->block_end + 1 - - free_list->num_parity_blocks; - - return 0; + struct nova_sb_info *sbi = NOVA_SB(sb); + unsigned long blocksize, total_blocks, parity_blocks; + + /* Allocate blocks to store data block parity stripes. + * Always reserve in case user turns it off at init mount but later + * turns it on. + */ + blocksize = sb->s_blocksize; + total_blocks = sbi->initsize / blocksize; + parity_blocks = total_blocks / (blocksize / NOVA_STRIPE_SIZE + 1); + if (total_blocks % (blocksize / NOVA_STRIPE_SIZE + 1)) + parity_blocks++; + + free_list->parity_start = free_list->block_start; + free_list->block_start += parity_blocks / sbi->cpus; + if (parity_blocks % sbi->cpus) + free_list->block_start++; + + free_list->num_parity_blocks = + free_list->block_start - free_list->parity_start; + + free_list->replica_parity_start = free_list->block_end + 1 - + free_list->num_parity_blocks; + + return 0; } // Initialize a free list. Each CPU gets an equal share of the block space to // manage. 
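// Illustrative sketch (hypothetical helper, not part of the patch) of the
// reworked partitioning arithmetic below. With, say, num_blocks = 1024,
// head_reserved_blocks = 16, tail_reserved_blocks = 8 and cpus = 4, every
// list gets (1024 - 16) / 4 = 252 blocks, list 0 starts at block 16 instead
// of overlapping the reserved head, and list 3 ends at
// 16 + 4 * 252 - 1 - 8 = 1015.
static inline void example_partition(unsigned long num_blocks,
	unsigned long head_reserved, unsigned long tail_reserved,
	unsigned int cpus, unsigned int index,
	unsigned long *start, unsigned long *end)
{
	unsigned long per_list = (num_blocks - head_reserved) / cpus;

	*start = head_reserved + per_list * index;
	*end = *start + per_list - 1;
	if (index == cpus - 1)
		*end -= tail_reserved;	/* the last list also gives up the tail */
}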
static void nova_init_free_list(struct super_block *sb, - struct free_list *free_list, int index) + struct free_list *free_list, int index) { - struct nova_sb_info *sbi = NOVA_SB(sb); - unsigned long per_list_blocks; - /* - per_list_blocks = sbi->num_blocks / sbi->cpus; + struct nova_sb_info *sbi = NOVA_SB(sb); + unsigned long per_list_blocks; + /* + per_list_blocks = sbi->num_blocks / sbi->cpus; - free_list->block_start = per_list_blocks * index; - free_list->block_end = free_list->block_start + - per_list_blocks - 1; - if (index == 0) - free_list->block_start += sbi->head_reserved_blocks; - if (index == sbi->cpus - 1) - free_list->block_end -= sbi->tail_reserved_blocks; - */ - /* NOVA DEDUP KHJ */ - per_list_blocks = (sbi->num_blocks - sbi->head_reserved_blocks) / sbi->cpus; + free_list->block_start = per_list_blocks * index; + free_list->block_end = free_list->block_start + + per_list_blocks - 1; + if (index == 0) + free_list->block_start += sbi->head_reserved_blocks; + if (index == sbi->cpus - 1) + free_list->block_end -= sbi->tail_reserved_blocks; + */ + /* NOVA DEDUP KHJ */ + per_list_blocks = (sbi->num_blocks - sbi->head_reserved_blocks) / sbi->cpus; - free_list->block_start = sbi->head_reserved_blocks + per_list_blocks * index; - free_list->block_end = free_list->block_start + per_list_blocks -1; + free_list->block_start = sbi->head_reserved_blocks + per_list_blocks * index; + free_list->block_end = free_list->block_start + per_list_blocks -1; - printk("cpu%d: start%lu, end%lu\n",index,free_list->block_start, free_list->block_end); + printk("cpu%d: start%lu, end%lu\n",index,free_list->block_start, free_list->block_end); - if(index==sbi->cpus-1) - free_list->block_end -= sbi->tail_reserved_blocks; + if(index==sbi->cpus-1) + free_list->block_end -= sbi->tail_reserved_blocks; - nova_data_csum_init_free_list(sb, free_list); - nova_data_parity_init_free_list(sb, free_list); + nova_data_csum_init_free_list(sb, free_list); + nova_data_parity_init_free_list(sb, free_list); } struct nova_range_node *nova_alloc_blocknode(struct super_block *sb) { - return nova_alloc_range_node(sb); + return nova_alloc_range_node(sb); } void nova_free_blocknode(struct nova_range_node *node) { - nova_free_range_node(node); + nova_free_range_node(node); } void nova_init_blockmap(struct super_block *sb, int recovery) { - struct nova_sb_info *sbi = NOVA_SB(sb); - struct rb_root *tree; - struct nova_range_node *blknode; - struct free_list *free_list; - int i; - int ret; - - /* Divide the block range among per-CPU free lists */ - /* NOVA DEDUP KHJ */ - //sbi->per_list_blocks = sbi->num_blocks / sbi->cpus; - sbi->per_list_blocks = (sbi->num_blocks - sbi->head_reserved_blocks) / sbi->cpus; - - for (i = 0; i < sbi->cpus; i++) { - free_list = nova_get_free_list(sb, i); - tree = &(free_list->block_free_tree); - nova_init_free_list(sb, free_list, i); - - /* For recovery, update these fields later */ - if (recovery == 0) { - free_list->num_free_blocks = free_list->block_end - - free_list->block_start + 1; - - blknode = nova_alloc_blocknode(sb); - if (blknode == NULL) - BUG(); - blknode->range_low = free_list->block_start; - blknode->range_high = free_list->block_end; - nova_update_range_node_checksum(blknode); - ret = nova_insert_blocktree(tree, blknode); - if (ret) { - nova_err(sb, "%s failed\n", __func__); - nova_free_blocknode(blknode); - return; - } - free_list->first_node = blknode; - free_list->last_node = blknode; - free_list->num_blocknode = 1; - } - - nova_dbgv("%s: free list %d: block start %lu, end %lu, " - 
"%lu free blocks\n", - __func__, i, - free_list->block_start, - free_list->block_end, - free_list->num_free_blocks); - } + struct nova_sb_info *sbi = NOVA_SB(sb); + struct rb_root *tree; + struct nova_range_node *blknode; + struct free_list *free_list; + int i; + int ret; + + /* Divide the block range among per-CPU free lists */ + /* NOVA DEDUP KHJ */ + //sbi->per_list_blocks = sbi->num_blocks / sbi->cpus; + sbi->per_list_blocks = (sbi->num_blocks - sbi->head_reserved_blocks) / sbi->cpus; + + for (i = 0; i < sbi->cpus; i++) { + free_list = nova_get_free_list(sb, i); + tree = &(free_list->block_free_tree); + nova_init_free_list(sb, free_list, i); + + /* For recovery, update these fields later */ + if (recovery == 0) { + free_list->num_free_blocks = free_list->block_end - + free_list->block_start + 1; + + blknode = nova_alloc_blocknode(sb); + if (blknode == NULL) + BUG(); + blknode->range_low = free_list->block_start; + blknode->range_high = free_list->block_end; + nova_update_range_node_checksum(blknode); + ret = nova_insert_blocktree(tree, blknode); + if (ret) { + nova_err(sb, "%s failed\n", __func__); + nova_free_blocknode(blknode); + return; + } + free_list->first_node = blknode; + free_list->last_node = blknode; + free_list->num_blocknode = 1; + } + + nova_dbgv("%s: free list %d: block start %lu, end %lu, " + "%lu free blocks\n", + __func__, i, + free_list->block_start, + free_list->block_end, + free_list->num_free_blocks); + } } static inline int nova_rbtree_compare_rangenode(struct nova_range_node *curr, - unsigned long key, enum node_type type) + unsigned long key, enum node_type type) { - if (type == NODE_DIR) { - if (key < curr->hash) - return -1; - if (key > curr->hash) - return 1; - return 0; - } - - /* Block and inode */ - if (key < curr->range_low) - return -1; - if (key > curr->range_high) - return 1; - - return 0; + if (type == NODE_DIR) { + if (key < curr->hash) + return -1; + if (key > curr->hash) + return 1; + return 0; + } + + /* Block and inode */ + if (key < curr->range_low) + return -1; + if (key > curr->range_high) + return 1; + + return 0; } int nova_find_range_node(struct rb_root *tree, unsigned long key, - enum node_type type, struct nova_range_node **ret_node) + enum node_type type, struct nova_range_node **ret_node) { - struct nova_range_node *curr = NULL; - struct rb_node *temp; - int compVal; - int ret = 0; - - temp = tree->rb_node; - - while (temp) { - curr = container_of(temp, struct nova_range_node, node); - compVal = nova_rbtree_compare_rangenode(curr, key, type); - - if (compVal == -1) { - temp = temp->rb_left; - } else if (compVal == 1) { - temp = temp->rb_right; - } else { - ret = 1; - break; - } - } - - if (curr && !nova_range_node_checksum_ok(curr)) { - nova_dbg("%s: curr failed\n", __func__); - return 0; - } - - *ret_node = curr; - return ret; + struct nova_range_node *curr = NULL; + struct rb_node *temp; + int compVal; + int ret = 0; + + temp = tree->rb_node; + + while (temp) { + curr = container_of(temp, struct nova_range_node, node); + compVal = nova_rbtree_compare_rangenode(curr, key, type); + + if (compVal == -1) { + temp = temp->rb_left; + } else if (compVal == 1) { + temp = temp->rb_right; + } else { + ret = 1; + break; + } + } + + if (curr && !nova_range_node_checksum_ok(curr)) { + nova_dbg("%s: curr failed\n", __func__); + return 0; + } + + *ret_node = curr; + return ret; } int nova_insert_range_node(struct rb_root *tree, - struct nova_range_node *new_node, enum node_type type) + struct nova_range_node *new_node, enum node_type type) { - 
struct nova_range_node *curr; - struct rb_node **temp, *parent; - int compVal; - - temp = &(tree->rb_node); - parent = NULL; - - while (*temp) { - curr = container_of(*temp, struct nova_range_node, node); - compVal = nova_rbtree_compare_rangenode(curr, - new_node->range_low, type); - parent = *temp; - - if (compVal == -1) { - temp = &((*temp)->rb_left); - } else if (compVal == 1) { - temp = &((*temp)->rb_right); - } else { - nova_dbg("%s: type %d entry %lu - %lu already exists: " - "%lu - %lu\n", - __func__, type, new_node->range_low, - new_node->range_high, curr->range_low, - curr->range_high); - return -EINVAL; - } - } - - rb_link_node(&new_node->node, parent, temp); - rb_insert_color(&new_node->node, tree); - - return 0; + struct nova_range_node *curr; + struct rb_node **temp, *parent; + int compVal; + + temp = &(tree->rb_node); + parent = NULL; + + while (*temp) { + curr = container_of(*temp, struct nova_range_node, node); + compVal = nova_rbtree_compare_rangenode(curr, + new_node->range_low, type); + parent = *temp; + + if (compVal == -1) { + temp = &((*temp)->rb_left); + } else if (compVal == 1) { + temp = &((*temp)->rb_right); + } else { + nova_dbg("%s: type %d entry %lu - %lu already exists: " + "%lu - %lu\n", + __func__, type, new_node->range_low, + new_node->range_high, curr->range_low, + curr->range_high); + return -EINVAL; + } + } + + rb_link_node(&new_node->node, parent, temp); + rb_insert_color(&new_node->node, tree); + + return 0; } void nova_destroy_range_node_tree(struct super_block *sb, - struct rb_root *tree) + struct rb_root *tree) { - struct nova_range_node *curr; - struct rb_node *temp; - - temp = rb_first(tree); - while (temp) { - curr = container_of(temp, struct nova_range_node, node); - temp = rb_next(temp); - rb_erase(&curr->node, tree); - nova_free_range_node(curr); - } + struct nova_range_node *curr; + struct rb_node *temp; + + temp = rb_first(tree); + while (temp) { + curr = container_of(temp, struct nova_range_node, node); + temp = rb_next(temp); + rb_erase(&curr->node, tree); + nova_free_range_node(curr); + } } int nova_insert_blocktree(struct rb_root *tree, - struct nova_range_node *new_node) + struct nova_range_node *new_node) { - int ret; + int ret; - ret = nova_insert_range_node(tree, new_node, NODE_BLOCK); - if (ret) - nova_dbg("ERROR: %s failed %d\n", __func__, ret); + ret = nova_insert_range_node(tree, new_node, NODE_BLOCK); + if (ret) + nova_dbg("ERROR: %s failed %d\n", __func__, ret); - return ret; + return ret; } /* Used for both block free tree and inode inuse tree */ int nova_find_free_slot(struct rb_root *tree, unsigned long range_low, - unsigned long range_high, struct nova_range_node **prev, - struct nova_range_node **next) + unsigned long range_high, struct nova_range_node **prev, + struct nova_range_node **next) { - struct nova_range_node *ret_node = NULL; - struct rb_node *tmp; - int check_prev = 0, check_next = 0; - int ret; - - ret = nova_find_range_node(tree, range_low, NODE_BLOCK, &ret_node); - if (ret) { - nova_dbg("%s ERROR: %lu - %lu already in free list\n", - __func__, range_low, range_high); - return -EINVAL; - } + struct nova_range_node *ret_node = NULL; + struct rb_node *tmp; + int check_prev = 0, check_next = 0; + int ret; + + ret = nova_find_range_node(tree, range_low, NODE_BLOCK, &ret_node); + if (ret) { + nova_dbg("%s ERROR: %lu - %lu already in free list\n", + __func__, range_low, range_high); + return -EINVAL; + } + + if (!ret_node) { + *prev = *next = NULL; + } else if (ret_node->range_high < range_low) { + *prev = 
ret_node; + tmp = rb_next(&ret_node->node); + if (tmp) { + *next = container_of(tmp, struct nova_range_node, node); + check_next = 1; + } else { + *next = NULL; + } + } else if (ret_node->range_low > range_high) { + *next = ret_node; + tmp = rb_prev(&ret_node->node); + if (tmp) { + *prev = container_of(tmp, struct nova_range_node, node); + check_prev = 1; + } else { + *prev = NULL; + } + } else { + nova_dbg("%s ERROR: %lu - %lu overlaps with existing " + "node %lu - %lu\n", + __func__, range_low, range_high, ret_node->range_low, + ret_node->range_high); + return -EINVAL; + } + + if (check_prev && !nova_range_node_checksum_ok(*prev)) { + nova_dbg("%s: prev failed\n", __func__); + return -EIO; + } + + if (check_next && !nova_range_node_checksum_ok(*next)) { + nova_dbg("%s: next failed\n", __func__); + return -EIO; + } + + return 0; +} - if (!ret_node) { - *prev = *next = NULL; - } else if (ret_node->range_high < range_low) { - *prev = ret_node; - tmp = rb_next(&ret_node->node); - if (tmp) { - *next = container_of(tmp, struct nova_range_node, node); - check_next = 1; - } else { - *next = NULL; - } - } else if (ret_node->range_low > range_high) { - *next = ret_node; - tmp = rb_prev(&ret_node->node); - if (tmp) { - *prev = container_of(tmp, struct nova_range_node, node); - check_prev = 1; - } else { - *prev = NULL; - } - } else { - nova_dbg("%s ERROR: %lu - %lu overlaps with existing " - "node %lu - %lu\n", - __func__, range_low, range_high, ret_node->range_low, - ret_node->range_high); - return -EINVAL; - } +static int nova_free_blocks(struct super_block *sb, unsigned long blocknr, + int num, unsigned short btype, int log_page) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct rb_root *tree; + unsigned long block_low; + unsigned long block_high; + unsigned long num_blocks = 0; + struct nova_range_node *prev = NULL; + struct nova_range_node *next = NULL; + struct nova_range_node *curr_node; + struct free_list *free_list; + int cpuid; + int new_node_used = 0; + int ret=0; + INIT_TIMING(free_time); + + if (num <= 0) { + nova_dbg("%s ERROR: free %d\n", __func__, num); + return -EINVAL; + } + + NOVA_START_TIMING(free_blocks_t, free_time); + /* NOVA DEDUP KHJ */ + //cpuid = blocknr / sbi->per_list_blocks; + cpuid = (blocknr - sbi->head_reserved_blocks)/sbi->per_list_blocks; + + /* Pre-allocate blocknode */ + curr_node = nova_alloc_blocknode(sb); + if (curr_node == NULL) { + /* returning without freeing the block*/ + NOVA_END_TIMING(free_blocks_t, free_time); + return -ENOMEM; + } + + free_list = nova_get_free_list(sb, cpuid); + + spin_lock(&free_list->s_lock); + + tree = &(free_list->block_free_tree); + + num_blocks = nova_get_numblocks(btype) * num; + block_low = blocknr; + block_high = blocknr + num_blocks - 1; + + nova_dbgv("Free: %lu - %lu\n", block_low, block_high); + + if (blocknr < free_list->block_start || + blocknr + num > free_list->block_end + 1) { + nova_err(sb, "free blocks %lu to %lu, free list %d, " + "start %lu, end %lu\n", + blocknr, blocknr + num - 1, + free_list->index, + free_list->block_start, + free_list->block_end); + ret = -EIO; + goto out; + } + + ret = nova_find_free_slot(tree, block_low, + block_high, &prev, &next); + + if (ret) { + nova_dbg("%s: find free slot fail: %d\n", __func__, ret); + goto out; + } + + if (prev && next && (block_low == prev->range_high + 1) && + (block_high + 1 == next->range_low)) { + /* fits the hole */ + rb_erase(&next->node, tree); + free_list->num_blocknode--; + prev->range_high = next->range_high; + nova_update_range_node_checksum(prev); + 
if (free_list->last_node == next) + free_list->last_node = prev; + nova_free_blocknode(next); + goto block_found; + } + if (prev && (block_low == prev->range_high + 1)) { + /* Aligns left */ + prev->range_high += num_blocks; + nova_update_range_node_checksum(prev); + goto block_found; + } + if (next && (block_high + 1 == next->range_low)) { + /* Aligns right */ + next->range_low -= num_blocks; + nova_update_range_node_checksum(next); + goto block_found; + } + + /* Aligns somewhere in the middle */ + curr_node->range_low = block_low; + curr_node->range_high = block_high; + nova_update_range_node_checksum(curr_node); + new_node_used = 1; + ret = nova_insert_blocktree(tree, curr_node); + if (ret) { + new_node_used = 0; + goto out; + } + if (!prev) + free_list->first_node = curr_node; + if (!next) + free_list->last_node = curr_node; + + free_list->num_blocknode++; - if (check_prev && !nova_range_node_checksum_ok(*prev)) { - nova_dbg("%s: prev failed\n", __func__); - return -EIO; - } +block_found: + free_list->num_free_blocks += num_blocks; - if (check_next && !nova_range_node_checksum_ok(*next)) { - nova_dbg("%s: next failed\n", __func__); - return -EIO; - } + if (log_page) { + free_list->free_log_count++; + free_list->freed_log_pages += num_blocks; + } else { + free_list->free_data_count++; + free_list->freed_data_pages += num_blocks; + } + +out: + spin_unlock(&free_list->s_lock); + if (new_node_used == 0) + nova_free_blocknode(curr_node); - return 0; + NOVA_END_TIMING(free_blocks_t, free_time); + return ret; } -static int nova_free_blocks(struct super_block *sb, unsigned long blocknr, - int num, unsigned short btype, int log_page) +/* DEDUP NOVA KHJ */ +static int nova_dedup_free_blocks(struct super_block *sb, unsigned long blocknr, + int num, unsigned short btype, int log_page) { - struct nova_sb_info *sbi = NOVA_SB(sb); - struct rb_root *tree; - unsigned long block_low; - unsigned long block_high; - /* NOVA_DEDUP KHJ*/ - unsigned long t_block_low; - unsigned long t_block_high; - - unsigned long num_blocks = 0; - struct nova_range_node *prev = NULL; - struct nova_range_node *next = NULL; - struct nova_range_node *curr_node; - struct free_list *free_list; - int cpuid; - int new_node_used = 0; - int ret=0; - int i; - INIT_TIMING(free_time); - - if (num <= 0) { - nova_dbg("%s ERROR: free %d\n", __func__, num); - return -EINVAL; - } - - NOVA_START_TIMING(free_blocks_t, free_time); - /* NOVA DEDUP KHJ */ - //cpuid = blocknr / sbi->per_list_blocks; - cpuid = (blocknr - sbi->head_reserved_blocks)/sbi->per_list_blocks; - - /* Pre-allocate blocknode */ - curr_node = nova_alloc_blocknode(sb); - if (curr_node == NULL) { - /* returning without freeing the block*/ - NOVA_END_TIMING(free_blocks_t, free_time); - return -ENOMEM; - } - - free_list = nova_get_free_list(sb, cpuid); - num_blocks = nova_get_numblocks(btype) * num; - t_block_low = blocknr; - t_block_high = blocknr + num_blocks - 1; - - block_low = block_high = 0; - - /* NOVA DEDUP KHJ */ - for(i=t_block_low; i<=t_block_high;i++){ - if(nova_dedup_is_duplicate(sb, i,false) != 0){ - block_low = t_block_low; - block_high = i; - if(i != t_block_high) - continue; - } - else{ - t_block_low = i+1; - if(block_low == block_high && block_low == 0) - continue; - } - - - spin_lock(&free_list->s_lock); - tree = &(free_list->block_free_tree); - - //num_blocks = nova_get_numblocks(btype) * num; - //block_low = blocknr; - //block_high = blocknr + num_blocks - 1; - - nova_dbgv("Free: %lu - %lu\n", block_low, block_high); - - if (blocknr < free_list->block_start 
|| - blocknr + num > free_list->block_end + 1) { - nova_err(sb, "free blocks %lu to %lu, free list %d, " - "start %lu, end %lu\n", - blocknr, blocknr + num - 1, - free_list->index, - free_list->block_start, - free_list->block_end); - ret = -EIO; - goto out; - } - - ret = nova_find_free_slot(tree, block_low, - block_high, &prev, &next); - - if (ret) { - nova_dbg("%s: find free slot fail: %d\n", __func__, ret); - goto out; - } - - if (prev && next && (block_low == prev->range_high + 1) && - (block_high + 1 == next->range_low)) { - /* fits the hole */ - rb_erase(&next->node, tree); - free_list->num_blocknode--; - prev->range_high = next->range_high; - nova_update_range_node_checksum(prev); - if (free_list->last_node == next) - free_list->last_node = prev; - nova_free_blocknode(next); - goto block_found; - } - if (prev && (block_low == prev->range_high + 1)) { - /* Aligns left */ - prev->range_high += num_blocks; - nova_update_range_node_checksum(prev); - goto block_found; - } - if (next && (block_high + 1 == next->range_low)) { - /* Aligns right */ - next->range_low -= num_blocks; - nova_update_range_node_checksum(next); - goto block_found; - } - - /* Aligns somewhere in the middle */ - curr_node->range_low = block_low; - curr_node->range_high = block_high; - nova_update_range_node_checksum(curr_node); - new_node_used = 1; - ret = nova_insert_blocktree(tree, curr_node); - if (ret) { - new_node_used = 0; - goto out; - } - if (!prev) - free_list->first_node = curr_node; - if (!next) - free_list->last_node = curr_node; - - free_list->num_blocknode++; + struct nova_sb_info *sbi = NOVA_SB(sb); + struct rb_root *tree; + unsigned long block_low; + unsigned long block_high; + /* NOVA_DEDUP KHJ*/ + unsigned long t_block_low; + unsigned long t_block_high; + + unsigned long num_blocks = 0; + struct nova_range_node *prev = NULL; + struct nova_range_node *next = NULL; + struct nova_range_node *curr_node; + struct free_list *free_list; + int cpuid; + int new_node_used = 0; + int ret=0; + int i; + INIT_TIMING(free_time); + + if (num <= 0) { + nova_dbg("%s ERROR: free %d\n", __func__, num); + return -EINVAL; + } + + NOVA_START_TIMING(free_blocks_t, free_time); + /* NOVA DEDUP KHJ */ + //cpuid = blocknr / sbi->per_list_blocks; + cpuid = (blocknr - sbi->head_reserved_blocks)/sbi->per_list_blocks; + + /* Pre-allocate blocknode */ + curr_node = nova_alloc_blocknode(sb); + if (curr_node == NULL) { + /* returning without freeing the block*/ + NOVA_END_TIMING(free_blocks_t, free_time); + return -ENOMEM; + } + + free_list = nova_get_free_list(sb, cpuid); + num_blocks = nova_get_numblocks(btype) * num; + t_block_low = blocknr; + t_block_high = blocknr + num_blocks - 1; + + block_low = block_high = 0; + + /* NOVA DEDUP KHJ */ + for(i=t_block_low; i<=t_block_high;i++){ + if(nova_dedup_is_duplicate(sb, i,false) != 0){ + block_low = t_block_low; + block_high = i; + if(i != t_block_high) + continue; + } + else{ + t_block_low = i+1; + if(block_low == block_high && block_low == 0) + continue; + } + + + spin_lock(&free_list->s_lock); + tree = &(free_list->block_free_tree); + + //num_blocks = nova_get_numblocks(btype) * num; + //block_low = blocknr; + //block_high = blocknr + num_blocks - 1; + + nova_dbgv("Free: %lu - %lu\n", block_low, block_high); + + if (blocknr < free_list->block_start || + blocknr + num > free_list->block_end + 1) { + nova_err(sb, "free blocks %lu to %lu, free list %d, " + "start %lu, end %lu\n", + blocknr, blocknr + num - 1, + free_list->index, + free_list->block_start, + free_list->block_end); + ret = 
-EIO; + goto out; + } + + ret = nova_find_free_slot(tree, block_low, + block_high, &prev, &next); + + if (ret) { + nova_dbg("%s: find free slot fail: %d\n", __func__, ret); + goto out; + } + + if (prev && next && (block_low == prev->range_high + 1) && + (block_high + 1 == next->range_low)) { + /* fits the hole */ + rb_erase(&next->node, tree); + free_list->num_blocknode--; + prev->range_high = next->range_high; + nova_update_range_node_checksum(prev); + if (free_list->last_node == next) + free_list->last_node = prev; + nova_free_blocknode(next); + goto block_found; + } + if (prev && (block_low == prev->range_high + 1)) { + /* Aligns left */ + prev->range_high += num_blocks; + nova_update_range_node_checksum(prev); + goto block_found; + } + if (next && (block_high + 1 == next->range_low)) { + /* Aligns right */ + next->range_low -= num_blocks; + nova_update_range_node_checksum(next); + goto block_found; + } + + /* Aligns somewhere in the middle */ + curr_node->range_low = block_low; + curr_node->range_high = block_high; + nova_update_range_node_checksum(curr_node); + new_node_used = 1; + ret = nova_insert_blocktree(tree, curr_node); + if (ret) { + new_node_used = 0; + goto out; + } + if (!prev) + free_list->first_node = curr_node; + if (!next) + free_list->last_node = curr_node; + + free_list->num_blocknode++; block_found: - free_list->num_free_blocks += num_blocks; + free_list->num_free_blocks += num_blocks; - if (log_page) { - free_list->free_log_count++; - free_list->freed_log_pages += num_blocks; - } else { - free_list->free_data_count++; - free_list->freed_data_pages += num_blocks; - } + if (log_page) { + free_list->free_log_count++; + free_list->freed_log_pages += num_blocks; + } else { + free_list->free_data_count++; + free_list->freed_data_pages += num_blocks; + } out: - spin_unlock(&free_list->s_lock); - if (new_node_used == 0) - nova_free_blocknode(curr_node); - - block_low = 0; - block_high = 0; - } - NOVA_END_TIMING(free_blocks_t, free_time); - return ret; + spin_unlock(&free_list->s_lock); + if (new_node_used == 0) + nova_free_blocknode(curr_node); + + block_low = 0; + block_high = 0; + } + NOVA_END_TIMING(free_blocks_t, free_time); + return ret; } int nova_free_data_blocks(struct super_block *sb, - struct nova_inode_info_header *sih, unsigned long blocknr, int num) + struct nova_inode_info_header *sih, unsigned long blocknr, int num) { - int ret; - INIT_TIMING(free_time); - - nova_dbgv("Inode %lu: free %d data block from %lu to %lu\n", - sih->ino, num, blocknr, blocknr + num - 1); - if (blocknr == 0) { - nova_dbg("%s: ERROR: %lu, %d\n", __func__, blocknr, num); - return -EINVAL; - } - NOVA_START_TIMING(free_data_t, free_time); - ret = nova_free_blocks(sb, blocknr, num, sih->i_blk_type, 0); - if (ret) { - nova_err(sb, "Inode %lu: free %d data block from %lu to %lu " - "failed!\n", - sih->ino, num, blocknr, blocknr + num - 1); - nova_print_nova_log(sb, sih); - } - NOVA_END_TIMING(free_data_t, free_time); - - return ret; + int ret; + INIT_TIMING(free_time); + + nova_dbgv("Inode %lu: free %d data block from %lu to %lu\n", + sih->ino, num, blocknr, blocknr + num - 1); + if (blocknr == 0) { + nova_dbg("%s: ERROR: %lu, %d\n", __func__, blocknr, num); + return -EINVAL; + } + NOVA_START_TIMING(free_data_t, free_time); + /* DEDUP NOVA KHJ */ + ret = nova_dedup_free_blocks(sb, blocknr, num, sih->i_blk_type, 0); + if (ret) { + nova_err(sb, "Inode %lu: free %d data block from %lu to %lu " + "failed!\n", + sih->ino, num, blocknr, blocknr + num - 1); + nova_print_nova_log(sb, sih); + } + 
NOVA_END_TIMING(free_data_t, free_time); + + return ret; } + int nova_free_log_blocks(struct super_block *sb, - struct nova_inode_info_header *sih, unsigned long blocknr, int num) + struct nova_inode_info_header *sih, unsigned long blocknr, int num) { - int ret; - INIT_TIMING(free_time); - - nova_dbgv("Inode %lu: free %d log block from %lu to %lu\n", - sih->ino, num, blocknr, blocknr + num - 1); - if (blocknr == 0) { - nova_dbg("%s: ERROR: %lu, %d\n", __func__, blocknr, num); - return -EINVAL; - } - NOVA_START_TIMING(free_log_t, free_time); - ret = nova_free_blocks(sb, blocknr, num, sih->i_blk_type, 1); - if (ret) { - nova_err(sb, "Inode %lu: free %d log block from %lu to %lu " - "failed!\n", - sih->ino, num, blocknr, blocknr + num - 1); - nova_print_nova_log(sb, sih); - } - NOVA_END_TIMING(free_log_t, free_time); - - return ret; + int ret; + INIT_TIMING(free_time); + + nova_dbgv("Inode %lu: free %d log block from %lu to %lu\n", + sih->ino, num, blocknr, blocknr + num - 1); + if (blocknr == 0) { + nova_dbg("%s: ERROR: %lu, %d\n", __func__, blocknr, num); + return -EINVAL; + } + NOVA_START_TIMING(free_log_t, free_time); + ret = nova_free_blocks(sb, blocknr, num, sih->i_blk_type, 1); + if (ret) { + nova_err(sb, "Inode %lu: free %d log block from %lu to %lu " + "failed!\n", + sih->ino, num, blocknr, blocknr + num - 1); + nova_print_nova_log(sb, sih); + } + NOVA_END_TIMING(free_log_t, free_time); + + return ret; } static int not_enough_blocks(struct free_list *free_list, - unsigned long num_blocks, enum alloc_type atype) + unsigned long num_blocks, enum alloc_type atype) { - struct nova_range_node *first = free_list->first_node; - struct nova_range_node *last = free_list->last_node; - - if (free_list->num_free_blocks < num_blocks || !first || !last) { - nova_dbgv("%s: num_free_blocks=%ld; num_blocks=%ld; " - "first=0x%p; last=0x%p", - __func__, free_list->num_free_blocks, num_blocks, - first, last); - return 1; - } - - if (atype == LOG && - last->range_high - first->range_low < DEAD_ZONE_BLOCKS) { - nova_dbgv("%s: allocation would cause deadzone violation. " - "high=0x%lx, low=0x%lx, DEADZONE=%d", - __func__, last->range_high, first->range_low, - DEAD_ZONE_BLOCKS); - return 1; - } - - return 0; + struct nova_range_node *first = free_list->first_node; + struct nova_range_node *last = free_list->last_node; + + if (free_list->num_free_blocks < num_blocks || !first || !last) { + nova_dbgv("%s: num_free_blocks=%ld; num_blocks=%ld; " + "first=0x%p; last=0x%p", + __func__, free_list->num_free_blocks, num_blocks, + first, last); + return 1; + } + + if (atype == LOG && + last->range_high - first->range_low < DEAD_ZONE_BLOCKS) { + nova_dbgv("%s: allocation would cause deadzone violation. " + "high=0x%lx, low=0x%lx, DEADZONE=%d", + __func__, last->range_high, first->range_low, + DEAD_ZONE_BLOCKS); + return 1; + } + + return 0; } struct nova_range_node *nova_alloc_blocknode_atomic(struct super_block *sb) { - return nova_alloc_range_node_atomic(sb); + return nova_alloc_range_node_atomic(sb); } #define PAGES_PER_2MB 512 #define PAGES_PER_2MB_MASK (512 - 1) #define IS_DATABLOCKS_2MB_ALIGNED(numblocks, atype) \ - (!(num_blocks & PAGES_PER_2MB_MASK) && (atype == DATA)) + (!(num_blocks & PAGES_PER_2MB_MASK) && (atype == DATA)) /* This method returns the number of blocks allocated. 
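 * (Worked example with made-up numbers: a free extent covering blocks
 * 700-2500 has 700 & 511 = 188, so left_margin = 512 - 188 = 324 and the
 * first 2MB-aligned page is block 1024; a request for 512 pages then
 * returns 1024-1535, leaving 700-1023 and 1536-2500 as margin extents.)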
*/ static long nova_alloc_superpage(struct super_block *sb, - struct free_list *free_list, unsigned long num_blocks, - unsigned long *new_blocknr, enum nova_alloc_direction from_tail) + struct free_list *free_list, unsigned long num_blocks, + unsigned long *new_blocknr, enum nova_alloc_direction from_tail) { - struct rb_root *tree; - struct rb_node *temp; - struct nova_range_node *curr; - unsigned long curr_blocks; - bool found = 0; - unsigned long step = 0; - - unsigned int left_margin; - unsigned int right_margin; - - tree = &(free_list->block_free_tree); - if (from_tail == ALLOC_FROM_HEAD) - temp = &(free_list->first_node->node); - else - temp = &(free_list->last_node->node); - - while (temp) { - step++; - curr = container_of(temp, struct nova_range_node, node); - - if (!nova_range_node_checksum_ok(curr)) { - nova_err(sb, "%s curr failed\n", __func__); - goto next; - } - - curr_blocks = curr->range_high - curr->range_low + 1; - left_margin = PAGES_PER_2MB - - (curr->range_low & PAGES_PER_2MB_MASK); - - /* We are doing best effort here to allocate as many 2MB blocks as possible. */ - if (num_blocks > (curr_blocks - left_margin)) { - if (((curr_blocks - left_margin) & ~PAGES_PER_2MB_MASK) > 0) - num_blocks = (curr_blocks - left_margin) & ~PAGES_PER_2MB_MASK; - } - - /* - * Guard against cases where: - * a. Unaligned free blocks is smaller than #512 - * left_margin could larger than curr_blocks. - * b. After alignment, free blocks is smaller than - * requested blocks. - * Otherwise, we are free to go. - */ - if ((curr_blocks > left_margin) && \ - (num_blocks <= (curr_blocks - left_margin))) { - struct nova_range_node *node; - unsigned long saved_range_high = curr->range_high; - - *new_blocknr = curr->range_low + left_margin; - right_margin = curr_blocks - left_margin - num_blocks; - nova_dbgv("curr:%p: num_blocks:%lu curr->range_low:%lu high:%lu", - curr, num_blocks, curr->range_low, curr->range_high); - - if (left_margin) { - /* Reuse "curr" and its "first_node" indicator. */ - curr->range_high = curr->range_low + left_margin - 1; - nova_update_range_node_checksum(curr); - nova_dbgv("Insert node for left_margin, range_low:%lu high:%lu", - curr->range_low, curr->range_high); - } - - if (right_margin) { - if (left_margin) { - /* curr was reused for left_margin node, grab new one. */ - node = nova_alloc_blocknode_atomic(sb); - if (node == NULL) { - nova_warn("Failed to allocate new block node.\n"); - return -ENOMEM; - } - node->range_low = curr->range_low + left_margin + num_blocks; - node->range_high = saved_range_high; - nova_update_range_node_checksum(node); - nova_insert_blocktree(tree, node); - free_list->num_blocknode++; - if (curr == free_list->last_node) - free_list->last_node = node; - } else { - /* - * curr->range_low is aligned, reuse curr for right_margin. - * Update the checksum as needed. - */ - curr->range_low = curr->range_low + num_blocks; - nova_update_range_node_checksum(curr); - } - nova_dbgv("Insert node for right_margin, range_low:%lu high:%lu", - node->range_low, node->range_high); - } - - /* Catch up special case where curr is aligned and used up. */ - if (!left_margin && !right_margin) { - - /* corner case in corner, spotted by Andiry. 
*/ - node = NULL; - if (curr == free_list->first_node) { - temp = rb_next(temp); - if (temp) - node = container_of(temp, struct nova_range_node, node); - free_list->first_node = node; - } - if (curr == free_list->last_node) { - temp = rb_prev(temp); - if (temp) - node = container_of(temp, struct nova_range_node, node); - free_list->last_node = node; - } - - /* release curr after updating {first, last}_node */ - rb_erase(&curr->node, tree); - nova_free_blocknode(curr); - free_list->num_blocknode--; - } - - found = 1; - break; - } -next: - if (from_tail == ALLOC_FROM_HEAD) - temp = rb_next(temp); - else - temp = rb_prev(temp); + struct rb_root *tree; + struct rb_node *temp; + struct nova_range_node *curr; + unsigned long curr_blocks; + bool found = 0; + unsigned long step = 0; + + unsigned int left_margin; + unsigned int right_margin; + + tree = &(free_list->block_free_tree); + if (from_tail == ALLOC_FROM_HEAD) + temp = &(free_list->first_node->node); + else + temp = &(free_list->last_node->node); + + while (temp) { + step++; + curr = container_of(temp, struct nova_range_node, node); + + if (!nova_range_node_checksum_ok(curr)) { + nova_err(sb, "%s curr failed\n", __func__); + goto next; + } + + curr_blocks = curr->range_high - curr->range_low + 1; + left_margin = PAGES_PER_2MB - + (curr->range_low & PAGES_PER_2MB_MASK); + + /* We are doing best effort here to allocate as many 2MB blocks as possible. */ + if (num_blocks > (curr_blocks - left_margin)) { + if (((curr_blocks - left_margin) & ~PAGES_PER_2MB_MASK) > 0) + num_blocks = (curr_blocks - left_margin) & ~PAGES_PER_2MB_MASK; + } + + /* + * Guard against cases where: + * a. Unaligned free blocks is smaller than #512 + * left_margin could larger than curr_blocks. + * b. After alignment, free blocks is smaller than + * requested blocks. + * Otherwise, we are free to go. + */ + if ((curr_blocks > left_margin) && \ + (num_blocks <= (curr_blocks - left_margin))) { + struct nova_range_node *node; + unsigned long saved_range_high = curr->range_high; + + *new_blocknr = curr->range_low + left_margin; + right_margin = curr_blocks - left_margin - num_blocks; + nova_dbgv("curr:%p: num_blocks:%lu curr->range_low:%lu high:%lu", + curr, num_blocks, curr->range_low, curr->range_high); + + if (left_margin) { + /* Reuse "curr" and its "first_node" indicator. */ + curr->range_high = curr->range_low + left_margin - 1; + nova_update_range_node_checksum(curr); + nova_dbgv("Insert node for left_margin, range_low:%lu high:%lu", + curr->range_low, curr->range_high); + } + + if (right_margin) { + if (left_margin) { + /* curr was reused for left_margin node, grab new one. */ + node = nova_alloc_blocknode_atomic(sb); + if (node == NULL) { + nova_warn("Failed to allocate new block node.\n"); + return -ENOMEM; + } + node->range_low = curr->range_low + left_margin + num_blocks; + node->range_high = saved_range_high; + nova_update_range_node_checksum(node); + nova_insert_blocktree(tree, node); + free_list->num_blocknode++; + if (curr == free_list->last_node) + free_list->last_node = node; + } else { + /* + * curr->range_low is aligned, reuse curr for right_margin. + * Update the checksum as needed. + */ + curr->range_low = curr->range_low + num_blocks; + nova_update_range_node_checksum(curr); + } + nova_dbgv("Insert node for right_margin, range_low:%lu high:%lu", + node->range_low, node->range_high); + } + + /* Catch up special case where curr is aligned and used up. */ + if (!left_margin && !right_margin) { + + /* corner case in corner, spotted by Andiry. 
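+			 * Both margins are zero here: the extent is exactly
+			 * 2MB-aligned and consumed whole, so curr itself is
+			 * erased, after first_node/last_node are re-pointed.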
*/ + node = NULL; + if (curr == free_list->first_node) { + temp = rb_next(temp); + if (temp) + node = container_of(temp, struct nova_range_node, node); + free_list->first_node = node; + } + if (curr == free_list->last_node) { + temp = rb_prev(temp); + if (temp) + node = container_of(temp, struct nova_range_node, node); + free_list->last_node = node; } - NOVA_STATS_ADD(alloc_steps, step); - return found ? num_blocks : 0; + /* release curr after updating {first, last}_node */ + rb_erase(&curr->node, tree); + nova_free_blocknode(curr); + free_list->num_blocknode--; + } + + found = 1; + break; + } +next: + if (from_tail == ALLOC_FROM_HEAD) + temp = rb_next(temp); + else + temp = rb_prev(temp); + } + + NOVA_STATS_ADD(alloc_steps, step); + return found ? num_blocks : 0; } /* Return how many blocks allocated */ static long nova_alloc_blocks_in_free_list(struct super_block *sb, - struct free_list *free_list, unsigned short btype, - enum alloc_type atype, unsigned long num_blocks, - unsigned long *new_blocknr, enum nova_alloc_direction from_tail) + struct free_list *free_list, unsigned short btype, + enum alloc_type atype, unsigned long num_blocks, + unsigned long *new_blocknr, enum nova_alloc_direction from_tail) { - struct rb_root *tree; - struct nova_range_node *curr, *next = NULL, *prev = NULL; - struct rb_node *temp, *next_node, *prev_node; - unsigned long curr_blocks; - long ret_blocks = 0; - bool found = 0; - bool found_hugeblock = 0; - unsigned long step = 0; - - if (!free_list->first_node || free_list->num_free_blocks == 0) { - nova_dbgv("%s: Can't alloc. free_list->first_node=0x%p " - "free_list->num_free_blocks = %lu", - __func__, free_list->first_node, - free_list->num_free_blocks); - return -ENOSPC; - } - - if (atype == LOG && not_enough_blocks(free_list, num_blocks, atype)) { - nova_dbgv("%s: Can't alloc. 
not_enough_blocks() == true", - __func__); - return -ENOSPC; - } - - tree = &(free_list->block_free_tree); - if (from_tail == ALLOC_FROM_HEAD) - temp = &(free_list->first_node->node); - else - temp = &(free_list->last_node->node); - - /* Try huge block allocation for data blocks first */ - if (IS_DATABLOCKS_2MB_ALIGNED(num_blocks, atype)) { - ret_blocks = nova_alloc_superpage(sb, free_list, - num_blocks, new_blocknr, from_tail); - if (ret_blocks > 0 && *new_blocknr != 0) { - num_blocks = ret_blocks; - found_hugeblock = 1; - goto success; - } - } - - /* fallback to un-aglined allocation then */ - while (temp) { - step++; - curr = container_of(temp, struct nova_range_node, node); - - if (!nova_range_node_checksum_ok(curr)) { - nova_err(sb, "%s curr failed\n", __func__); - goto next; - } - - curr_blocks = curr->range_high - curr->range_low + 1; - - if (num_blocks >= curr_blocks) { - /* Superpage allocation must succeed */ - if (btype > 0 && num_blocks > curr_blocks) - goto next; - - /* Otherwise, allocate the whole blocknode */ - if (curr == free_list->first_node) { - next_node = rb_next(temp); - if (next_node) - next = container_of(next_node, - struct nova_range_node, node); - free_list->first_node = next; - } - - if (curr == free_list->last_node) { - prev_node = rb_prev(temp); - if (prev_node) - prev = container_of(prev_node, - struct nova_range_node, node); - free_list->last_node = prev; - } - - rb_erase(&curr->node, tree); - free_list->num_blocknode--; - num_blocks = curr_blocks; - *new_blocknr = curr->range_low; - nova_free_blocknode(curr); - found = 1; - break; - } - - /* Allocate partial blocknode */ - if (from_tail == ALLOC_FROM_HEAD) { - *new_blocknr = curr->range_low; - curr->range_low += num_blocks; - } else { - *new_blocknr = curr->range_high + 1 - num_blocks; - curr->range_high -= num_blocks; - } - - nova_update_range_node_checksum(curr); - found = 1; - break; + struct rb_root *tree; + struct nova_range_node *curr, *next = NULL, *prev = NULL; + struct rb_node *temp, *next_node, *prev_node; + unsigned long curr_blocks; + long ret_blocks = 0; + bool found = 0; + bool found_hugeblock = 0; + unsigned long step = 0; + + if (!free_list->first_node || free_list->num_free_blocks == 0) { + nova_dbgv("%s: Can't alloc. free_list->first_node=0x%p " + "free_list->num_free_blocks = %lu", + __func__, free_list->first_node, + free_list->num_free_blocks); + return -ENOSPC; + } + + if (atype == LOG && not_enough_blocks(free_list, num_blocks, atype)) { + nova_dbgv("%s: Can't alloc. 
not_enough_blocks() == true", + __func__); + return -ENOSPC; + } + + tree = &(free_list->block_free_tree); + if (from_tail == ALLOC_FROM_HEAD) + temp = &(free_list->first_node->node); + else + temp = &(free_list->last_node->node); + + /* Try huge block allocation for data blocks first */ + if (IS_DATABLOCKS_2MB_ALIGNED(num_blocks, atype)) { + ret_blocks = nova_alloc_superpage(sb, free_list, + num_blocks, new_blocknr, from_tail); + if (ret_blocks > 0 && *new_blocknr != 0) { + num_blocks = ret_blocks; + found_hugeblock = 1; + goto success; + } + } + + /* fallback to un-aglined allocation then */ + while (temp) { + step++; + curr = container_of(temp, struct nova_range_node, node); + + if (!nova_range_node_checksum_ok(curr)) { + nova_err(sb, "%s curr failed\n", __func__); + goto next; + } + + curr_blocks = curr->range_high - curr->range_low + 1; + + if (num_blocks >= curr_blocks) { + /* Superpage allocation must succeed */ + if (btype > 0 && num_blocks > curr_blocks) + goto next; + + /* Otherwise, allocate the whole blocknode */ + if (curr == free_list->first_node) { + next_node = rb_next(temp); + if (next_node) + next = container_of(next_node, + struct nova_range_node, node); + free_list->first_node = next; + } + + if (curr == free_list->last_node) { + prev_node = rb_prev(temp); + if (prev_node) + prev = container_of(prev_node, + struct nova_range_node, node); + free_list->last_node = prev; + } + + rb_erase(&curr->node, tree); + free_list->num_blocknode--; + num_blocks = curr_blocks; + *new_blocknr = curr->range_low; + nova_free_blocknode(curr); + found = 1; + break; + } + + /* Allocate partial blocknode */ + if (from_tail == ALLOC_FROM_HEAD) { + *new_blocknr = curr->range_low; + curr->range_low += num_blocks; + } else { + *new_blocknr = curr->range_high + 1 - num_blocks; + curr->range_high -= num_blocks; + } + + nova_update_range_node_checksum(curr); + found = 1; + break; next: - if (from_tail == ALLOC_FROM_HEAD) - temp = rb_next(temp); - else - temp = rb_prev(temp); - } - - if (free_list->num_free_blocks < num_blocks) { - nova_dbg("%s: free list %d has %lu free blocks, " - "but allocated %lu blocks?\n", - __func__, free_list->index, - free_list->num_free_blocks, num_blocks); - return -ENOSPC; - } + if (from_tail == ALLOC_FROM_HEAD) + temp = rb_next(temp); + else + temp = rb_prev(temp); + } + + if (free_list->num_free_blocks < num_blocks) { + nova_dbg("%s: free list %d has %lu free blocks, " + "but allocated %lu blocks?\n", + __func__, free_list->index, + free_list->num_free_blocks, num_blocks); + return -ENOSPC; + } success: - if ((found == 1) || (found_hugeblock == 1)) - free_list->num_free_blocks -= num_blocks; - else { - nova_dbgv("%s: Can't alloc. found = %d", __func__, found); - return -ENOSPC; - } + if ((found == 1) || (found_hugeblock == 1)) + free_list->num_free_blocks -= num_blocks; + else { + nova_dbgv("%s: Can't alloc. 
found = %d", __func__, found); + return -ENOSPC; + } - NOVA_STATS_ADD(alloc_steps, step); + NOVA_STATS_ADD(alloc_steps, step); - return num_blocks; + return num_blocks; } /* Find out the free list with most free blocks */ static int nova_get_candidate_free_list(struct super_block *sb) { - struct nova_sb_info *sbi = NOVA_SB(sb); - struct free_list *free_list; - int cpuid = 0; - int num_free_blocks = 0; - int i; - - for (i = 0; i < sbi->cpus; i++) { - free_list = nova_get_free_list(sb, i); - if (free_list->num_free_blocks > num_free_blocks) { - cpuid = i; - num_free_blocks = free_list->num_free_blocks; - } - } - - return cpuid; + struct nova_sb_info *sbi = NOVA_SB(sb); + struct free_list *free_list; + int cpuid = 0; + int num_free_blocks = 0; + int i; + + for (i = 0; i < sbi->cpus; i++) { + free_list = nova_get_free_list(sb, i); + if (free_list->num_free_blocks > num_free_blocks) { + cpuid = i; + num_free_blocks = free_list->num_free_blocks; + } + } + + return cpuid; } static int nova_new_blocks(struct super_block *sb, unsigned long *blocknr, - unsigned int num, unsigned short btype, int zero, - enum alloc_type atype, int cpuid, enum nova_alloc_direction from_tail) + unsigned int num, unsigned short btype, int zero, + enum alloc_type atype, int cpuid, enum nova_alloc_direction from_tail) { - struct free_list *free_list; - void *bp; - unsigned long num_blocks = 0; - unsigned long new_blocknr = 0; - long ret_blocks = 0; - int retried = 0; - unsigned long irq_flags = 0; - INIT_TIMING(alloc_time); - - num_blocks = num * nova_get_numblocks(btype); - if (num_blocks == 0) { - nova_dbg_verbose("%s: num_blocks == 0", __func__); - return -EINVAL; - } - - NOVA_START_TIMING(new_blocks_t, alloc_time); - if (cpuid == ANY_CPU) - cpuid = nova_get_cpuid(sb); + struct free_list *free_list; + void *bp; + unsigned long num_blocks = 0; + unsigned long new_blocknr = 0; + long ret_blocks = 0; + int retried = 0; + unsigned long irq_flags = 0; + INIT_TIMING(alloc_time); + + num_blocks = num * nova_get_numblocks(btype); + if (num_blocks == 0) { + nova_dbg_verbose("%s: num_blocks == 0", __func__); + return -EINVAL; + } + + NOVA_START_TIMING(new_blocks_t, alloc_time); + if (cpuid == ANY_CPU) + cpuid = nova_get_cpuid(sb); retry: - free_list = nova_get_free_list(sb, cpuid); - spin_lock(&free_list->s_lock); - - if (not_enough_blocks(free_list, num_blocks, atype)) { - nova_dbgv("%s: cpu %d, free_blocks %lu, required %lu, " - "blocknode %lu\n", - __func__, cpuid, free_list->num_free_blocks, - num_blocks, free_list->num_blocknode); - - if (retried >= 2) - /* Allocate anyway */ - goto alloc; - - spin_unlock(&free_list->s_lock); - cpuid = nova_get_candidate_free_list(sb); - retried++; - goto retry; - } + free_list = nova_get_free_list(sb, cpuid); + spin_lock(&free_list->s_lock); + + if (not_enough_blocks(free_list, num_blocks, atype)) { + nova_dbgv("%s: cpu %d, free_blocks %lu, required %lu, " + "blocknode %lu\n", + __func__, cpuid, free_list->num_free_blocks, + num_blocks, free_list->num_blocknode); + + if (retried >= 2) + /* Allocate anyway */ + goto alloc; + + spin_unlock(&free_list->s_lock); + cpuid = nova_get_candidate_free_list(sb); + retried++; + goto retry; + } alloc: - ret_blocks = nova_alloc_blocks_in_free_list(sb, free_list, btype, atype, - num_blocks, &new_blocknr, from_tail); - - if (ret_blocks > 0) { - if (atype == LOG) { - free_list->alloc_log_count++; - free_list->alloc_log_pages += ret_blocks; - } else if (atype == DATA) { - free_list->alloc_data_count++; - free_list->alloc_data_pages += ret_blocks; - } - } - 
- spin_unlock(&free_list->s_lock); - NOVA_END_TIMING(new_blocks_t, alloc_time); - - if (ret_blocks <= 0 || new_blocknr == 0) { - nova_dbgv("%s: not able to allocate %d blocks. " - "ret_blocks=%ld; new_blocknr=%lu", - __func__, num, ret_blocks, new_blocknr); - return -ENOSPC; - } - - if (zero) { - bp = nova_get_block(sb, nova_get_block_off(sb, - new_blocknr, btype)); - nova_memunlock_range(sb, bp, PAGE_SIZE * ret_blocks, &irq_flags); - memset_nt(bp, 0, PAGE_SIZE * ret_blocks); - nova_memlock_range(sb, bp, PAGE_SIZE * ret_blocks, &irq_flags); - } - *blocknr = new_blocknr; - - nova_dbg_verbose("Alloc %lu NVMM blocks 0x%lx\n", ret_blocks, *blocknr); - return ret_blocks / nova_get_numblocks(btype); + ret_blocks = nova_alloc_blocks_in_free_list(sb, free_list, btype, atype, + num_blocks, &new_blocknr, from_tail); + + if (ret_blocks > 0) { + if (atype == LOG) { + free_list->alloc_log_count++; + free_list->alloc_log_pages += ret_blocks; + } else if (atype == DATA) { + free_list->alloc_data_count++; + free_list->alloc_data_pages += ret_blocks; + } + } + + spin_unlock(&free_list->s_lock); + NOVA_END_TIMING(new_blocks_t, alloc_time); + + if (ret_blocks <= 0 || new_blocknr == 0) { + nova_dbgv("%s: not able to allocate %d blocks. " + "ret_blocks=%ld; new_blocknr=%lu", + __func__, num, ret_blocks, new_blocknr); + return -ENOSPC; + } + + if (zero) { + bp = nova_get_block(sb, nova_get_block_off(sb, + new_blocknr, btype)); + nova_memunlock_range(sb, bp, PAGE_SIZE * ret_blocks, &irq_flags); + memset_nt(bp, 0, PAGE_SIZE * ret_blocks); + nova_memlock_range(sb, bp, PAGE_SIZE * ret_blocks, &irq_flags); + } + *blocknr = new_blocknr; + + nova_dbg_verbose("Alloc %lu NVMM blocks 0x%lx\n", ret_blocks, *blocknr); + return ret_blocks / nova_get_numblocks(btype); } // Allocate data blocks. The offset for the allocated block comes back in // blocknr. Return the number of blocks allocated. 
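// An illustrative caller (hypothetical, not part of the patch), showing how
// allocation pairs with the dedup-aware free path introduced above.
// ALLOC_NO_INIT is assumed to be the usual nova_alloc_init value for
// uninitialized data blocks.
static int example_alloc_then_free(struct super_block *sb,
	struct nova_inode_info_header *sih)
{
	unsigned long blocknr = 0;
	int allocated;

	allocated = nova_new_data_blocks(sb, sih, &blocknr, 0, 1,
			ALLOC_NO_INIT, ANY_CPU, ALLOC_FROM_HEAD);
	if (allocated < 0)
		return allocated;

	/* nova_free_data_blocks() now routes through
	 * nova_dedup_free_blocks(), which consults
	 * nova_dedup_is_duplicate() for every block in the range before
	 * deciding whether it may be returned to the free list.
	 */
	return nova_free_data_blocks(sb, sih, blocknr, allocated);
}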
int nova_new_data_blocks(struct super_block *sb, - struct nova_inode_info_header *sih, unsigned long *blocknr, - unsigned long start_blk, unsigned int num, - enum nova_alloc_init zero, int cpu, - enum nova_alloc_direction from_tail) + struct nova_inode_info_header *sih, unsigned long *blocknr, + unsigned long start_blk, unsigned int num, + enum nova_alloc_init zero, int cpu, + enum nova_alloc_direction from_tail) { - int allocated; - INIT_TIMING(alloc_time); - - NOVA_START_TIMING(new_data_blocks_t, alloc_time); - allocated = nova_new_blocks(sb, blocknr, num, - sih->i_blk_type, zero, DATA, cpu, from_tail); - NOVA_END_TIMING(new_data_blocks_t, alloc_time); - if (allocated < 0) { - nova_dbgv("FAILED: Inode %lu, start blk %lu, " - "alloc %d data blocks from %lu to %lu\n", - sih->ino, start_blk, allocated, *blocknr, - *blocknr + allocated - 1); - } else { - nova_dbgv("Inode %lu, start blk %lu, " - "alloc %d data blocks from %lu to %lu\n", - sih->ino, start_blk, allocated, *blocknr, - *blocknr + allocated - 1); - } - return allocated; + int allocated; + INIT_TIMING(alloc_time); + + NOVA_START_TIMING(new_data_blocks_t, alloc_time); + allocated = nova_new_blocks(sb, blocknr, num, + sih->i_blk_type, zero, DATA, cpu, from_tail); + NOVA_END_TIMING(new_data_blocks_t, alloc_time); + if (allocated < 0) { + nova_dbgv("FAILED: Inode %lu, start blk %lu, " + "alloc %d data blocks from %lu to %lu\n", + sih->ino, start_blk, allocated, *blocknr, + *blocknr + allocated - 1); + } else { + nova_dbgv("Inode %lu, start blk %lu, " + "alloc %d data blocks from %lu to %lu\n", + sih->ino, start_blk, allocated, *blocknr, + *blocknr + allocated - 1); + } + return allocated; } // Allocate log blocks. The offset for the allocated block comes back in // blocknr. Return the number of blocks allocated. 
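// Worked example (illustrative, assuming the usual NOVA block sizes):
// nova_get_numblocks() yields 1 for 4KB, 512 for 2MB and 512 * 512 for 1GB
// block types, so a call with num = 2 and a 2MB btype asks the per-CPU
// allocator for 2 * 512 = 1024 pages and, on success, nova_new_blocks()
// returns ret_blocks / 512 = 2.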
int nova_new_log_blocks(struct super_block *sb, - struct nova_inode_info_header *sih, - unsigned long *blocknr, unsigned int num, - enum nova_alloc_init zero, int cpu, - enum nova_alloc_direction from_tail) + struct nova_inode_info_header *sih, + unsigned long *blocknr, unsigned int num, + enum nova_alloc_init zero, int cpu, + enum nova_alloc_direction from_tail) { - int allocated; - INIT_TIMING(alloc_time); - - NOVA_START_TIMING(new_log_blocks_t, alloc_time); - allocated = nova_new_blocks(sb, blocknr, num, - sih->i_blk_type, zero, LOG, cpu, from_tail); - NOVA_END_TIMING(new_log_blocks_t, alloc_time); - if (allocated < 0) { - nova_dbgv("%s: ino %lu, failed to alloc %d log blocks", - __func__, sih->ino, num); - } else { - nova_dbgv("%s: ino %lu, alloc %d of %d log blocks %lu to %lu\n", - __func__, sih->ino, allocated, num, *blocknr, - *blocknr + allocated - 1); - } - return allocated; + int allocated; + INIT_TIMING(alloc_time); + + NOVA_START_TIMING(new_log_blocks_t, alloc_time); + allocated = nova_new_blocks(sb, blocknr, num, + sih->i_blk_type, zero, LOG, cpu, from_tail); + NOVA_END_TIMING(new_log_blocks_t, alloc_time); + if (allocated < 0) { + nova_dbgv("%s: ino %lu, failed to alloc %d log blocks", + __func__, sih->ino, num); + } else { + nova_dbgv("%s: ino %lu, alloc %d of %d log blocks %lu to %lu\n", + __func__, sih->ino, allocated, num, *blocknr, + *blocknr + allocated - 1); + } + return allocated; } unsigned long nova_count_free_blocks(struct super_block *sb) { - struct nova_sb_info *sbi = NOVA_SB(sb); - struct free_list *free_list; - unsigned long num_free_blocks = 0; - int i; - - for (i = 0; i < sbi->cpus; i++) { - free_list = nova_get_free_list(sb, i); - num_free_blocks += free_list->num_free_blocks; - } + struct nova_sb_info *sbi = NOVA_SB(sb); + struct free_list *free_list; + unsigned long num_free_blocks = 0; + int i; + + for (i = 0; i < sbi->cpus; i++) { + free_list = nova_get_free_list(sb, i); + num_free_blocks += free_list->num_free_blocks; + } - return num_free_blocks; + return num_free_blocks; } diff --git a/fs/nova/dedup.c b/fs/nova/dedup.c index 1acbc40a1da7..431060d21383 100644 --- a/fs/nova/dedup.c +++ b/fs/nova/dedup.c @@ -7,428 +7,398 @@ struct nova_dedup_queue dqueue; // Initialize Dedup Queue int nova_dedup_queue_init(void){ - INIT_LIST_HEAD(&dqueue.head.list); - mutex_init(&dqueue.lock); - dqueue.head.write_entry_address = 0; - return 0; + INIT_LIST_HEAD(&dqueue.head.list); + mutex_init(&dqueue.lock); + dqueue.head.write_entry_address = 0; + return 0; } // Insert Write Entries to Dedup Queue int nova_dedup_queue_push(u64 new_address, u64 target_inode_number){ - struct nova_dedup_queue_entry *new_data; + struct nova_dedup_queue_entry *new_data; - mutex_lock(&dqueue.lock); - new_data = kmalloc(sizeof(struct nova_dedup_queue_entry), GFP_KERNEL); - list_add_tail(&new_data->list, &dqueue.head.list); - new_data->write_entry_address = new_address; - new_data->target_inode_number = target_inode_number; - mutex_unlock(&dqueue.lock); + mutex_lock(&dqueue.lock); + new_data = kmalloc(sizeof(struct nova_dedup_queue_entry), GFP_KERNEL); + list_add_tail(&new_data->list, &dqueue.head.list); + new_data->write_entry_address = new_address; + new_data->target_inode_number = target_inode_number; + mutex_unlock(&dqueue.lock); - printk("dqueue-PUSH(Write Entry Address: %llu, Inode Number: %llu)\n",new_address,target_inode_number); - return 0; + printk("dqueue-PUSH(Write Entry Address: %llu, Inode Number: %llu)\n",new_address,target_inode_number); + return 0; } // Get next write entry to 
dedup u64 nova_dedup_queue_get_next_entry(u64 *target_inode_number){ - struct nova_dedup_queue_entry *ptr; - u64 ret = 0; - - mutex_lock(&dqueue.lock); - if(!list_empty(&dqueue.head.list)){ - ptr = list_entry(dqueue.head.list.next, struct nova_dedup_queue_entry, list); - - ret = ptr->write_entry_address; - *target_inode_number = ptr->target_inode_number; - - list_del(dqueue.head.list.next); - kfree(ptr); - printk("dqueue-POP(Write Entry Address: %llu, Inode Number: %llu)\n",ret,*target_inode_number); - } - mutex_unlock(&dqueue.lock); - return ret; + struct nova_dedup_queue_entry *ptr; + u64 ret = 0; + + mutex_lock(&dqueue.lock); + if(!list_empty(&dqueue.head.list)){ + ptr = list_entry(dqueue.head.list.next, struct nova_dedup_queue_entry, list); + + ret = ptr->write_entry_address; + *target_inode_number = ptr->target_inode_number; + + list_del(dqueue.head.list.next); + kfree(ptr); + printk("dqueue-POP(Write Entry Address: %llu, Inode Number: %llu)\n",ret,*target_inode_number); + } + mutex_unlock(&dqueue.lock); + return ret; } /******************** SHA1 ********************/ static struct sdesc *init_sdesc(struct crypto_shash *alg) { - struct sdesc *sdesc; - int size; - - size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); - sdesc = kmalloc(size, GFP_KERNEL); - if (!sdesc) - return ERR_PTR(-ENOMEM); - sdesc->shash.tfm = alg; - sdesc->shash.flags = 0x0; - return sdesc; + struct sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return ERR_PTR(-ENOMEM); + sdesc->shash.tfm = alg; + sdesc->shash.flags = 0x0; + return sdesc; } static int calc_hash(struct crypto_shash *alg, - const unsigned char *data, unsigned int datalen, - unsigned char *digest) + const unsigned char *data, unsigned int datalen, + unsigned char *digest) { - struct sdesc *sdesc; - int ret; - - sdesc = init_sdesc(alg); - if (IS_ERR(sdesc)) { - pr_info("can't alloc sdesc\n"); - return PTR_ERR(sdesc); - } - - ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest); - kfree(sdesc); - return ret; + struct sdesc *sdesc; + int ret; + + sdesc = init_sdesc(alg); + if (IS_ERR(sdesc)) { + pr_info("can't alloc sdesc\n"); + return PTR_ERR(sdesc); + } + + ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest); + kfree(sdesc); + return ret; } int nova_dedup_fingerprint(unsigned char* datapage, unsigned char * ret_fingerprint){ - struct crypto_shash *alg; - char *hash_alg_name = "sha1"; - //char *hash_alg_name = "md5"; - int ret; - - alg = crypto_alloc_shash(hash_alg_name,0,0); - if(IS_ERR(alg)){ - pr_info("can't alloc alg %s\n",hash_alg_name); - return PTR_ERR(alg); - } - ret = calc_hash(alg,datapage,DATABLOCK_SIZE,ret_fingerprint); - crypto_free_shash(alg); - return ret; + struct crypto_shash *alg; + char *hash_alg_name = "sha1"; + //char *hash_alg_name = "md5"; + int ret; + + alg = crypto_alloc_shash(hash_alg_name,0,0); + if(IS_ERR(alg)){ + pr_info("can't alloc alg %s\n",hash_alg_name); + return PTR_ERR(alg); + } + ret = calc_hash(alg,datapage,DATABLOCK_SIZE,ret_fingerprint); + crypto_free_shash(alg); + return ret; } /******************** OTHER ********************/ -// Return the number of new write entries needed +// Return the number of new write entries to append +// Return the number of duplicate data pages int nova_dedup_num_new_write_entry(short *target, int num_pages){ - int i,j; - int ret=0; - int invalid_count = 0; - - for(i=0;itree, pgoff); - if(!pentry) - return 0; - referenced_entry = 
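/*
 * [Editorial sketch, not part of the patch] Example use of
 * nova_dedup_fingerprint() defined above: hash one 4KB data page into a
 * 20-byte SHA-1 digest. DATABLOCK_SIZE and FINGERPRINT_SIZE come from
 * dedup.h; the buffer and the printed prefix are illustrative only.
 */
unsigned char *page_buf = kmalloc(DATABLOCK_SIZE, GFP_KERNEL);
unsigned char digest[FINGERPRINT_SIZE];

if (page_buf) {
	memset(page_buf, 0, DATABLOCK_SIZE);	/* stand-in page content */
	if (nova_dedup_fingerprint(page_buf, digest) == 0)
		pr_info("fingerprint starts %02x%02x%02x%02x\n",
			digest[0], digest[1], digest[2], digest[3]);
	kfree(page_buf);
}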
radix_tree_deref_slot(pentry); - - if(referenced_entry == entry) - return 1; - else{ - printk("Invalid DataPage Detected\n"); - return 0; - } + ,struct nova_inode_info_header *sih, unsigned long pgoff){ + struct nova_file_write_entry *referenced_entry; + void ** pentry; + pentry = radix_tree_lookup_slot(&sih->tree, pgoff); + if(!pentry) // Entry has been deleted + return 0; + referenced_entry = radix_tree_deref_slot(pentry); + + if(referenced_entry == entry) // Entry has been modified + return 1; + else{ + printk("NOVA ERROR: Invalid DataPage Detected\n"); + return 0; + } } -int nova_dedup_reassign_file_tree(struct super_block *sb, - struct nova_inode_info_header *sih, u64 begin_tail) -{ - void *addr; - struct nova_file_write_entry *entry; - struct nova_file_write_entry *entryc, entry_copy; - u64 curr_p = begin_tail; - size_t entry_size = sizeof(struct nova_file_write_entry); - - entryc = (metadata_csum == 0) ? entry : &entry_copy; - - while (curr_p && curr_p != sih->log_tail) { - if (is_last_entry(curr_p, entry_size)) - curr_p = next_log_page(sb, curr_p); - - if (curr_p == 0) { - nova_err(sb, "%s: File inode %lu log is NULL!\n", - __func__, sih->ino); - return -EINVAL; - } - - addr = (void *) nova_get_block(sb, curr_p); - entry = (struct nova_file_write_entry *) addr; - - if (metadata_csum == 0) - entryc = entry; - else if (!nova_verify_entry_csum(sb, entry, entryc)) - return -EIO; - - if (nova_get_entry_type(entryc) != FILE_WRITE) { - nova_dbg("%s: entry type is not write? %d\n", - __func__, nova_get_entry_type(entry)); - curr_p += entry_size; - continue; - } - - nova_assign_write_entry(sb, sih, entry, entryc, false); - curr_p += entry_size; - } - return 0; -} +/******************** FACT ********************/ +// TODO Range Lock in FACT table -int nova_dedup_invalidate_target_entry(struct super_block *sb, - struct nova_inode_info_header *sih, struct nova_file_write_entry *target_entry){ - - unsigned long start_pgoff = target_entry->pgoff; - unsigned int num = target_entry->num_pages; - unsigned long curr_pgoff; - unsigned long start_blocknr = (target_entry->block)>>PAGE_SHIFT; - unsigned long curr_blocknr; - int i; - int ret = 0; - for (i = 0; i < num; i++) { - curr_pgoff = start_pgoff + i; - curr_blocknr = start_blocknr + i; - - // duplicate: Free (not inside dedup table) - if(nova_dedup_is_duplicate(sb,curr_blocknr,true) == 2) - nova_free_old_entry(sb, sih,target_entry, - curr_pgoff,1,false,target_entry->epoch_id); - // unique: Don't Free - else - nova_invalidate_write_entry(sb,target_entry,1,1); - } - nova_invalidate_write_entry(sb, target_entry, 1, 0); - return ret; +int nova_dedup_FACT_init(struct super_block *sb){ + unsigned long i; + unsigned long start = FACT_TABLE_START; + unsigned long end = FACT_TABLE_INDEX_MAX; + unsigned long irq_flags=0; + unsigned long target_index; + void *addr; + + char fill[NOVA_DEF_BLOCK_SIZE_4K]; + memset(fill,0,NOVA_DEF_BLOCK_SIZE_4K); + for(i =start; i<=end;i++){ + target_index = NOVA_DEF_BLOCK_SIZE_4K * (FACT_TABLE_START + i); + addr = (void*)nova_get_block(sb,target_index); + + nova_memunlock_range(sb,addr, NOVA_DEF_BLOCK_SIZE_4K, &irq_flags); + memcpy_to_pmem_nocache(addr, &fill, NOVA_DEF_BLOCK_SIZE_4K); + nova_memlock_range(sb,addr, NOVA_DEF_BLOCK_SIZE_4K, &irq_flags); + } + + return 1; } - -/******************** FACT ********************/ -// TODO Range Lock in FACT table -// Find FACT entry with index(of FACT) +// Check FACT entry with index(of FACT) int nova_dedup_FACT_index_check(u64 index){ - if(index > FACT_TABLE_INDEX_MAX){ - 
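/*
 * [Editorial sketch, not part of the patch] Every FACT access above and
 * below computes the same byte offset by hand; a helper with a
 * hypothetical name makes the layout explicit: the table begins at
 * block FACT_TABLE_START and stores one NOVA_FACT_ENTRY_SIZE-byte
 * entry per index.
 */
static inline u64 nova_dedup_fact_offset(u64 index)
{
	return NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START +
		index * NOVA_FACT_ENTRY_SIZE;
}
/* usage: pmem_te = (struct fact_entry *)nova_get_block(sb, nova_dedup_fact_offset(index)); */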
printk("Index Out of Range: %llu\n",index); - return 1; - } - return 0; + if(index > FACT_TABLE_INDEX_MAX){ + printk("Index Out of Range: %llu\n",index); + return 1; + } + return 0; } +// Update Count after tail has been updated. int nova_dedup_FACT_update_count(struct super_block *sb, u64 index){ - u32 count = 0; - u8 compare = (1<<4)-1; - struct fact_entry* target_entry; - unsigned long irq_flags=0; - u64 target_index; - - if(nova_dedup_FACT_index_check(index)) - return 1; - - // Read Actual Index - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; - target_entry = (struct fact_entry *)nova_get_block(sb,target_index); - target_index = target_entry->delete_target; - - if(nova_dedup_FACT_index_check(target_index)) - return 1; - - // Read Count of Actual Index - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + target_index * NOVA_FACT_ENTRY_SIZE; - target_entry = (struct fact_entry*) nova_get_block(sb,target_index); - count = target_entry->count; - // IF update Count > 0 - if(compare & count){ - // decrease update count 1 - // increase reference count 1 - count += 15; - if(count > ((1UL<<32)-1)){ - printk("ERROR: Overflow\n"); - return 1; - } - // Reference count, update count Atomic Update - nova_memunlock_range(sb,target_entry,NOVA_FACT_ENTRY_SIZE, &irq_flags); - PERSISTENT_BARRIER(); - target_entry->count = count; - nova_memlock_range(sb,target_entry,NOVA_FACT_ENTRY_SIZE, &irq_flags); - } - return 0; + u64 count = 0; + u64 compare = ((unsigned long)1<<32)-1; + struct fact_entry* target_entry; + unsigned long irq_flags=0; + u64 target_index; + u64 temp_index; + // Check index is in range + if(nova_dedup_FACT_index_check(index)) + return 1; + + // Read Actual Index + target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; + target_entry = (struct fact_entry *)nova_get_block(sb,target_index); + target_index = target_entry->delete_entry; + + // Check index is in range + if(nova_dedup_FACT_index_check(target_index)) + return 1; + + // Read Count of Actual Index + temp_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + target_index * NOVA_FACT_ENTRY_SIZE; + target_entry = (struct fact_entry*) nova_get_block(sb,temp_index); + count = target_entry->count; + + // IF update Count > 0 + if(compare & count){ + // decrease update count 1 + // increase reference count 1 + count += compare; + + // Reference count, update count Atomic Update + nova_memunlock_range(sb,target_entry,NOVA_FACT_ENTRY_SIZE, &irq_flags); + PERSISTENT_BARRIER(); + target_entry->count = count; + nova_flush_buffer(&target_entry->count,CACHELINE_SIZE,1); + nova_memlock_range(sb,target_entry,NOVA_FACT_ENTRY_SIZE, &irq_flags); + } + return 0; } // For debugging int nova_dedup_FACT_read(struct super_block *sb, u64 index){ - int r_count,u_count; - unsigned char fingerprint[20]; - u64 block_address; - int next; - int delete_address; - struct fact_entry* target; - u64 target_index; - - if(nova_dedup_FACT_index_check(index)) - return 1; - - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; - target = (struct fact_entry*)nova_get_block(sb,target_index); - r_count = target->count; - u_count = target->count; - - - strncpy(fingerprint,target->fingerprint,FINGERPRINT_SIZE); - block_address = target->block_address; - next = target->next; - delete_address = target->delete_target; - r_count >>= 4; - u_count &= 15; - - printk("Fingerprint:%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X, Block address: %lu, Reference count: %d, Update 
count: %d, next: %d, delete_address: %d\n", fingerprint[0],fingerprint[1],fingerprint[2],fingerprint[3],fingerprint[4],fingerprint[5],fingerprint[6],fingerprint[7],fingerprint[8],fingerprint[9],fingerprint[10],fingerprint[11],fingerprint[12],fingerprint[13],fingerprint[14],fingerprint[15],fingerprint[16],fingerprint[17],fingerprint[18],fingerprint[19],block_address,r_count,u_count,next,delete_address); - printk("FACT table insert complete, reference count: %d, update count: %d\n",r_count,u_count); - return 0; + int r_count,u_count; + u64 block_address; + u64 next; + struct fact_entry* target; + u64 target_index; + + // Check index range + if(nova_dedup_FACT_index_check(index)) + return 1; + + target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; + target = (struct fact_entry*)nova_get_block(sb,target_index); + r_count = target->count>>32; + u_count = target->count; + block_address = target->block_address; + next = target->next; + + printk("index:%lld, ref_count:%d, up_count: %d, next:%lld, block_address: %lld\n", + index,r_count,u_count,next,block_address); + return 0; } // Is fact entry empty? int nova_dedup_is_empty(struct fact_entry target){ - if(target.count ==0) - return 1; - return 0; + if(target.count == 0) + return 1; + return 0; } // Insert new FACT entry int nova_dedup_FACT_insert(struct super_block *sb, struct fingerprint_lookup_data* lookup){ - unsigned long irq_flags=0; - struct fact_entry te; // target entry - struct fact_entry* pmem_te; // pmem target entry - u64 index = 0; - u64 target_index; - int ret=0; - - /* Index SIZE */ - /* 4GB Environment - 19 bit */ - index = lookup->fingerprint[0]; - index = index<<8 | lookup->fingerprint[1]; - index = index<<3 | ((lookup->fingerprint[2] & 224)>>5); - - /* 1TB Environment - 27 bit */ - /* - index = lookup->fingerprint[0]; - index = index << 8 | lookup->fingerprint[1]; - index = index << 8 | lookup->fingerprint[2]; - index = index << 3 | ((lookup->fingerprint[3] & 224)>>5); - */ - - if(nova_dedup_FACT_index_check(index)) - return 2; - - // Read Entries until it finds a match, or finds a empty slot - do{ - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; - pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); - __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); - if(strncmp(te.fingerprint, lookup->fingerprint,FINGERPRINT_SIZE) == 0){ // duplicate - ret = 1; - break; - } - if(nova_dedup_is_empty(te)){ // unique - ret =0; - break; - } - - // TODO add pointer to the entry and add a new entry at the end of fact table - // 1. Get new available index --> new function needed - // 2. Set 'next' as the index - // 3. 
return the entry in that index - - } - while(0); - - // - if(ret){ // duplicate data page detected - if((te.count & ((1<<4)-1)) == ((1<<4)-1)){ - printk("ERRO: more than 16 updates to this entry\n"); - return -1; - } - te.count++; - printk("Duplicate Page detected, count is %d\n",te.count); - } - else{ // new entry should be written - strncpy(te.fingerprint,lookup->fingerprint,FINGERPRINT_SIZE); - te.block_address = lookup->block_address; - te.count=1; - te.next = 0; - } - - // copy target_entry to pmem - nova_memunlock_range(sb,pmem_te, NOVA_FACT_ENTRY_SIZE, &irq_flags); - memcpy_to_pmem_nocache(pmem_te, &te, NOVA_FACT_ENTRY_SIZE - 4); // don't write 'delete' area - nova_memlock_range(sb, pmem_te, NOVA_FACT_ENTRY_SIZE, &irq_flags); - - // update lookup data - lookup->index = index; - lookup->block_address = te.block_address; - - if(nova_dedup_FACT_index_check(te.block_address)) - return 2; - - // Add FACT entry for delete - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + te.block_address * NOVA_FACT_ENTRY_SIZE; - pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); - __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); - - te.delete_target = index; - - nova_memunlock_range(sb,pmem_te,NOVA_FACT_ENTRY_SIZE,&irq_flags); - memcpy_to_pmem_nocache(pmem_te,&te,NOVA_FACT_ENTRY_SIZE); - nova_memlock_range(sb,pmem_te, NOVA_FACT_ENTRY_SIZE,&irq_flags); - - return ret; + unsigned long irq_flags=0; + struct fact_entry te; // target entry + struct fact_entry* pmem_te; // pmem target entry + u64 index = 0; + u64 target_index; + int ret=0; + + /* Index SIZE */ + /* 4GB Environment - 19 bit */ + /* + index = lookup->fingerprint[0]; + index = index<<8 | lookup->fingerprint[1]; + index = index<<3 | ((lookup->fingerprint[2] & 224)>>5); + */ + /* 1TB, 750GB Environment - 27 bit */ + + index = lookup->fingerprint[0]; + index = index << 8 | lookup->fingerprint[1]; + index = index << 8 | lookup->fingerprint[2]; + index = index << 3 | ((lookup->fingerprint[3] & 224)>>5); + + // Index out of range + if(nova_dedup_FACT_index_check(index)) + return 2; + + // Read Entries until it finds a match, or finds a empty slot + do{ + target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; + pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); + __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); + if(strncmp(te.fingerprint, lookup->fingerprint,FINGERPRINT_SIZE) == 0){ // duplicate found + ret = 1; + break; + } + if(nova_dedup_is_empty(te)){ // duplicate not found, it's unique! + ret =0; + break; + } + + // TODO add pointer to the entry and add a new entry at the end of fact table + // 1. Get new available index --> new function needed + // 2. Set 'next' as the index + // 3. 
return the entry in that index + + } + while(0); + + // + if(ret){ // duplicate data page detected + te.count++; // Increase Update Count + printk("Duplicate Page detected,index: %lld, ref_count, Up_Count is %lld %lld\n",index,te.count>>32, te.count); + } + else{ // new entry should be written + strncpy(te.fingerprint,lookup->fingerprint,FINGERPRINT_SIZE); + te.block_address = lookup->block_address; + te.count=1; + te.next = 0; + } + + // copy target_entry to pmem + nova_memunlock_range(sb,pmem_te, NOVA_FACT_ENTRY_SIZE, &irq_flags); + memcpy_to_pmem_nocache(pmem_te, &te, NOVA_FACT_ENTRY_SIZE - 20); // don't write delete, lock, pdding + nova_memlock_range(sb, pmem_te, NOVA_FACT_ENTRY_SIZE, &irq_flags); + + // update lookup data(used in deduplication process) + lookup->index = index; + lookup->block_address = te.block_address; + + // Check range + if(nova_dedup_FACT_index_check(te.block_address)) + return 2; + + // Add 'delete entry' + if(ret == 0){ + target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + te.block_address * NOVA_FACT_ENTRY_SIZE; + pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); + + nova_memunlock_range(sb,pmem_te,NOVA_FACT_ENTRY_SIZE,&irq_flags); + PERSISTENT_BARRIER(); + pmem_te->delete_entry = index; + nova_flush_buffer(&pmem_te->delete_entry,CACHELINE_SIZE,1); + nova_memlock_range(sb,pmem_te, NOVA_FACT_ENTRY_SIZE,&irq_flags); + } + return ret; } +int nova_dedup_TWE_update(struct super_block *sb,struct nova_inode_info_header *sih,u64 curr_p, short *duplicate_check){ + int i; + unsigned long irq_flags=0; + unsigned int num=0; + unsigned long start_index; + unsigned long curr_index; + + void *addr; + struct nova_file_write_entry *entry; + + addr = (void*)nova_get_block(sb,curr_p); + entry = (struct nova_file_write_entry *)addr; + // Update dedup flag to 'in_process' + nova_memunlock_range(sb,entry,CACHELINE_SIZE,&irq_flags); + entry->dedup_flag = 2; + nova_flush_buffer(&entry->dedup_flag,CACHELINE_SIZE,1); + // Update unique FACT entry counts + num = entry->num_pages; + start_index = entry->block >> PAGE_SHIFT; + for(i=0;idedup_flag = 0; + nova_flush_buffer(&entry->dedup_flag,CACHELINE_SIZE,1); + + return 0; +} // Update FACT table + dedup_flags in write entry int nova_dedup_entry_update(struct super_block *sb, struct nova_inode_info_header *sih, u64 begin_tail){ - void *addr; - struct nova_file_write_entry *entry; - u64 curr_p = begin_tail; - size_t entry_size = sizeof(struct nova_file_write_entry); - unsigned long irq_flags=0; - unsigned long curr_index; - unsigned long start_index; - unsigned int num=0; - int i; - - while(curr_p && curr_p != sih->log_tail){ - if(is_last_entry(curr_p,entry_size)) - curr_p = next_log_page(sb,curr_p); - if(curr_p ==0) - break; - addr = (void*) nova_get_block(sb,curr_p); - entry = (struct nova_file_write_entry *)addr; - - num = entry->num_pages; - start_index = entry->block >> PAGE_SHIFT; - for(i=0;idedup_flag=0; - nova_update_entry_csum(entry); - nova_update_alter_entry(sb,entry); - nova_memlock_range(sb,entry,CACHELINE_SIZE,&irq_flags); - curr_p += entry_size; - } - return 0; + void *addr; + struct nova_file_write_entry *entry; + u64 curr_p = begin_tail; + size_t entry_size = sizeof(struct nova_file_write_entry); + unsigned long irq_flags=0; + unsigned long curr_index; + unsigned long start_index; + unsigned int num=0; + int i; + + while(curr_p && curr_p != sih->log_tail){ + if(is_last_entry(curr_p,entry_size)) + curr_p = next_log_page(sb,curr_p); + if(curr_p ==0) + break; + addr = (void*) nova_get_block(sb,curr_p); + 
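/*
 * [Editorial sketch, not part of the patch] The 27-bit FACT index built
 * in nova_dedup_FACT_insert() above is just the first 27 bits of the
 * SHA-1 digest; written as one expression (hypothetical helper name):
 */
static inline u64 nova_dedup_fact_index_27(const unsigned char *fp)
{
	return ((u64)fp[0] << 19) | ((u64)fp[1] << 11) |
	       ((u64)fp[2] << 3)  | ((fp[3] & 224) >> 5);
}
/* Max value 2^27 - 1 = 134217727, within FACT_TABLE_INDEX_MAX (196607999). */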
entry = (struct nova_file_write_entry *)addr; + + num = entry->num_pages; + start_index = entry->block >> PAGE_SHIFT; + for(i=0;idedup_flag=0; // Dedup finish + nova_flush_buffer(&entry->dedup_flag,CACHELINE_SIZE,1); + + nova_update_entry_csum(entry); + nova_update_alter_entry(sb,entry); + nova_memlock_range(sb,entry,CACHELINE_SIZE,&irq_flags); + curr_p += entry_size; + } + return 0; } @@ -437,295 +407,273 @@ int nova_dedup_entry_update(struct super_block *sb, struct nova_inode_info_heade // Return 0 if it's not okay to delete - reference count > 0 // Return 2 if it's not in FACT table - reference count < 0 int nova_dedup_is_duplicate(struct super_block *sb, unsigned long blocknr, bool check){ - unsigned long irq_flags=0; - struct fact_entry te; // target entry - struct fact_entry* pmem_te; // pmem target entry - u64 index = 0; - u64 target_index; - int ret=0; - - if(nova_dedup_FACT_index_check(blocknr)) - return 3; - - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + blocknr * NOVA_FACT_ENTRY_SIZE; - - pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); - __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); - - index = te.delete_target; - - if(nova_dedup_FACT_index_check(index)) - return 2; - - target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; - pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); - __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); + unsigned long irq_flags=0; + struct fact_entry te; // target entry + struct fact_entry* pmem_te; // pmem target entry + u64 index = 0; + u64 target_index; + int ret=0; + + // Check Index Range of delete entry + if(nova_dedup_FACT_index_check(blocknr)) + return 3; + + target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + blocknr * NOVA_FACT_ENTRY_SIZE; + + pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); + __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); + + index = te.delete_entry; + + // Check Index Range of target FACT entry + if(nova_dedup_FACT_index_check(index)) + return 2; + + target_index = NOVA_DEF_BLOCK_SIZE_4K * FACT_TABLE_START + index * NOVA_FACT_ENTRY_SIZE; + pmem_te = (struct fact_entry*)nova_get_block(sb,target_index); + __copy_to_user(&te,pmem_te,sizeof(struct fact_entry)); + + ret = te.count>>32; + + if(ret <= 0){ // It's not in dedup table + return 2; + } + else{ // It's okay to delete, this entry can also be deleted + if(!check){ + nova_memunlock_range(sb,pmem_te, NOVA_FACT_ENTRY_SIZE, &irq_flags); + PERSISTENT_BARRIER(); + pmem_te->count -= ((unsigned long)1<<32); // Update Reference Count + nova_flush_buffer(&pmem_te->count,CACHELINE_SIZE,1); + nova_memlock_range(sb, pmem_te, NOVA_FACT_ENTRY_SIZE, &irq_flags); + } + if(ret == 1) // Can delete + return 1; + else + return 0; // Can't delete + } +} - ret = te.count >> 4; - if(ret <= 0){ // It's not in dedup table - return 2; +/******************** DEDUPLICATION MAIN FUNCTION ********************/ +int nova_dedup_test(struct file * filp){ + // Read Super Block + struct address_space *mapping = filp->f_mapping; + struct inode *garbage_inode = mapping->host; + struct super_block *sb = garbage_inode->i_sb; + + // For read phase + struct nova_file_write_entry *target_entry; // Target write entry to deduplicate + struct inode *target_inode; // Inode of target write entry + u64 entry_address; // Address of target write entry(TWE) + u64 target_inode_number=0; // Number of target inode (TI) + struct nova_inode *target_pi, inode_copy; // nova_inode of TI + struct nova_inode_info 
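/*
 * [Editorial sketch, not part of the patch] fact_entry.count packs two
 * fields into 8 bytes: the upper 32 bits are the reference count and
 * the lower 32 bits the update count. That is why
 * nova_dedup_is_duplicate() above reads "te.count >> 32", and why
 * nova_dedup_FACT_update_count() adds (1UL << 32) - 1: a single 8-byte
 * store increments the reference count and decrements the update count
 * at once. Hypothetical accessors:
 */
static inline u32 fact_ref_count(u64 count)	{ return count >> 32; }
static inline u32 fact_update_count(u64 count)	{ return (u32)count; }
static inline u64 fact_promote_update(u64 count)
{
	return count + (((u64)1 << 32) - 1);	/* ref + 1, update - 1 */
}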
*target_si; + struct nova_inode_info_header *target_sih; + + unsigned char *buf; // Read Buffer + unsigned char *fingerprint; // Fingerprint result + + unsigned long left; + pgoff_t index; + int i, j, num_pages =0; + unsigned long nvmm; + void *dax_mem = NULL; + + // For write phase + int num_new_entry=0; + struct fingerprint_lookup_data *lookup_data; + struct nova_inode_update update; + struct nova_file_write_entry entry_data; // new write entry + short *duplicate_check; + u64 file_size; + unsigned long original_start_blk, start_blk; + unsigned long blocknr =0; + unsigned long num_blocks =0; + unsigned long irq_flags=0; + u64 begin_tail =0; + u64 epoch_id; + u32 time; + u32 valid_page_num=0; + ssize_t ret=0; + + // kmalloc buf, fingerprint + buf = kmalloc(DATABLOCK_SIZE,GFP_KERNEL); + fingerprint = kmalloc(FINGERPRINT_SIZE,GFP_KERNEL); + + do{ + printk("----------DEDUP START----------\n"); + // Pop TWE(Target Write Entry) + entry_address = nova_dedup_queue_get_next_entry(&target_inode_number); + // target_inode_number should exist + if (target_inode_number < NOVA_NORMAL_INODE_START && target_inode_number != NOVA_ROOT_INO) { + //nova_info("%s: invalid inode %llu.", __func__,target_inode_number); + printk("No entry\n"); + continue; + } + // Read TI(Target Inode) + target_inode = nova_iget(sb,target_inode_number); + // Inode Could've been deleted + if(target_inode == ERR_PTR(-ESTALE)){ + nova_info("%s: inode %llu does not exist.", __func__,target_inode_number); + continue; + } + + if(entry_address!=0){ + //Initialize variables + ret =0; + num_new_entry=0; + valid_page_num=0; + original_start_blk = 0; + begin_tail=0; + irq_flags=0; + + target_si = NOVA_I(target_inode); + target_sih = &target_si->header; + target_pi = nova_get_inode(sb,target_inode); + + // ---------------------------Lock Acquire--------------------------------------------------------------- + sb_start_write(target_inode->i_sb); + inode_lock(target_inode); + + // Read TWE + target_entry = nova_get_block(sb, entry_address); + original_start_blk = target_entry->pgoff; + + index = target_entry->pgoff; + num_pages = target_entry->num_pages; + lookup_data = kmalloc(num_pages*sizeof(struct fingerprint_lookup_data),GFP_KERNEL); + duplicate_check = kmalloc(sizeof(short)*num_pages,GFP_KERNEL); + memset(duplicate_check,false,sizeof(short)*num_pages); + + // Read Each Data Page from TWE + for(i=0;if_mapping; - struct inode *garbage_inode = mapping->host; - struct super_block *sb = garbage_inode->i_sb; - - // For read phase - struct nova_file_write_entry *target_entry; // Target write entry to deduplicate - struct inode *target_inode; // Inode of target write entry - u64 entry_address; // Address of target write entry(TWE) - u64 target_inode_number=0; // Number of target inode (TI) - struct nova_inode *target_pi, inode_copy; // nova_inode of TI - struct nova_inode_info *target_si; - struct nova_inode_info_header *target_sih; - - unsigned char *buf; // Read Buffer - unsigned char *fingerprint; // Fingerprint result - - unsigned long left; - pgoff_t index; - int i, j, num_pages =0; - unsigned long nvmm; - void *dax_mem = NULL; - - // For write phase - int num_new_entry=0; - int start, end; - struct fingerprint_lookup_data *lookup_data; - struct nova_inode_update update; - struct nova_file_write_entry entry_data; // new write entry - short *duplicate_check; - u64 file_size; - unsigned long original_start_blk, start_blk; - unsigned long blocknr =0; - unsigned long num_blocks =0; - unsigned long irq_flags=0; - u64 begin_tail =0; - u64 
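/*
 * [Editorial sketch, not part of the patch] From the declarations in
 * scope (nvmm, dax_mem, left, buf, fingerprint, lookup_data), the
 * "Read Each Data Page" loop plausibly maps each page of the target
 * write entry through DAX, copies it into the DRAM buffer, and records
 * its fingerprint; get_nvmm() is the existing NOVA helper, while the
 * copy routine shown here is an assumption.
 */
for (i = 0; i < num_pages; i++) {
	nvmm = get_nvmm(sb, target_sih, target_entry, index + i);
	dax_mem = nova_get_block(sb, nvmm << PAGE_SHIFT);	/* PMEM address */
	memcpy(buf, dax_mem, DATABLOCK_SIZE);			/* PMEM -> DRAM */
	nova_dedup_fingerprint(buf, fingerprint);		/* 20-byte SHA-1 */
	memcpy(lookup_data[i].fingerprint, fingerprint, FINGERPRINT_SIZE);
	lookup_data[i].block_address = nvmm;
}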
epoch_id; - u32 time; - u32 valid_page_num=0; - ssize_t ret=0; - - // kmalloc buf, fingerprint - buf = kmalloc(DATABLOCK_SIZE,GFP_KERNEL); - fingerprint = kmalloc(FINGERPRINT_SIZE,GFP_KERNEL); - - do{ - printk("----------DEDUP START----------\n"); - // Pop TWE(Target Write Entry) - entry_address = nova_dedup_queue_get_next_entry(&target_inode_number); - // target_inode_number should exist - if (target_inode_number < NOVA_NORMAL_INODE_START && target_inode_number != NOVA_ROOT_INO) { - //nova_info("%s: invalid inode %llu.", __func__,target_inode_number); - printk("No entry\n"); - continue; - } - // Read TI(Target Inode) - target_inode = nova_iget(sb,target_inode_number); - // Inode Could've been deleted - if(target_inode == ERR_PTR(-ESTALE)){ - nova_info("%s: inode %llu does not exist.", __func__,target_inode_number); - continue; - } - - if(entry_address!=0){ - //Initialize variables - ret =0; - num_new_entry=0; - valid_page_num=0; - original_start_blk = 0; - begin_tail=0; - irq_flags=0; - - target_si = NOVA_I(target_inode); - target_sih = &target_si->header; - target_pi = nova_get_inode(sb,target_inode); - - // ---------------------------Lock Acquire--------------------------------------------------------------- - sb_start_write(target_inode->i_sb); - inode_lock(target_inode); - - // Read TWE - target_entry = nova_get_block(sb, entry_address); - original_start_blk = target_entry->pgoff; - - index = target_entry->pgoff; - num_pages = target_entry->num_pages; - lookup_data = kmalloc(num_pages*sizeof(struct fingerprint_lookup_data),GFP_KERNEL); - duplicate_check = kmalloc(sizeof(short)*num_pages,GFP_KERNEL); - memset(duplicate_check,false,sizeof(short)*num_pages); - - //printk("write entry: num_pages:%d, block(address): %lld, pgoff(of file): %lld\n",target_entry->num_pages, target_entry->block, target_entry->pgoff); - - // Read Each Data Page from TWE - for(i=0;iino,target_sih->pi_addr, - target_sih->alter_pi_addr, &inode_copy,0) <0){ - ret = -EIO; - goto out; - } - - // set time - target_inode->i_ctime = current_time(target_inode); - time = current_time(target_inode).tv_sec; - - epoch_id = nova_get_epoch_id(sb); - update.tail = target_sih->log_tail; - update.alter_tail = target_sih->alter_log_tail; - file_size = cpu_to_le64(target_inode -> i_size); - for(i=0;ii_blocks = target_sih->i_blocks; - target_sih->trans_id++; - - - //i_size_write(target_inode, file_size); - //target_sih->i_size = file_size; + // Get the number of new write entries needed to be appended. 
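/*
 * [Editorial sketch, not part of the patch] Per the comments above
 * nova_dedup_num_new_write_entry(), only pages flagged as duplicates in
 * duplicate_check[] need fresh write entries; unique pages keep the
 * original entry. Conceptually the call below computes:
 */
num_new_entry = 0;
for (i = 0; i < num_pages; i++)
	if (duplicate_check[i])		/* page i matched an existing FACT entry */
		num_new_entry++;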
+ num_new_entry = nova_dedup_num_new_write_entry(duplicate_check,num_pages); + if(num_new_entry == 0){ + printk("All Unique Data Pages\n"); + nova_dedup_TWE_update(sb,target_sih,entry_address,duplicate_check); + goto out; + } + + // ------------------- Write Phase ----------------------- + if(nova_check_inode_integrity(sb,target_sih->ino,target_sih->pi_addr, + target_sih->alter_pi_addr, &inode_copy,0) <0){ + ret = -EIO; + goto out; + } + + // set time + target_inode->i_ctime = current_time(target_inode); + time = current_time(target_inode).tv_sec; + + epoch_id = nova_get_epoch_id(sb); + update.tail = target_sih->log_tail; + update.alter_tail = target_sih->alter_log_tail; + file_size = cpu_to_le64(target_inode -> i_size); + + // Only add new write entries for duplicate data pages + for(i=0;ii_blocks = target_sih->i_blocks; + target_sih->trans_id++; out: - if(ret<0) - nova_cleanup_incomplete_write(sb,target_sih,blocknr,num_blocks,begin_tail,update.tail); - - // Unlock ------------------------------------------------------------ - inode_unlock(target_inode); - sb_end_write(target_inode->i_sb); - - kfree(lookup_data); - kfree(duplicate_check); - iput(target_inode); // Release Inode - } - else printk("no entry!\n"); - printk("----------DEDUP COMPLETE----------\n"); - }while(0); - - kfree(buf); - kfree(fingerprint); - return 0; + if(ret<0) + nova_cleanup_incomplete_write(sb,target_sih,blocknr,num_blocks,begin_tail,update.tail); + + // Unlock ------------------------------------------------------------ + inode_unlock(target_inode); + sb_end_write(target_inode->i_sb); + + kfree(lookup_data); + kfree(duplicate_check); + iput(target_inode); // Release Inode + } + else printk("no entry!\n"); + printk("----------DEDUP COMPLETE----------\n"); + }while(0); + + kfree(buf); + kfree(fingerprint); + return 0; } diff --git a/fs/nova/dedup.h b/fs/nova/dedup.h index 9b2e8f4b7227..259c0c175fbd 100644 --- a/fs/nova/dedup.h +++ b/fs/nova/dedup.h @@ -25,17 +25,17 @@ #define FINGERPRINT_SIZE 20 #define MAX_DATAPAGE_PER_WRITEENTRY 32 /* nova_dedup_queue - queue of entries that needs to be deduplicated - */ + queue of entries that needs to be deduplicated + */ struct nova_dedup_queue_entry{ - u64 write_entry_address; - u64 target_inode_number; - struct list_head list; + u64 write_entry_address; + u64 target_inode_number; + struct list_head list; }; struct nova_dedup_queue{ - struct nova_dedup_queue_entry head; // head of dqueue - struct mutex lock; + struct nova_dedup_queue_entry head; // head of dqueue + struct mutex lock; }; extern struct nova_dedup_queue dqueue; @@ -48,21 +48,24 @@ struct sdesc { /* FACT table entry */ struct fact_entry{ - unsigned char fingerprint[FINGERPRINT_SIZE]; - u64 block_address; - u32 count; // 28bit -> reference, 4bit -> update - u32 next; - u32 delete_target; + u64 count; // 32bit reference count, 32bit update count + unsigned char fingerprint[FINGERPRINT_SIZE]; + u64 block_address; + u64 next; + u64 delete_entry; + u32 lock; + u64 padding; }__attribute((__packed__)); /* For Fingerprint lookup */ struct fingerprint_lookup_data{ - unsigned char fingerprint[FINGERPRINT_SIZE]; // fingerprint of entry - u32 index; // index of entry - u64 block_address; // Actual address of this entry(where the data block is) + unsigned char fingerprint[FINGERPRINT_SIZE]; // fingerprint of entry + u64 index; // index of entry + u64 block_address; // Actual address of this entry(where the data block is) }; +int nova_dedup_FACT_init(struct super_block *sb); int nova_dedup_test(struct file *); int 
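/*
 * [Editorial sketch, not part of the patch] The widened fact_entry
 * above now totals exactly NOVA_FACT_ENTRY_SIZE (64) bytes:
 * 8 (count) + 20 (fingerprint) + 8 (block_address) + 8 (next) +
 * 8 (delete_entry) + 4 (lock) + 8 (padding) = 64.
 * A build-time guard placed in any init function would catch drift:
 */
BUILD_BUG_ON(sizeof(struct fact_entry) != NOVA_FACT_ENTRY_SIZE);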
nova_dedup_queue_push(u64,u64);
diff --git a/fs/nova/nova_def.h b/fs/nova/nova_def.h
index 9af2d8aed7ca..7460a9dc2b91 100644
--- a/fs/nova/nova_def.h
+++ b/fs/nova/nova_def.h
@@ -153,6 +153,6 @@ static inline void nova_flush_buffer(void *buf, uint32_t len, bool fence)
 #define NOVA_STRIPE_SIZE (1 << NOVA_STRIPE_SHIFT)
 /* NOVA DEDUP KHJ */
-#define NOVA_FACT_ENTRY_SIZE 40
+#define NOVA_FACT_ENTRY_SIZE 64
 #endif /* _LINUX_NOVA_DEF_H */
diff --git a/fs/nova/super.c b/fs/nova/super.c
index e4009717a120..4281969c7f7f 100644
--- a/fs/nova/super.c
+++ b/fs/nova/super.c
@@ -490,8 +490,9 @@ static struct nova_inode *nova_init(struct super_block *sb,
 	/* NOVA DEDUP KHJ */
 	nova_dedup_queue_init();
 	nova_info("NOVA DEDUP KHJ: dedup_queue init\n");
-
-
+	//nova_dedup_FACT_init(sb);
+	//nova_info("Initialize FACT\n");
+	nova_info("NOVA initialization finished\n");
 	return root_i;
 }
diff --git a/fs/nova/super.h b/fs/nova/super.h
index b61c39ba2e55..0b7bb0452df8 100644
--- a/fs/nova/super.h
+++ b/fs/nova/super.h
@@ -62,13 +62,18 @@ struct nova_super_block {
  * Block 64 - 5183 Static FACT Table (20MB)
  * Index 0 ~ (2^20 - 1)
+ * 750G Environment
+ * Block 64 ~ 3072063 (almost 12GB)
+ * Index 0 ~ (750 * 2^18 - 1)
+
+ * 1T Environment
  * Block 64 ~ 1310783 Static FACT Table (5GB)
  * Index 0 ~ (2^28 - 1) --> 1T Environment
  */
-#define FACT_TABLE_INDEX_MAX 1048575 // 2^20 - 1 (4G ENV)
+// #define FACT_TABLE_INDEX_MAX 1048575 // 2^20 - 1 (4G ENV)
 // #define FACT_TABLE_INDEX_MAX 268435455 // 2^28 - 1 (1TB ENV)
-#define HEAD_RESERVED_BLOCKS 63 + ((FACT_TABLE_INDEX_MAX+1)*20)/4096
+#define FACT_TABLE_INDEX_MAX (unsigned long)196607999 // 750 * 2^18 - 1 (750GB ENV)
+#define HEAD_RESERVED_BLOCKS (unsigned long)63 + (((unsigned long)FACT_TABLE_INDEX_MAX+1)*64)/4096
 // 64 - FACT entry size
 // 4096 - Block size
 #define NUM_JOURNAL_PAGES 16
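/*
 * [Editorial check, not part of the patch] Reserved-area arithmetic for
 * the 750GB environment defined above:
 *
 *   entries = FACT_TABLE_INDEX_MAX + 1 = 750 * 2^18 = 196,608,000
 *   bytes   = entries * 64 = 12,582,912,000 (~11.7 GiB, "almost 12GB")
 *   blocks  = bytes / 4096 = 3,072,000
 *   HEAD_RESERVED_BLOCKS   = 63 + 3,072,000 = 3,072,063
 *
 * which matches the "Block 64 ~ 3072063" range quoted in super.h.
 */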