From 76c9e9a3c396592415b316037fee2e753552329d Mon Sep 17 00:00:00 2001 From: Etienne <77254506+etienneptl@users.noreply.github.com> Date: Tue, 30 Mar 2021 22:08:11 +1100 Subject: [PATCH] WT-7312 Keys/Values updated to String type and save the created keys (#6424) * Fixed configuration parsing * Removed key_format and value_format parameters from the test configuration * Key and values are now of type string. * Created a Database struct to represent the data model to keep track of the collections, keys and values during a test. Keys are now using the key_size given in the test configuration during the populate stage. --- dist/test_data.py | 4 - src/config/test_config.c | 24 +- .../configs/config_poc_test_default.txt | 2 - test/cppsuite/test_harness/api_const.h | 2 + test/cppsuite/test_harness/configuration.h | 2 +- test/cppsuite/test_harness/database_model.h | 72 ++++ test/cppsuite/test_harness/test.h | 2 +- .../test_harness/workload_generator.h | 84 ++++- .../cppsuite/test_harness/workload_tracking.h | 4 +- .../test_harness/workload_validation.h | 341 +++++++++--------- 10 files changed, 324 insertions(+), 213 deletions(-) create mode 100644 test/cppsuite/test_harness/database_model.h diff --git a/dist/test_data.py b/dist/test_data.py index 863d58bb358..1cbb60223d1 100644 --- a/dist/test_data.py +++ b/dist/test_data.py @@ -48,12 +48,8 @@ def __ge__(self, other): record_config = [ Config('key_size', 0, r''' The size of the keys created''', min=0, max=10000), - Config('key_format', 'i', r''' - The format of the keys in the database'''), Config('value_size', 0, r''' The size of the values created''', min=0, max=1000000000), - Config('value_format', 'S', r''' - The format of the values stored in the database.''') ] # diff --git a/src/config/test_config.c b/src/config/test_config.c index 1b42bbee4d1..c3bed3caa97 100644 --- a/src/config/test_config.c +++ b/src/config/test_config.c @@ -17,9 +17,7 @@ static const WT_CONFIG_CHECK confchk_timestamp_manager_subconfigs[] = { {"stable_lag", "int", NULL, "min=0,max=1000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_insert_config_subconfigs[] = { - {"key_format", "string", NULL, NULL, NULL, 0}, {"key_size", "int", NULL, "min=0,max=10000", NULL, 0}, - {"value_format", "string", NULL, NULL, NULL, 0}, {"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_ops_per_transaction_subconfigs[] = { @@ -27,23 +25,19 @@ static const WT_CONFIG_CHECK confchk_ops_per_transaction_subconfigs[] = { {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_update_config_subconfigs[] = { - {"key_format", "string", NULL, NULL, NULL, 0}, {"key_size", "int", NULL, "min=0,max=10000", NULL, 0}, - {"value_format", "string", NULL, NULL, NULL, 0}, {"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_workload_generator_subconfigs[] = { {"collection_count", "int", NULL, "min=0,max=200000", NULL, 0}, - {"insert_config", "category", NULL, NULL, confchk_insert_config_subconfigs, 4}, + {"insert_config", "category", NULL, NULL, confchk_insert_config_subconfigs, 2}, {"insert_threads", "int", NULL, "min=0,max=20", NULL, 0}, {"key_count", "int", NULL, "min=0,max=1000000", NULL, 0}, - {"key_format", "string", NULL, NULL, NULL, 0}, {"key_size", "int", NULL, "min=0,max=10000", NULL, 0}, {"ops_per_transaction", "category", NULL, NULL, confchk_ops_per_transaction_subconfigs, 2}, {"read_threads", "int", NULL, "min=0,max=100", NULL, 0}, - {"update_config", "category", NULL, NULL, confchk_update_config_subconfigs, 4}, + {"update_config", "category", NULL, NULL, confchk_update_config_subconfigs, 2}, {"update_threads", "int", NULL, "min=0,max=20", NULL, 0}, - {"value_format", "string", NULL, NULL, NULL, 0}, {"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_workload_tracking_subconfigs[] = { @@ -55,7 +49,7 @@ static const WT_CONFIG_CHECK confchk_poc_test[] = { {"enable_logging", "boolean", NULL, NULL, NULL, 0}, {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 2}, {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 3}, - {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 12}, + {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 10}, {"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 1}, {NULL, NULL, NULL, NULL, NULL, 0}}; @@ -65,13 +59,11 @@ static const WT_CONFIG_ENTRY config_entries[] = { "runtime_monitor=(rate_per_second=1," "stat_cache_size=(enabled=false,limit=))," "timestamp_manager=(enabled=false,oldest_lag=0,stable_lag=0)," - "workload_generator=(collection_count=1," - "insert_config=(key_format=i,key_size=0,value_format=S," - "value_size=0),insert_threads=0,key_count=0,key_format=i," - "key_size=0,ops_per_transaction=(max=1,min=),read_threads=0," - "update_config=(key_format=i,key_size=0,value_format=S," - "value_size=0),update_threads=0,value_format=S,value_size=0)," - "workload_tracking=(enabled=false)", + "workload_generator=(collection_count=1,insert_config=(key_size=0" + ",value_size=0),insert_threads=0,key_count=0,key_size=0," + "ops_per_transaction=(max=1,min=),read_threads=0," + "update_config=(key_size=0,value_size=0),update_threads=0," + "value_size=0),workload_tracking=(enabled=false)", confchk_poc_test, 7}, {NULL, NULL, NULL, 0}}; diff --git a/test/cppsuite/configs/config_poc_test_default.txt b/test/cppsuite/configs/config_poc_test_default.txt index 52f4f536876..7d529bd04fd 100644 --- a/test/cppsuite/configs/config_poc_test_default.txt +++ b/test/cppsuite/configs/config_poc_test_default.txt @@ -23,7 +23,6 @@ workload_generator= { collection_count=2 key_count=5 - key_format=i key_size=1 ops_per_transaction= { @@ -32,7 +31,6 @@ workload_generator= } read_threads=1 value_size=10 - value_format=S } workload_tracking= { diff --git a/test/cppsuite/test_harness/api_const.h b/test/cppsuite/test_harness/api_const.h index 46a6a775677..82eadc8a0ab 100644 --- a/test/cppsuite/test_harness/api_const.h +++ b/test/cppsuite/test_harness/api_const.h @@ -45,6 +45,7 @@ static const char *DURATION_SECONDS = "duration_seconds"; static const char *ENABLED = "enabled"; static const char *ENABLE_LOGGING = "enable_logging"; static const char *KEY_COUNT = "key_count"; +static const char *KEY_SIZE = "key_size"; static const char *LIMIT = "limit"; static const char *MAX = "max"; static const char *MIN = "min"; @@ -63,6 +64,7 @@ static const char *OLDEST_TS = "oldest_timestamp"; static const char *STABLE_TS = "stable_timestamp"; /* Test harness consts. */ +static const char *DEFAULT_FRAMEWORK_SCHEMA = "key_format=S,value_format=S"; static const char *TABLE_OPERATION_TRACKING = "table:operation_tracking"; static const char *TABLE_SCHEMA_TRACKING = "table:schema_tracking"; static const char *STATISTICS_URI = "statistics:"; diff --git a/test/cppsuite/test_harness/configuration.h b/test/cppsuite/test_harness/configuration.h index adae5b1b8c5..5cf3f9f1fbe 100644 --- a/test/cppsuite/test_harness/configuration.h +++ b/test/cppsuite/test_harness/configuration.h @@ -83,7 +83,7 @@ class configuration { { WT_CONFIG_ITEM temp_value; testutil_check(_config_parser->get(_config_parser, key.c_str(), &temp_value)); - if (temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING || + if (temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING && temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID) return (-1); value = std::string(temp_value.str, temp_value.len); diff --git a/test/cppsuite/test_harness/database_model.h b/test/cppsuite/test_harness/database_model.h new file mode 100644 index 00000000000..ba4cb4001d5 --- /dev/null +++ b/test/cppsuite/test_harness/database_model.h @@ -0,0 +1,72 @@ +/*- + * Public Domain 2014-present MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DATABASE_MODEL_H +#define DATABASE_MODEL_H + +#include +#include + +namespace test_harness { + +/* Key/Value type. */ +typedef std::string key_value_t; + +/* Representation of key states. */ +struct key_t { + bool exists; +}; + +/* Representation of a value. */ +struct value_t { + key_value_t value; +}; + +/* A collection is made of mapped Key objects. */ +struct collection_t { + std::map keys; + std::map *values; +}; + +/* Representation of the collections in memory. */ +class database { + public: + const std::vector + get_collection_names() const + { + std::vector collection_names; + for (auto const &it : collections) + collection_names.push_back(it.first); + return (collection_names); + } + + std::map collections; +}; +} // namespace test_harness + +#endif diff --git a/test/cppsuite/test_harness/test.h b/test/cppsuite/test_harness/test.h index e11d17ab51b..e6e1aebe12e 100644 --- a/test/cppsuite/test_harness/test.h +++ b/test/cppsuite/test_harness/test.h @@ -139,7 +139,7 @@ class test { if (_workload_tracking->is_enabled()) { workload_validation wv; is_success = wv.validate(_workload_tracking->get_operation_table_name(), - _workload_tracking->get_schema_table_name()); + _workload_tracking->get_schema_table_name(), _workload_generator->get_database()); } debug_print(is_success ? "SUCCESS" : "FAILED", DEBUG_INFO); diff --git a/test/cppsuite/test_harness/workload_generator.h b/test/cppsuite/test_harness/workload_generator.h index f9445cd892a..885cd128bf7 100644 --- a/test/cppsuite/test_harness/workload_generator.h +++ b/test/cppsuite/test_harness/workload_generator.h @@ -33,6 +33,7 @@ #include #include +#include "database_model.h" #include "random_generator.h" #include "workload_tracking.h" @@ -68,20 +69,21 @@ class workload_generator : public component { * - Open a cursor on each collection. * - Insert m key/value pairs in each collection. Values are random strings which size is * defined by the configuration. + * - Store in memory the created collections and the generated keys that were inserted. */ void - populate() + populate(database &database) { WT_CURSOR *cursor; WT_SESSION *session; wt_timestamp_t ts; - int64_t collection_count, key_count, value_size; - std::string collection_name, config, generated_value, home; + int64_t collection_count, key_count, key_cpt, key_size, value_size; + std::string collection_name, config, home; + key_value_t generated_key, generated_value; bool ts_enabled = _timestamp_manager->is_enabled(); cursor = nullptr; - collection_count = key_count = value_size = 0; - collection_name = ""; + collection_count = key_count = key_size = value_size = 0; /* Get a session. */ session = connection_manager::instance().create_session(); @@ -89,23 +91,34 @@ class workload_generator : public component { testutil_check(_config->get_int(COLLECTION_COUNT, collection_count)); for (int i = 0; i < collection_count; ++i) { collection_name = "table:collection" + std::to_string(i); - testutil_check(session->create(session, collection_name.c_str(), DEFAULT_TABLE_SCHEMA)); + database.collections[collection_name] = {}; + testutil_check( + session->create(session, collection_name.c_str(), DEFAULT_FRAMEWORK_SCHEMA)); ts = _timestamp_manager->get_next_ts(); testutil_check(_tracking->save(tracking_operation::CREATE, collection_name, 0, "", ts)); - _collection_names.push_back(collection_name); } debug_print(std::to_string(collection_count) + " collections created", DEBUG_TRACE); /* Open a cursor on each collection and use the configuration to insert key/value pairs. */ testutil_check(_config->get_int(KEY_COUNT, key_count)); testutil_check(_config->get_int(VALUE_SIZE, value_size)); - testutil_assert(value_size >= 0); - for (const auto &collection_name : _collection_names) { + testutil_assert(value_size > 0); + testutil_check(_config->get_int(KEY_SIZE, key_size)); + testutil_assert(key_size > 0); + /* Keys must be unique. */ + testutil_assert(key_count <= pow(10, key_size)); + + for (const auto &it_collections : database.collections) { + collection_name = it_collections.first; + key_cpt = 0; /* WiredTiger lets you open a cursor on a collection using the same pointer. When a * session is closed, WiredTiger APIs close the cursors too. */ testutil_check( session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); for (size_t j = 0; j < key_count; ++j) { + /* Generation of a unique key. */ + generated_key = number_to_string(key_size, key_cpt); + ++key_cpt; /* * Generation of a random string value using the size defined in the test * configuration. @@ -115,11 +128,16 @@ class workload_generator : public component { ts = _timestamp_manager->get_next_ts(); if (ts_enabled) testutil_check(session->begin_transaction(session, "")); - testutil_check(insert(cursor, collection_name, j + 1, generated_value.c_str(), ts)); + testutil_check(insert( + cursor, collection_name, generated_key.c_str(), generated_value.c_str(), ts)); if (ts_enabled) { config = std::string(COMMIT_TS) + "=" + _timestamp_manager->decimal_to_hex(ts); testutil_check(session->commit_transaction(session, config.c_str())); } + /* Update the memory representation of the collections. */ + database.collections[collection_name].keys[generated_key].exists = true; + /* Values are not stored here. */ + database.collections[collection_name].values = nullptr; } } debug_print("Populate stage done", DEBUG_TRACE); @@ -132,9 +150,10 @@ class workload_generator : public component { configuration *sub_config; int64_t read_threads, min_operation_per_transaction, max_operation_per_transaction, value_size; + std::vector collection_names; /* Populate the database. */ - populate(); + populate(_database); /* Retrieve useful parameters from the test configuration. */ testutil_check(_config->get_int(READ_THREADS, read_threads)); @@ -147,11 +166,13 @@ class workload_generator : public component { delete sub_config; + collection_names = _database.get_collection_names(); + /* Generate threads to execute read operations on the collections. */ for (int i = 0; i < read_threads; ++i) { - thread_context *tc = new thread_context(_timestamp_manager, _tracking, - _collection_names, thread_operation::READ, max_operation_per_transaction, - min_operation_per_transaction, value_size); + thread_context *tc = new thread_context(_timestamp_manager, _tracking, collection_names, + thread_operation::READ, max_operation_per_transaction, min_operation_per_transaction, + value_size); _workers.push_back(tc); _thread_manager.add_thread(tc, &execute_operation); } @@ -167,6 +188,12 @@ class workload_generator : public component { debug_print("Workload generator: run stage done", DEBUG_TRACE); } + database & + get_database() + { + return _database; + } + /* Workload threaded operations. */ static void execute_operation(thread_context &context) @@ -205,8 +232,9 @@ class workload_generator : public component { WT_CURSOR *cursor; wt_timestamp_t ts; std::vector cursors; + std::string collection_name; std::vector collection_names; - std::string generated_value; + key_value_t generated_value, key; bool has_committed = true; int64_t cpt, value_size = context.get_value_size(); @@ -223,11 +251,13 @@ class workload_generator : public component { context.begin_transaction(session, ""); ts = context.set_commit_timestamp(session); cpt = 0; + /* The key to update is hard coded to 1 for now. */ + key = 1; for (const auto &it : cursors) { + collection_name = collection_names[cpt]; generated_value = random_generator::random_generator::instance().generate_string(value_size); - /* Key is hard coded for now. */ - testutil_check(update(context.get_tracking(), it, collection_names[cpt], 1, + testutil_check(update(context.get_tracking(), it, collection_name, key.c_str(), generated_value.c_str(), ts)); ++cpt; } @@ -265,7 +295,8 @@ class workload_generator : public component { /* WiredTiger APIs wrappers for single operations. */ template int - insert(WT_CURSOR *cursor, const std::string &collection_name, K key, V value, wt_timestamp_t ts) + insert(WT_CURSOR *cursor, const std::string &collection_name, const K &key, const V &value, + wt_timestamp_t ts) { int error_code; @@ -322,7 +353,22 @@ class workload_generator : public component { } private: - std::vector _collection_names; + /* + * Convert a number to a string. If the resulting string is less than the given length, padding + * of '0' is added. + */ + static std::string + number_to_string(uint64_t size, uint64_t value) + { + std::string str, value_str = std::to_string(value); + testutil_assert(size >= value_str.size()); + uint64_t diff = size - value_str.size(); + std::string s(diff, '0'); + str = s.append(value_str); + return (str); + } + + database _database; thread_manager _thread_manager; timestamp_manager *_timestamp_manager; workload_tracking *_tracking; diff --git a/test/cppsuite/test_harness/workload_tracking.h b/test/cppsuite/test_harness/workload_tracking.h index d1464e60970..241ccf341d7 100644 --- a/test/cppsuite/test_harness/workload_tracking.h +++ b/test/cppsuite/test_harness/workload_tracking.h @@ -33,7 +33,7 @@ * Default schema for tracking operations on collections (key_format: Collection name / Key / * Timestamp, value_format: Operation type / Value) */ -#define OPERATION_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(Sii) +#define OPERATION_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(SSQ) #define OPERATION_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(iS) #define OPERATION_TRACKING_TABLE_CONFIG \ "key_format=" OPERATION_TRACKING_KEY_FORMAT ",value_format=" OPERATION_TRACKING_VALUE_FORMAT @@ -42,7 +42,7 @@ * Default schema for tracking schema operations on collections (key_format: Collection name / * Timestamp, value_format: Operation type) */ -#define SCHEMA_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(Si) +#define SCHEMA_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(SQ) #define SCHEMA_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(i) #define SCHEMA_TRACKING_TABLE_CONFIG \ "key_format=" SCHEMA_TRACKING_KEY_FORMAT ",value_format=" SCHEMA_TRACKING_VALUE_FORMAT diff --git a/test/cppsuite/test_harness/workload_validation.h b/test/cppsuite/test_harness/workload_validation.h index 86ff567bcc2..b96173d4779 100644 --- a/test/cppsuite/test_harness/workload_validation.h +++ b/test/cppsuite/test_harness/workload_validation.h @@ -35,112 +35,114 @@ extern "C" { #include "wiredtiger.h" } +#include "database_model.h" + namespace test_harness { + /* * Class that can validate database state and collection data. */ class workload_validation { public: /* - * Validate the on disk data against what has been tracked during the test. The first step is to - * replay the tracked operations so a representation in memory of the collections is created. - * This representation is then compared to what is on disk. The second step is to go through - * what has been saved on disk and make sure the memory representation has the same data. + * Validate the on disk data against what has been tracked during the test. + * - The first step is to replay the tracked operations so a representation in memory of the + * collections is created. This representation is then compared to what is on disk. + * - The second step is to go through what has been saved on disk and make sure the memory + * representation has the same data. * operation_table_name is the collection that contains all the operations about the key/value * pairs in the different collections used during the test. schema_table_name is the collection * that contains all the operations about the creation or deletion of collections during the * test. */ bool - validate(const std::string &operation_table_name, const std::string &schema_table_name) + validate(const std::string &operation_table_name, const std::string &schema_table_name, + database &database) { WT_SESSION *session; std::string collection_name; - /* - * Representation in memory of the collections at the end of the test. The first level is a - * map that contains collection names as keys. The second level is another map that contains - * the different key/value pairs within a given collection. If a collection yields to a null - * map of key/value pairs, this means the collection should not be present on disk. If a - * value from a key/value pair is null, this means the key should not be present in the - * collection on disk. - */ - std::map *> collections; /* Existing collections after the test. */ - std::vector created_collections; - bool is_valid; + std::vector created_collections, deleted_collections; + bool is_valid = true; session = connection_manager::instance().create_session(); - /* Retrieve the created collections that need to be checked. */ + /* Retrieve the collections that were created and deleted during the test. */ collection_name = schema_table_name; - created_collections = parse_schema_tracking_table(session, collection_name); + parse_schema_tracking_table( + session, collection_name, created_collections, deleted_collections); - /* Allocate memory to the operations performed on the created collections. */ + /* Make sure they exist in memory. */ for (auto const &it : created_collections) { - std::map *map = new std::map(); - collections[it] = map; + if (database.collections.count(it) == 0) { + debug_print("Collection missing in memory: " + it, DEBUG_ERROR); + is_valid = false; + break; + } } - /* - * Build in memory the final state of each created collection according to the tracked - * operations. - */ - collection_name = operation_table_name; - for (auto const &active_collection : created_collections) - parse_operation_tracking_table( - session, collection_name, active_collection, collections); - - /* Check all tracked operations in memory against the database on disk. */ - is_valid = check_reference(session, collections); + if (!is_valid) + return (is_valid); - /* Check what has been saved on disk against what has been tracked. */ - if (is_valid) { - for (auto const &collection : created_collections) { - is_valid = check_disk_state(session, collection, collections); - if (!is_valid) { - debug_print( - "check_disk_state failed for collection " + collection, DEBUG_ERROR); - break; - } + /* Make sure they don't exist in memory nor on disk. */ + for (auto const &it : deleted_collections) { + if (database.collections.count(it) > 0) { + debug_print( + "Collection present in memory while it has been tracked as deleted: " + it, + DEBUG_ERROR); + is_valid = false; + break; } + if (!verify_collection_state(session, it, false)) { + debug_print( + "Collection present on disk while it has been tracked as deleted: " + it, + DEBUG_ERROR); + is_valid = false; + break; + } + } - } else - debug_print("check_reference failed!", DEBUG_ERROR); - - /* Clean the allocated memory. */ - clean_memory(collections); - - return (is_valid); - } - - /* Clean the memory used to represent the collections after the test. */ - void - clean_memory(std::map *> &collections) - { - for (auto &it_collections : collections) { - if (it_collections.second == nullptr) - continue; + for (auto const &collection_name : database.get_collection_names()) { + if (!is_valid) + break; - for (auto &it_operations : *it_collections.second) { - delete it_operations.second; - it_operations.second = nullptr; + /* Get the values associated to the different keys in the current collection. */ + parse_operation_tracking_table( + session, operation_table_name, collection_name, database); + /* Check all tracked operations in memory against the database on disk. */ + if (!check_reference(session, collection_name, database)) { + debug_print( + "check_reference failed for collection " + collection_name, DEBUG_ERROR); + is_valid = false; } - delete it_collections.second; - it_collections.second = nullptr; + /* Check what has been saved on disk against what has been tracked. */ + else if (!check_disk_state(session, collection_name, database)) { + debug_print( + "check_disk_state failed for collection " + collection_name, DEBUG_ERROR); + is_valid = false; + } + /* Clear memory. */ + delete database.collections[collection_name].values; + database.collections[collection_name].values = nullptr; } + + return (is_valid); } + private: /* + * Read the tracking table to retrieve the created and deleted collections during the test. * collection_name is the collection that contains the operations on the different collections * during the test. */ - const std::vector - parse_schema_tracking_table(WT_SESSION *session, const std::string &collection_name) + void + parse_schema_tracking_table(WT_SESSION *session, const std::string &collection_name, + std::vector &created_collections, std::vector &deleted_collections) { WT_CURSOR *cursor; + wt_timestamp_t key_timestamp; const char *key_collection_name; - int key_timestamp, value_operation_type; - std::vector created_collections; + int value_operation_type; testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); @@ -154,16 +156,18 @@ class workload_validation { if (static_cast(value_operation_type) == tracking_operation::CREATE) { + deleted_collections.erase(std::remove(deleted_collections.begin(), + deleted_collections.end(), key_collection_name), + deleted_collections.end()); created_collections.push_back(key_collection_name); } else if (static_cast(value_operation_type) == tracking_operation::DELETE_COLLECTION) { created_collections.erase(std::remove(created_collections.begin(), created_collections.end(), key_collection_name), created_collections.end()); + deleted_collections.push_back(key_collection_name); } } - - return (created_collections); } /* @@ -174,32 +178,42 @@ class workload_validation { */ void parse_operation_tracking_table(WT_SESSION *session, const std::string &tracking_collection_name, - const std::string &collection_name, - std::map *> &collections) + const std::string &collection_name, database &database) { WT_CURSOR *cursor; - int error_code, exact, key, key_timestamp, value_operation_type; - const char *key_collection_name, *value; + wt_timestamp_t key_timestamp; + int exact, value_operation_type; + const char *key, *key_collection_name, *value; + std::vector collection_keys; + std::string key_str; + + /* Retrieve all keys from the given collection. */ + for (auto const &it : database.collections.at(collection_name).keys) + collection_keys.push_back(it.first); + /* There must be at least a key. */ + testutil_assert(!collection_keys.empty()); + /* Sort keys. */ + std::sort(collection_keys.begin(), collection_keys.end()); + /* Use the first key as a parameter for search_near. */ + key_str = collection_keys[0]; testutil_check( session->open_cursor(session, tracking_collection_name.c_str(), NULL, NULL, &cursor)); - /* Our keys start at 0. */ - cursor->set_key(cursor, collection_name.c_str(), 0); - error_code = cursor->search_near(cursor, &exact); - + cursor->set_key(cursor, collection_name.c_str(), key_str.c_str()); + testutil_check(cursor->search_near(cursor, &exact)); /* - * As we don't support deletion, the searched collection is expected to be found. Since the - * timestamp which is part of the key is not provided, exact is expected to be > 0. + * Since the timestamp which is part of the key is not provided, exact is expected to be + * greater than 0. */ - testutil_check(exact < 1); + testutil_assert(exact >= 0); - while (error_code == 0) { + do { testutil_check(cursor->get_key(cursor, &key_collection_name, &key, &key_timestamp)); testutil_check(cursor->get_value(cursor, &value_operation_type, &value)); debug_print("Collection name is " + std::string(key_collection_name), DEBUG_TRACE); - debug_print("Key is " + std::to_string(key), DEBUG_TRACE); + debug_print("Key is " + std::string(key), DEBUG_TRACE); debug_print("Timestamp is " + std::to_string(key_timestamp), DEBUG_TRACE); debug_print("Operation type is " + std::to_string(value_operation_type), DEBUG_TRACE); debug_print("Value is " + std::string(value), DEBUG_TRACE); @@ -217,141 +231,134 @@ class workload_validation { /* * Operations are parsed from the oldest to the most recent one. It is safe to * assume the key has been inserted previously in an existing collection and can be - * deleted safely. + * safely deleted. */ - delete collections.at(key_collection_name)->at(key); - collections.at(key_collection_name)->at(key) = nullptr; + database.collections.at(key_collection_name).keys.at(std::string(key)).exists = + false; + delete database.collections.at(key_collection_name).values; + database.collections.at(key_collection_name).values = nullptr; break; case tracking_operation::INSERT: { /* Keys are unique, it is safe to assume the key has not been encountered before. */ - std::pair pair(key, new std::string(value)); - collections.at(key_collection_name)->insert(pair); + database.collections[key_collection_name].keys[std::string(key)].exists = true; + if (database.collections[key_collection_name].values == nullptr) { + database.collections[key_collection_name].values = + new std::map(); + } + value_t v; + v.value = key_value_t(value); + std::pair pair(key_value_t(key), v); + database.collections[key_collection_name].values->insert(pair); break; } - case tracking_operation::CREATE: - case tracking_operation::DELETE_COLLECTION: - testutil_die(DEBUG_ABORT, "Unexpected operation in the tracking table: %d", - static_cast(value_operation_type)); default: - testutil_die( - DEBUG_ABORT, "tracking operation is unknown : %d", value_operation_type); + testutil_die(DEBUG_ABORT, "Unexpected operation in the tracking table: %d", + value_operation_type); break; } - error_code = cursor->next(cursor); - } + } while (cursor->next(cursor) == 0); if (cursor->reset(cursor) != 0) debug_print("Cursor could not be reset !", DEBUG_ERROR); } /* - * Compare the tracked operations against what has been saved on disk. collections is the + * Compare the tracked operations against what has been saved on disk. database is the * representation in memory of the collections after the test according to the tracking table. */ bool check_reference( - WT_SESSION *session, std::map *> &collections) + WT_SESSION *session, const std::string &collection_name, const database &database) { + bool is_valid = true; + collection_t collection; + key_t key; + key_value_t key_str, *value; + + /* Check the collection exists on disk. */ + is_valid = verify_collection_state(session, collection_name, true); - bool collection_exists, is_valid = true; - std::map *collection; - workload_validation wv; - std::string *value; - - for (const auto &it_collections : collections) { - /* Check the collection is in the correct state. */ - collection_exists = (it_collections.second != nullptr); - is_valid = wv.verify_database_state(session, it_collections.first, collection_exists); - - if (is_valid && collection_exists) { - collection = it_collections.second; - for (const auto &it_operations : *collection) { - value = (*collection)[it_operations.first]; - /* The key/value pair exists. */ - if (value != nullptr) - is_valid = (wv.is_key_present( - session, it_collections.first, it_operations.first) == true); - /* The key has been deleted. */ - else - is_valid = (wv.is_key_present( - session, it_collections.first, it_operations.first) == false); - - /* Check the associated value is valid. */ - if (is_valid && (value != nullptr)) { - is_valid = (wv.verify_value( - session, it_collections.first, it_operations.first, *value)); - } - - if (!is_valid) { - debug_print( - "check_reference failed for key " + std::to_string(it_operations.first), - DEBUG_ERROR); - break; - } + if (is_valid) { + collection = database.collections.at(collection_name); + /* Walk through each key/value pair of the current collection. */ + for (const auto &keys : collection.keys) { + key_str = keys.first; + key = keys.second; + /* The key/value pair exists. */ + if (key.exists) + is_valid = (is_key_present(session, collection_name, key_str.c_str()) == true); + /* The key has been deleted. */ + else + is_valid = (is_key_present(session, collection_name, key_str.c_str()) == false); + + /* Check the associated value is valid. */ + if (is_valid && key.exists) { + testutil_assert(collection.values != nullptr); + is_valid = verify_value(session, collection_name, key_str.c_str(), + collection.values->at(key_str).value); } - } - if (!is_valid) { - debug_print( - "check_reference failed for collection " + it_collections.first, DEBUG_ERROR); - break; + if (!is_valid) { + debug_print("check_reference failed for key " + key_str, DEBUG_ERROR); + break; + } } } + if (!is_valid) + debug_print("check_reference failed for collection " + collection_name, DEBUG_ERROR); + return (is_valid); } /* Check what is present on disk against what has been tracked. */ bool - check_disk_state(WT_SESSION *session, const std::string &collection_name, - std::map *> &collections) + check_disk_state( + WT_SESSION *session, const std::string &collection_name, const database &database) { WT_CURSOR *cursor; - int key; - const char *value; - bool is_valid; - std::string *value_str; - std::map *collection; + collection_t collection; + bool is_valid = true; + /* Key/value pairs on disk. */ + const char *key_on_disk, *value_on_disk; + key_value_t key_str, value_str; testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor)); - /* Check the collection has been tracked and contains data. */ - is_valid = - ((collections.count(collection_name) > 0) && (collections[collection_name] != nullptr)); - - if (!is_valid) - debug_print( - "Collection " + collection_name + " has not been tracked or has been deleted", - DEBUG_ERROR); - else - collection = collections[collection_name]; + collection = database.collections.at(collection_name); /* Read the collection on disk. */ while (is_valid && (cursor->next(cursor) == 0)) { - testutil_check(cursor->get_key(cursor, &key)); - testutil_check(cursor->get_value(cursor, &value)); + testutil_check(cursor->get_key(cursor, &key_on_disk)); + testutil_check(cursor->get_value(cursor, &value_on_disk)); - debug_print("Key is " + std::to_string(key), DEBUG_TRACE); - debug_print("Value is " + std::string(value), DEBUG_TRACE); + key_str = std::string(key_on_disk); + + debug_print("Key on disk is " + key_str, DEBUG_TRACE); + debug_print("Value on disk is " + std::string(value_on_disk), DEBUG_TRACE); - if (collection->count(key) > 0) { - value_str = collection->at(key); + /* Check the key on disk has been saved in memory too. */ + if ((collection.keys.count(key_str) > 0) && collection.keys.at(key_str).exists) { + /* Memory should be allocated for values. */ + testutil_assert(collection.values != nullptr); + value_str = collection.values->at(key_str).value; /* * Check the key/value pair on disk matches the one in memory from the tracked * operations. */ - is_valid = (value_str != nullptr) && (*value_str == std::string(value)); + is_valid = (value_str == key_value_t(value_on_disk)); if (!is_valid) - debug_print(" Key/Value pair mismatch.\n Disk key: " + std::to_string(key) + - "\n Disk value: " + std ::string(value) + - "\n Tracking table key: " + std::to_string(key) + - "\n Tracking table value: " + (value_str == nullptr ? "NULL" : *value_str), + debug_print(" Key/Value pair mismatch.\n Disk key: " + key_str + + "\n Disk value: " + std ::string(value_on_disk) + + "\n Tracking table key: " + key_str + "\n Tracking table value exists: " + + std::to_string(collection.keys.at(key_str).exists) + + "\n Tracking table value: " + value_str, DEBUG_ERROR); } else { is_valid = false; debug_print( - "The key " + std::to_string(key) + " present on disk has not been tracked", + "The key " + std::string(key_on_disk) + " present on disk has not been tracked", DEBUG_ERROR); } } @@ -364,7 +371,7 @@ class workload_validation { * needs to be set to true if the collection is expected to be existing, false otherwise. */ bool - verify_database_state( + verify_collection_state( WT_SESSION *session, const std::string &collection_name, bool exists) const { WT_CURSOR *cursor; @@ -398,8 +405,6 @@ class workload_validation { return (value == expected_value); } - - private: }; } // namespace test_harness