From bcbc157f83144f033bd9313f0638b71cfcbb2f92 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 18 Apr 2019 00:26:32 -0400 Subject: [PATCH 01/19] Added default value attribute to Schema::Column --- src/include/catalog/schema.h | 52 ++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index fef9e8313f..4c77bbfb1f 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -34,22 +34,38 @@ class Schema { * @param oid internal unique identifier for this column */ Column(std::string name, const type::TypeId type, const bool nullable, const col_oid_t oid) + : name_(std::move(name)), + type_(type), + attr_size_(type::TypeUtil::GetTypeSize(type_)), + nullable_(nullable), + inlined_(true), + oid_(oid), + default_(nullptr) { + validateColumn(); + } + + /** + * Instantiates a Column object, primary to be used for building a Schema object + * @param name column name + * @param type SQL type for this column + * @param nullable true if the column is nullable, false otherwise + * @param oid internal unique identifier for this column + * @param default_value default value for this column + */ + Column(std::string name, const type::TypeId type, const bool nullable, const col_oid_t oid, byte *default_value) : name_(std::move(name)), type_(type), attr_size_(type::TypeUtil::GetTypeSize(type_)), nullable_(nullable), inlined_(true), oid_(oid) { - if (attr_size_ == VARLEN_COLUMN) { - // this is a varlen attribute - // attr_size_ is actual size + high bit via GetTypeSize - inlined_ = false; - } - TERRIER_ASSERT( - attr_size_ == 1 || attr_size_ == 2 || attr_size_ == 4 || attr_size_ == 8 || attr_size_ == VARLEN_COLUMN, - "Attribute size must be 1, 2, 4, 8 or VARLEN_COLUMN bytes."); - TERRIER_ASSERT(type_ != type::TypeId::INVALID, "Attribute type cannot be INVALID."); + // Copy the passed in default value + // ASSUMPTION: The default_value passed in is of size attr_size_ + default_ = new byte[attr_size_]; + std::memcpy(default_, default_value, attr_size_); + validateColumn(); } + /** * @return column name */ @@ -76,14 +92,28 @@ class Schema { col_oid_t GetOid() const { return oid_; } private: + /** + * Validate the attr_size_ and type of the column + */ + void validateColumn() { + if (attr_size_ == VARLEN_COLUMN) { + // this is a varlen attribute + // attr_size_ is actual size + high bit via GetTypeSize + inlined_ = false; + } + TERRIER_ASSERT( + attr_size_ == 1 || attr_size_ == 2 || attr_size_ == 4 || attr_size_ == 8 || attr_size_ == VARLEN_COLUMN, + "Attribute size must be 1, 2, 4, 8 or VARLEN_COLUMN bytes."); + TERRIER_ASSERT(type_ != type::TypeId::INVALID, "Attribute type cannot be INVALID."); + } + const std::string name_; const type::TypeId type_; uint8_t attr_size_; const bool nullable_; bool inlined_; const col_oid_t oid_; - // TODO(Matt): default value would go here - // Value default_; + byte *default_; }; /** From eb4053b46b30d349159ca3325266c8b1909b741b Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 18 Apr 2019 00:48:49 -0400 Subject: [PATCH 02/19] Populating default value during UpdateSchema --- src/include/catalog/schema.h | 10 ++++++++++ src/include/storage/sql_table.h | 3 ++- src/include/storage/storage_defs.h | 1 + src/storage/sql_table.cpp | 11 ++++++++++- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 4c77bbfb1f..3b7f38cc2f 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -66,6 +66,11 @@ class Schema { validateColumn(); } + /** + * Free the memory allocated to default_ in the destructor + */ + ~Column() { if (default_ != nullptr) free(default_); } + /** * @return column name */ @@ -90,6 +95,10 @@ class Schema { * @return internal unique identifier for this column */ col_oid_t GetOid() const { return oid_; } + /** + * @return default value for this column + */ + byte* GetDefault() const { return default_; } private: /** @@ -155,6 +164,7 @@ class Schema { */ const storage::layout_version_t GetVersion() const { return version_; } + private: const storage::layout_version_t version_; const std::vector columns_; diff --git a/src/include/storage/sql_table.h b/src/include/storage/sql_table.h index 078f728540..474caa9b95 100644 --- a/src/include/storage/sql_table.h +++ b/src/include/storage/sql_table.h @@ -32,7 +32,8 @@ class SqlTable { BlockLayout layout; ColumnMap column_map; InverseColumnMap inverse_column_map; - // TODO(John): Add 'default_value_map' (dynamic) for col_oid->default_val + // TODO(Sai): Do we really need a defaultValueMap for every version? One for SqlTable should work? + DefaultValueMap default_value_map; }; /** diff --git a/src/include/storage/storage_defs.h b/src/include/storage/storage_defs.h index 8e9efbacfb..a5d319dd47 100644 --- a/src/include/storage/storage_defs.h +++ b/src/include/storage/storage_defs.h @@ -168,6 +168,7 @@ using ColumnMap = std::unordered_map; */ using InverseColumnMap = std::unordered_map; using ProjectionMap = std::unordered_map; +using DefaultValueMap = std::unordered_map; /** * Denote whether a record modifies the logical delete column, used when DataTable inspects deltas diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 598b5b3c9a..b6f6b2da49 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -108,6 +108,15 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { } } + // Populate the default value map + DefaultValueMap default_value_map; + for (const auto &column : schema.GetColumns()) { + byte *default_value = column.GetDefault(); + if (default_value) { + default_value_map[column.GetOid()] = default_value; + } + } + BlockLayout layout = storage::BlockLayout(attr_sizes); auto dt = new DataTable(block_store_, layout, schema.GetVersion()); @@ -117,7 +126,7 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { // for this allocation is in the destructor for SqlTable. clang-analyzer-cplusplus.NewDeleteLeaks identifies this // as a potential leak and throws an error incorrectly. // NOLINTNEXTLINE - tables_.Insert(schema.GetVersion(), {dt, layout, col_map, inv_col_map}); + tables_.Insert(schema.GetVersion(), {dt, layout, col_map, inv_col_map, default_value_map}); } bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlot slot, ProjectedRow *const out_buffer, From b56aff78eeb26443c94a5d1951163f78c94c8e31 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 18 Apr 2019 02:31:08 -0400 Subject: [PATCH 03/19] Default values tested in Select --- src/include/catalog/schema.h | 2 +- src/include/storage/storage_defs.h | 2 +- src/storage/sql_table.cpp | 19 +++++++++++++++-- test/storage/sql_table_test.cpp | 33 ++++++++++++++++++++++-------- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 3b7f38cc2f..37145c29c1 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -69,7 +69,7 @@ class Schema { /** * Free the memory allocated to default_ in the destructor */ - ~Column() { if (default_ != nullptr) free(default_); } + ~Column() = default; /** * @return column name diff --git a/src/include/storage/storage_defs.h b/src/include/storage/storage_defs.h index a5d319dd47..c356016c56 100644 --- a/src/include/storage/storage_defs.h +++ b/src/include/storage/storage_defs.h @@ -168,7 +168,7 @@ using ColumnMap = std::unordered_map; */ using InverseColumnMap = std::unordered_map; using ProjectionMap = std::unordered_map; -using DefaultValueMap = std::unordered_map; +using DefaultValueMap = std::unordered_map>; /** * Denote whether a record modifies the logical delete column, used when DataTable inspects deltas diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index b6f6b2da49..5d451bb677 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -112,8 +112,9 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { DefaultValueMap default_value_map; for (const auto &column : schema.GetColumns()) { byte *default_value = column.GetDefault(); + uint8_t attr_size = column.GetAttrSize(); if (default_value) { - default_value_map[column.GetOid()] = default_value; + default_value_map[column.GetOid()] = {default_value, attr_size}; } } @@ -145,6 +146,7 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo } auto old_dt_version = tables_.Find(old_version_num)->second; + auto curr_dt_version = tables_.Find(version_num)->second; // The slot version is not the same as the version_num col_id_t original_column_ids[out_buffer->NumColumns()]; @@ -154,7 +156,20 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo bool result = old_dt_version.data_table->Select(txn, slot, out_buffer); std::memcpy(out_buffer->ColumnIds(), original_column_ids, sizeof(col_id_t) * out_buffer->NumColumns()); - // TODO(Yashwanth): handle default values + // Get the DefaultValueMap visible to this transaction + DefaultValueMap default_val_map = curr_dt_version.default_value_map; + + // Populate default values into the empty columns of the ProjectedRow (if any) + STORAGE_LOG_DEBUG("Loading default values") + for (uint16_t i = 0; i < out_buffer->NumColumns(); i++) { + catalog::col_oid_t col_oid = curr_dt_version.inverse_column_map.at(out_buffer->ColumnIds()[i]); + // Fill in the default value if the entry is not null and the col_oid is in the map + if (default_val_map.count(col_oid) > 0 && out_buffer->IsNull(i)) { + auto dv_pair = default_val_map.at(col_oid); + storage::StorageUtil::CopyWithNullCheck(dv_pair.first, out_buffer, dv_pair.second, i); + } + } + return result; } diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index b067325485..24369d3bd5 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -38,9 +38,9 @@ class SqlTableTestRW { } void AddColumn(transaction::TransactionContext *txn, std::string name, type::TypeId type, bool nullable, - catalog::col_oid_t oid) { + catalog::col_oid_t oid, byte* default_value) { // update columns, schema and layout - cols_.emplace_back(name, type, nullable, oid); + cols_.emplace_back(name, type, nullable, oid, default_value); delete schema_; delete layout_; schema_ = new catalog::Schema(cols_, next_version_++); @@ -205,6 +205,17 @@ class SqlTableTestRW { } } + /** + * Convert an integer to byte array + * @param n an integer + * @return byte array + */ + byte* intToByteArray(int n) { + byte *byteArray = new byte[sizeof(n)]; + memcpy(byteArray,(const char *)&n,sizeof(n)); + return byteArray; + } + public: // This is a public field that transactions can set and read. // The purpose is to record the version for each transaction. In reality this information should be retrieved from @@ -275,7 +286,10 @@ TEST_F(SqlTableTests, SelectTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); - table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2)); + int default_val = 42; + // Add a new column with a default value + table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), + table.intToByteArray(default_val)); id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); EXPECT_EQ(100, id); @@ -287,8 +301,11 @@ TEST_F(SqlTableTests, SelectTest) { datname = table.GetIntColInRow(txn, catalog::col_oid_t(1), row2_slot); EXPECT_EQ(10001, datname); + // The new_column should return the default_value for the old version slots uint32_t new_col = table.GetIntColInRow(txn, catalog::col_oid_t(2), row1_slot); - EXPECT_EQ(12345, new_col); + EXPECT_EQ(default_val, new_col); + new_col = table.GetIntColInRow(txn, catalog::col_oid_t(2), row2_slot); + EXPECT_EQ(default_val, new_col); txn_manager_.Commit(txn, TestCallbacks::EmptyCallback, nullptr); delete txn; @@ -313,7 +330,7 @@ TEST_F(SqlTableTests, InsertTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); - table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2)); + table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), nullptr); // insert (300, 10002, null) table.StartInsertRow(); @@ -368,7 +385,7 @@ TEST_F(SqlTableTests, DeleteTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); - table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2)); + table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), nullptr); // insert (300, 10002, null) table.StartInsertRow(); @@ -418,7 +435,7 @@ TEST_F(SqlTableTests, UpdateTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); - table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2)); + table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), nullptr); // insert (300, 10002, null) table.StartInsertRow(); @@ -526,7 +543,7 @@ TEST_F(SqlTableTests, ScanTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); - table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2)); + table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), nullptr); // insert (300, 10002, null) table.StartInsertRow(); From 9978b503c73ea2f2dcdb6539d40186c577ca1efb Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 18 Apr 2019 03:39:20 -0400 Subject: [PATCH 04/19] Added nullptr check for default_value argument --- src/include/catalog/schema.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 37145c29c1..04371d0cc5 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -46,6 +46,7 @@ class Schema { /** * Instantiates a Column object, primary to be used for building a Schema object + * NOTE: Overloaded to avoid changing all Column initializations in the code * @param name column name * @param type SQL type for this column * @param nullable true if the column is nullable, false otherwise @@ -59,10 +60,14 @@ class Schema { nullable_(nullable), inlined_(true), oid_(oid) { - // Copy the passed in default value // ASSUMPTION: The default_value passed in is of size attr_size_ - default_ = new byte[attr_size_]; - std::memcpy(default_, default_value, attr_size_); + // Copy the passed in default value + if (default_value) { + default_ = new byte[attr_size_]; + std::memcpy(default_, default_value, attr_size_); + } else { + default_ = default_value; + } validateColumn(); } From 87531a20eb24e3e3fb9c56fe6e420b4e0c4165c1 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 18 Apr 2019 03:39:38 -0400 Subject: [PATCH 05/19] Default values tested in Scan --- src/storage/sql_table.cpp | 30 ++++++++++++++++++++++++++++++ test/storage/sql_table_test.cpp | 22 ++++++++++++---------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 5d451bb677..0afde1ee50 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -311,6 +311,36 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt uint32_t filled = out_buffer->NumTuples(); std::memcpy(out_buffer->ColumnIds(), original_column_ids, sizeof(col_id_t) * out_buffer->NumColumns()); out_buffer->SetNumTuples(filled); + + // Populate the default values + if (filled > 0) { + // Since we are populating multiple tuples, calculate the columns matching with default value map + auto curr_dt_version = tables_.Find(version_num)->second; + DefaultValueMap default_val_map = curr_dt_version.default_value_map; + // Find out the columns which contain default values and their projection list index + std::vector> matching_cols; + for (uint16_t i = 0; i < out_buffer->NumColumns(); i++) { + catalog::col_oid_t col_oid = curr_dt_version.inverse_column_map.at(out_buffer->ColumnIds()[i]); + // Fill in the default value if the entry is not null and the col_oid is in the map + if (default_val_map.count(col_oid) > 0) { + matching_cols.emplace_back(col_oid, i); + } + } + + // Fill in the matching default values for each of the rows + // Since each row could have different columns unfilled, using RowView to iterate over ProjectedColumns + for (uint32_t row_idx=0; row_idxInterpretAsRow(curr_dt_version.layout, row_idx); + for (auto col_oid_idx_pair : matching_cols) { + auto col_oid = col_oid_idx_pair.first; + auto idx = col_oid_idx_pair.second; + if (row.IsNull(idx)) { + auto dv_pair = default_val_map.at(col_oid); + storage::StorageUtil::CopyWithNullCheck(dv_pair.first, &row, dv_pair.second, idx); + } + } + } + } } std::vector SqlTable::ColIdsForOids(const std::vector &col_oids, diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index 24369d3bd5..721f15221a 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -506,6 +506,7 @@ TEST_F(SqlTableTests, UpdateTest) { TEST_F(SqlTableTests, ScanTest) { std::map datname_map; std::map new_col_map; + uint32_t new_col_default_value = 1729; std::map seen_map; datname_map[100] = 10000; @@ -543,9 +544,10 @@ TEST_F(SqlTableTests, ScanTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); - table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), nullptr); + table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), + table.intToByteArray(new_col_default_value)); - // insert (300, 10002, null) + // insert (300, 10002, 1729) table.StartInsertRow(); table.SetIntColInRow(catalog::col_oid_t(0), 300); table.SetIntColInRow(catalog::col_oid_t(1), datname_map[300]); @@ -587,10 +589,10 @@ TEST_F(SqlTableTests, ScanTest) { uint32_t datname = *reinterpret_cast(value); EXPECT_EQ(datname, datname_map[id]); value = row1.AccessWithNullCheck(pc_pair.second.at(catalog::col_oid_t(2))); + uint32_t new_col = *reinterpret_cast(value); if (id != 400) { - EXPECT_EQ(value, nullptr); + EXPECT_EQ(new_col, new_col_default_value); } else { - uint32_t new_col = *reinterpret_cast(value); EXPECT_EQ(new_col, new_col_map[id]); } @@ -606,10 +608,10 @@ TEST_F(SqlTableTests, ScanTest) { datname = *reinterpret_cast(value); EXPECT_EQ(datname, datname_map[id]); value = row2.AccessWithNullCheck(pc_pair.second.at(catalog::col_oid_t(2))); + new_col = *reinterpret_cast(value); if (id != 400) { - EXPECT_EQ(value, nullptr); + EXPECT_EQ(new_col, new_col_default_value); } else { - uint32_t new_col = *reinterpret_cast(value); EXPECT_EQ(new_col, new_col_map[id]); } @@ -630,10 +632,10 @@ TEST_F(SqlTableTests, ScanTest) { datname = *reinterpret_cast(value); EXPECT_EQ(datname, datname_map[id]); value = row3.AccessWithNullCheck(pc_pair.second.at(catalog::col_oid_t(2))); + new_col = *reinterpret_cast(value); if (id != 400) { - EXPECT_EQ(value, nullptr); + EXPECT_EQ(new_col, new_col_default_value); } else { - uint32_t new_col = *reinterpret_cast(value); EXPECT_EQ(new_col, new_col_map[id]); } @@ -649,10 +651,10 @@ TEST_F(SqlTableTests, ScanTest) { datname = *reinterpret_cast(value); EXPECT_EQ(datname, datname_map[id]); value = row4.AccessWithNullCheck(pc_pair.second.at(catalog::col_oid_t(2))); + new_col = *reinterpret_cast(value); if (id != 400) { - EXPECT_EQ(value, nullptr); + EXPECT_EQ(new_col, new_col_default_value); } else { - uint32_t new_col = *reinterpret_cast(value); EXPECT_EQ(new_col, new_col_map[id]); } From c0187a30334292a5dd2d5046ba6832da0046618c Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 18 Apr 2019 04:44:05 -0400 Subject: [PATCH 06/19] Fixed format --- src/include/catalog/schema.h | 5 ++--- src/include/storage/storage_defs.h | 2 +- src/storage/sql_table.cpp | 4 ++-- test/storage/sql_table_test.cpp | 12 ++++++------ 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 04371d0cc5..f009545523 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -62,7 +62,7 @@ class Schema { oid_(oid) { // ASSUMPTION: The default_value passed in is of size attr_size_ // Copy the passed in default value - if (default_value) { + if (default_value != nullptr) { default_ = new byte[attr_size_]; std::memcpy(default_, default_value, attr_size_); } else { @@ -103,7 +103,7 @@ class Schema { /** * @return default value for this column */ - byte* GetDefault() const { return default_; } + byte *GetDefault() const { return default_; } private: /** @@ -169,7 +169,6 @@ class Schema { */ const storage::layout_version_t GetVersion() const { return version_; } - private: const storage::layout_version_t version_; const std::vector columns_; diff --git a/src/include/storage/storage_defs.h b/src/include/storage/storage_defs.h index c356016c56..0b47f3b8ed 100644 --- a/src/include/storage/storage_defs.h +++ b/src/include/storage/storage_defs.h @@ -168,7 +168,7 @@ using ColumnMap = std::unordered_map; */ using InverseColumnMap = std::unordered_map; using ProjectionMap = std::unordered_map; -using DefaultValueMap = std::unordered_map>; +using DefaultValueMap = std::unordered_map>; /** * Denote whether a record modifies the logical delete column, used when DataTable inspects deltas diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 0afde1ee50..a75bc3870c 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -113,7 +113,7 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { for (const auto &column : schema.GetColumns()) { byte *default_value = column.GetDefault(); uint8_t attr_size = column.GetAttrSize(); - if (default_value) { + if (default_value != nullptr) { default_value_map[column.GetOid()] = {default_value, attr_size}; } } @@ -329,7 +329,7 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt // Fill in the matching default values for each of the rows // Since each row could have different columns unfilled, using RowView to iterate over ProjectedColumns - for (uint32_t row_idx=0; row_idxInterpretAsRow(curr_dt_version.layout, row_idx); for (auto col_oid_idx_pair : matching_cols) { auto col_oid = col_oid_idx_pair.first; diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index 721f15221a..919da5f05e 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -38,7 +38,7 @@ class SqlTableTestRW { } void AddColumn(transaction::TransactionContext *txn, std::string name, type::TypeId type, bool nullable, - catalog::col_oid_t oid, byte* default_value) { + catalog::col_oid_t oid, byte *default_value) { // update columns, schema and layout cols_.emplace_back(name, type, nullable, oid, default_value); delete schema_; @@ -210,9 +210,9 @@ class SqlTableTestRW { * @param n an integer * @return byte array */ - byte* intToByteArray(int n) { - byte *byteArray = new byte[sizeof(n)]; - memcpy(byteArray,(const char *)&n,sizeof(n)); + byte *intToByteArray(int n) { + auto byteArray = new byte[sizeof(n)]; + memcpy(byteArray, reinterpret_cast(&n), sizeof(n)); return byteArray; } @@ -289,7 +289,7 @@ TEST_F(SqlTableTests, SelectTest) { int default_val = 42; // Add a new column with a default value table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), - table.intToByteArray(default_val)); + table.intToByteArray(default_val)); id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); EXPECT_EQ(100, id); @@ -545,7 +545,7 @@ TEST_F(SqlTableTests, ScanTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), - table.intToByteArray(new_col_default_value)); + table.intToByteArray(new_col_default_value)); // insert (300, 10002, 1729) table.StartInsertRow(); From 932cf752c4892fb87f3ee020b8d432e584f476a4 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Thu, 2 May 2019 16:15:43 -0400 Subject: [PATCH 07/19] Changed back to single Column constructor with a defaultValue argument --- src/include/catalog/schema.h | 54 +++++++++++------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index f009545523..21e684b5cb 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -32,8 +32,10 @@ class Schema { * @param type SQL type for this column * @param nullable true if the column is nullable, false otherwise * @param oid internal unique identifier for this column + * @param default_value default value for this column. Null by default */ - Column(std::string name, const type::TypeId type, const bool nullable, const col_oid_t oid) + Column(std::string name, const type::TypeId type, const bool nullable, const col_oid_t oid, + byte *default_value = nullptr) : name_(std::move(name)), type_(type), attr_size_(type::TypeUtil::GetTypeSize(type_)), @@ -41,34 +43,25 @@ class Schema { inlined_(true), oid_(oid), default_(nullptr) { - validateColumn(); - } + // Check whether the column can be inlined + if (attr_size_ == VARLEN_COLUMN) { + // this is a varlen attribute + // attr_size_ is actual size + high bit via GetTypeSize + inlined_ = false; + } + // Validate the type of the column based on attribute size + TERRIER_ASSERT( + attr_size_ == 1 || attr_size_ == 2 || attr_size_ == 4 || attr_size_ == 8 || attr_size_ == VARLEN_COLUMN, + "Attribute size must be 1, 2, 4, 8 or VARLEN_COLUMN bytes."); + TERRIER_ASSERT(type_ != type::TypeId::INVALID, "Attribute type cannot be INVALID."); - /** - * Instantiates a Column object, primary to be used for building a Schema object - * NOTE: Overloaded to avoid changing all Column initializations in the code - * @param name column name - * @param type SQL type for this column - * @param nullable true if the column is nullable, false otherwise - * @param oid internal unique identifier for this column - * @param default_value default value for this column - */ - Column(std::string name, const type::TypeId type, const bool nullable, const col_oid_t oid, byte *default_value) - : name_(std::move(name)), - type_(type), - attr_size_(type::TypeUtil::GetTypeSize(type_)), - nullable_(nullable), - inlined_(true), - oid_(oid) { // ASSUMPTION: The default_value passed in is of size attr_size_ - // Copy the passed in default value + // Copy the passed in default value (if exists) + // TODO(Sai): Handle VARLEN attributes differently. if (default_value != nullptr) { default_ = new byte[attr_size_]; std::memcpy(default_, default_value, attr_size_); - } else { - default_ = default_value; } - validateColumn(); } /** @@ -106,21 +99,6 @@ class Schema { byte *GetDefault() const { return default_; } private: - /** - * Validate the attr_size_ and type of the column - */ - void validateColumn() { - if (attr_size_ == VARLEN_COLUMN) { - // this is a varlen attribute - // attr_size_ is actual size + high bit via GetTypeSize - inlined_ = false; - } - TERRIER_ASSERT( - attr_size_ == 1 || attr_size_ == 2 || attr_size_ == 4 || attr_size_ == 8 || attr_size_ == VARLEN_COLUMN, - "Attribute size must be 1, 2, 4, 8 or VARLEN_COLUMN bytes."); - TERRIER_ASSERT(type_ != type::TypeId::INVALID, "Attribute type cannot be INVALID."); - } - const std::string name_; const type::TypeId type_; uint8_t attr_size_; From 50c947d787ee45952b02d7168b2736baaeaf67cf Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Fri, 3 May 2019 19:20:00 -0400 Subject: [PATCH 08/19] Provide API for setting and clearing default values --- src/include/catalog/schema.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index bf23b47041..30dd5e7e9f 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -49,8 +49,7 @@ class Schema { // ASSUMPTION: The default_value passed in is of size attr_size_ // Copy the passed in default value (if exists) if (default_value != nullptr) { - default_ = new byte[attr_size_]; - std::memcpy(default_, default_value, attr_size_); + SetDefault(default_value); } } @@ -114,6 +113,29 @@ class Schema { */ byte *GetDefault() const { return default_; } + /** + * Clear the default value of the column + */ + void ClearDefault() { + if (default_ != nullptr) { + // Free the memory allocate to the default value + delete default_; + default_ = nullptr; + } + } + + /** + * Set the default value of the column + * @param default_value default_value as a bytes array + */ + void SetDefault(byte *default_value) { + if (default_ == nullptr) { + default_ = new byte[attr_size_]; + } + // Copy the new default value + std::memcpy(default_, default_value, attr_size_); + } + private: const std::string name_; const type::TypeId type_; From 3bdca7f0a24e7e72c4c2744b6e38713eeb3f7223 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Fri, 3 May 2019 19:20:51 -0400 Subject: [PATCH 09/19] Added a failing test case for populating default values. Single DefaultValueMap for SqlTable should fix this --- test/storage/sql_table_test.cpp | 203 +++++++++++++++++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index 25fd99a338..f3c64f2647 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -39,8 +39,9 @@ class SqlTableTestRW { cols_.emplace_back(name, type, 255, nullable, oid); } + // TODO: Change the function signature to make default_value as an optional argument void AddColumn(transaction::TransactionContext *txn, std::string name, type::TypeId type, bool nullable, - catalog::col_oid_t oid, byte *default_value) { + catalog::col_oid_t oid, byte *default_value=nullptr) { // update columns, schema and layout cols_.emplace_back(name, type, nullable, oid, default_value); delete schema_; @@ -62,6 +63,21 @@ class SqlTableTestRW { pri_ = new storage::ProjectedRowInitializer(std::get<0>(row_pair)); pr_map_ = new storage::ProjectionMap(std::get<1>(row_pair)); } + + /** + * Set a default value for the given column oid. This doesn't need to call UpdateSchema + * @param oid oid of the column + * @param default_value the default value to be set + */ + void SetColumnDefault(catalog::col_oid_t oid, byte *default_value) { + // Get the index of this oid in the col_oids vector + auto col_pos = std::find(col_oids_.begin(), col_oids_.end(), oid); + TERRIER_ASSERT(col_pos != col_oids_.end(), "oid doesn't exist in the table"); + auto idx = col_pos - col_oids_.begin(); + + cols_.at(idx).SetDefault(default_value); + } + /** * Create the SQL table. */ @@ -250,6 +266,14 @@ class SqlTableTestRW { (*reinterpret_cast(col_p)) = value; } + /** + * Set the attribute of col_oid to null + * @param col_oid col_oid of the column in the schema + */ + void SetNullInRow(catalog::col_oid_t col_oid) { + pr_->SetNull(pr_map_->at(col_oid)); + } + /** * Read a string from a row * @param col_num column number in the schema @@ -810,4 +834,181 @@ TEST_F(SqlTableTests, MultipleColumnWidths) { txn_manager_.Commit(txn, TestCallbacks::EmptyCallback, nullptr); delete txn; } + +// NOLINTNEXTLINE +TEST_F(SqlTableTests, BasicDefaultValuesTest) { + // TODO: Test a typical Default value condition + // Have 3 types of columns, 1 with default value at creation, one without and one with + // default value added later explicitly. + // Insert rows and check the output + SqlTableTestRW table(catalog::table_oid_t(2)); + auto txn = txn_manager_.BeginTransaction(); + + // Create a table of 2 columns, with no default values at the start. + table.DefineColumn("id", type::TypeId::INTEGER, false, catalog::col_oid_t(0)); + table.DefineColumn("col1", type::TypeId::INTEGER, false, catalog::col_oid_t(1)); + table.Create(); + + // Insert (1, 100) + table.StartInsertRow(); + table.SetIntColInRow(catalog::col_oid_t(0), 1); + table.SetIntColInRow(catalog::col_oid_t(1), 100); + storage::TupleSlot row1_slot = table.EndInsertRow(txn); + + // Add a new column with a default value and insert a row + // Explicitly set the layout version number + table.version_ = storage::layout_version_t(1); + int col2_default = 42; + table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2), + table.intToByteArray(col2_default)); + + // Insert (2, 200, 42) + // NOTE: The default values for new rows are filled in by the execution engine + table.StartInsertRow(); + table.SetIntColInRow(catalog::col_oid_t(0), 2); + table.SetIntColInRow(catalog::col_oid_t(1), 200); + table.SetIntColInRow(catalog::col_oid_t(2), 42); + storage::TupleSlot row2_slot = table.EndInsertRow(txn); + + // 1st row should be (1, 100, 42) + uint32_t id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); + EXPECT_EQ(1, id); + uint32_t col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row1_slot); + EXPECT_EQ(100, col1); + uint32_t col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row1_slot); + EXPECT_EQ(col2_default, col2); + + // Add another column with a default value and insert a row + table.version_ = storage::layout_version_t(2); + int col3_default = 1729; + table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), + table.intToByteArray(col3_default)); + + // Insert (3, 300, 42, NULL) + // NOTE: The default values for new rows are filled in by the execution engine + table.StartInsertRow(); + table.SetIntColInRow(catalog::col_oid_t(0), 3); + table.SetIntColInRow(catalog::col_oid_t(1), 300); + table.SetIntColInRow(catalog::col_oid_t(2), 42); +// table.SetIntColInRow(catalog::col_oid_t(3), 1729); + storage::TupleSlot row3_slot = table.EndInsertRow(txn); + + // SELECT and validate all the rows + // 1st row should be (1, 100, 42, 1729) + id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); + EXPECT_EQ(1, id); + col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row1_slot); + EXPECT_EQ(100, col1); + col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row1_slot); + EXPECT_EQ(col2_default, col2); + uint32_t col3 = table.GetIntColInRow(txn, catalog::col_oid_t(3), row1_slot); + EXPECT_EQ(col3_default, col3); + + // 2nd tuple should be (2, 200, 42, 1729) + id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row2_slot); + EXPECT_EQ(2, id); + col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row2_slot); + EXPECT_EQ(200, col1); + col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row2_slot); + EXPECT_EQ(col2_default, col2); + col3 = table.GetIntColInRow(txn, catalog::col_oid_t(3), row2_slot); + EXPECT_EQ(col3_default, col3); + + // 2nd tuple should be (2, 200, 42, 1729) + id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row3_slot); + EXPECT_EQ(3, id); + col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row3_slot); + EXPECT_EQ(300, col1); + col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row3_slot); + EXPECT_EQ(col2_default, col2); +// col3 = table.GetIntColInRow(txn, catalog::col_oid_t(3), row3_slot); +// EXPECT_EQ(col3_default, col3); + bool is_col3_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row3_slot); + EXPECT_TRUE(is_col3_null); + + txn_manager_.Commit(txn, TestCallbacks::EmptyCallback, nullptr); + delete txn; +} + +// NOLINTNEXTLINE +TEST_F(SqlTableTests, ModifyDefaultValuesTest) { + // Testing the case of adding a column without a default value + // and then setting the default value for that column. + // This should not retro-actively populate the default value for older tuples + SqlTableTestRW table(catalog::table_oid_t(2)); + auto txn = txn_manager_.BeginTransaction(); + + // Create a table of 2 columns, with no default values at the start. + table.DefineColumn("id", type::TypeId::INTEGER, false, catalog::col_oid_t(0)); + table.DefineColumn("col1", type::TypeId::INTEGER, false, catalog::col_oid_t(1)); + table.Create(); + + // Insert (1, 100) + table.StartInsertRow(); + table.SetIntColInRow(catalog::col_oid_t(0), 1); + table.SetIntColInRow(catalog::col_oid_t(1), 100); + storage::TupleSlot row1_slot = table.EndInsertRow(txn); + + // Explicitly set the layout version number + table.version_ = storage::layout_version_t(1); + // Add a new column WITHOUT a default value + table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2)); + + // Insert (2, 200, NULL) i.e. nothing passed in for col2 + table.StartInsertRow(); + table.SetIntColInRow(catalog::col_oid_t(0), 2); + table.SetIntColInRow(catalog::col_oid_t(1), 200); + storage::TupleSlot row2_slot = table.EndInsertRow(txn); + + // Now set the default value of the column to something + int col2_default = 42; + table.SetColumnDefault(catalog::col_oid_t(2), table.intToByteArray(col2_default)); + // Add a new column - to trigger the UpdateSchema + // TODO(Sai): This is populating default values when it shouldn't be for col2. FIX IT! + table.version_ = storage::layout_version_t(2); + table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3)); + + // Insert (3, 300, 42) - The default value will be populated by the execution engine + table.StartInsertRow(); + table.SetIntColInRow(catalog::col_oid_t(0), 3); + table.SetIntColInRow(catalog::col_oid_t(1), 300); + table.SetIntColInRow(catalog::col_oid_t(2), col2_default); + storage::TupleSlot row3_slot = table.EndInsertRow(txn); + + // Validate the tuples + // 1st row should be (1, 100, NULL) + uint32_t id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); + EXPECT_EQ(1, id); + uint32_t col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row1_slot); + EXPECT_EQ(100, col1); + bool col2_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(2), row1_slot); + EXPECT_TRUE(col2_is_null); + + // 2nd tuple should be (2, 200, NULL) + id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row2_slot); + EXPECT_EQ(2, id); + col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row2_slot); + EXPECT_EQ(200, col1); + col2_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(2), row1_slot); + EXPECT_TRUE(col2_is_null); + + // 3rd tuple should be (3, 300, 42) + id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row3_slot); + EXPECT_EQ(3, id); + col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row3_slot); + EXPECT_EQ(300, col1); + uint32_t col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row3_slot); + EXPECT_EQ(col2_default, col2); + + // TODO: Test the following cases + // 2. Column has no default value but is then added later + // - Handled by the execution engine. Just make sure values are as expected + // 3. Column has default value during creation, but removed later + // - Old rows should return default value but new ones shouldn't + // 4. Column with a default value, but the value passed in is forced to be NULL. + // - Default values shouldn't be filled in for these. + + txn_manager_.Commit(txn, TestCallbacks::EmptyCallback, nullptr); + delete txn; +} } // namespace terrier From 22c30412304ce71661a1df5f8ff9b597ae7ae9cf Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Fri, 3 May 2019 22:13:53 -0400 Subject: [PATCH 10/19] Modify test case for default value of an old table --- test/storage/sql_table_test.cpp | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index f3c64f2647..d857a012f4 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -862,15 +862,13 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2), table.intToByteArray(col2_default)); - // Insert (2, 200, 42) - // NOTE: The default values for new rows are filled in by the execution engine + // Insert (2, NULL, 890) table.StartInsertRow(); table.SetIntColInRow(catalog::col_oid_t(0), 2); - table.SetIntColInRow(catalog::col_oid_t(1), 200); - table.SetIntColInRow(catalog::col_oid_t(2), 42); + table.SetIntColInRow(catalog::col_oid_t(2), 890); storage::TupleSlot row2_slot = table.EndInsertRow(txn); - // 1st row should be (1, 100, 42) + // 1st row should be (1, 100, 42) by now uint32_t id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); EXPECT_EQ(1, id); uint32_t col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row1_slot); @@ -884,13 +882,11 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), table.intToByteArray(col3_default)); - // Insert (3, 300, 42, NULL) + // Insert (3, 300, NULL, NULL) // NOTE: The default values for new rows are filled in by the execution engine table.StartInsertRow(); table.SetIntColInRow(catalog::col_oid_t(0), 3); table.SetIntColInRow(catalog::col_oid_t(1), 300); - table.SetIntColInRow(catalog::col_oid_t(2), 42); -// table.SetIntColInRow(catalog::col_oid_t(3), 1729); storage::TupleSlot row3_slot = table.EndInsertRow(txn); // SELECT and validate all the rows @@ -904,25 +900,23 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { uint32_t col3 = table.GetIntColInRow(txn, catalog::col_oid_t(3), row1_slot); EXPECT_EQ(col3_default, col3); - // 2nd tuple should be (2, 200, 42, 1729) + // 2nd tuple should be (2, NULL, 890, 1729) id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row2_slot); EXPECT_EQ(2, id); - col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row2_slot); - EXPECT_EQ(200, col1); + bool col1_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(1), row2_slot); + EXPECT_TRUE(col1_is_null); col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row2_slot); - EXPECT_EQ(col2_default, col2); + EXPECT_EQ(890, col2); col3 = table.GetIntColInRow(txn, catalog::col_oid_t(3), row2_slot); EXPECT_EQ(col3_default, col3); - // 2nd tuple should be (2, 200, 42, 1729) + // 3rd tuple should be (3, 300, NULL, NULL) id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row3_slot); EXPECT_EQ(3, id); col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row3_slot); EXPECT_EQ(300, col1); - col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row3_slot); - EXPECT_EQ(col2_default, col2); -// col3 = table.GetIntColInRow(txn, catalog::col_oid_t(3), row3_slot); -// EXPECT_EQ(col3_default, col3); + bool is_col2_null = table.IsNullColInRow(txn, catalog::col_oid_t(2), row3_slot); + EXPECT_TRUE(is_col2_null); bool is_col3_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row3_slot); EXPECT_TRUE(is_col3_null); @@ -964,7 +958,6 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { int col2_default = 42; table.SetColumnDefault(catalog::col_oid_t(2), table.intToByteArray(col2_default)); // Add a new column - to trigger the UpdateSchema - // TODO(Sai): This is populating default values when it shouldn't be for col2. FIX IT! table.version_ = storage::layout_version_t(2); table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3)); From 404270a4a0c33b8cc218278dceafea3d33c49fe2 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 13:33:26 -0400 Subject: [PATCH 11/19] Add new column check in test case --- test/storage/sql_table_test.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index d857a012f4..9a931969b2 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -961,7 +961,7 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { table.version_ = storage::layout_version_t(2); table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3)); - // Insert (3, 300, 42) - The default value will be populated by the execution engine + // Insert (3, 300, 42, NULL) - The default value for col2 will be populated by the execution engine table.StartInsertRow(); table.SetIntColInRow(catalog::col_oid_t(0), 3); table.SetIntColInRow(catalog::col_oid_t(1), 300); @@ -969,29 +969,35 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { storage::TupleSlot row3_slot = table.EndInsertRow(txn); // Validate the tuples - // 1st row should be (1, 100, NULL) + // 1st row should be (1, 100, NULL, NULL) uint32_t id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); EXPECT_EQ(1, id); uint32_t col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row1_slot); EXPECT_EQ(100, col1); bool col2_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(2), row1_slot); EXPECT_TRUE(col2_is_null); + bool col3_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row1_slot); + EXPECT_TRUE(col3_is_null); - // 2nd tuple should be (2, 200, NULL) + // 2nd tuple should be (2, 200, NULL, NULL) id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row2_slot); EXPECT_EQ(2, id); col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row2_slot); EXPECT_EQ(200, col1); - col2_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(2), row1_slot); + col2_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(2), row2_slot); EXPECT_TRUE(col2_is_null); + col3_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row2_slot); + EXPECT_TRUE(col3_is_null); - // 3rd tuple should be (3, 300, 42) + // 3rd tuple should be (3, 300, 42, NULL) id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row3_slot); EXPECT_EQ(3, id); col1 = table.GetIntColInRow(txn, catalog::col_oid_t(1), row3_slot); EXPECT_EQ(300, col1); uint32_t col2 = table.GetIntColInRow(txn, catalog::col_oid_t(2), row3_slot); EXPECT_EQ(col2_default, col2); + col3_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row3_slot); + EXPECT_TRUE(col3_is_null); // TODO: Test the following cases // 2. Column has no default value but is then added later From 48bcfdef87803e4ecd97a792e6f0c807fe0b9abe Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 14:04:37 -0400 Subject: [PATCH 12/19] Passing default value test cases --- src/include/storage/sql_table.h | 6 ++-- src/include/storage/storage_defs.h | 5 ++- src/storage/projected_row.cpp | 1 + src/storage/sql_table.cpp | 56 ++++++++++++++++++++---------- test/storage/sql_table_test.cpp | 21 +++++------ 5 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/include/storage/sql_table.h b/src/include/storage/sql_table.h index 254f08d850..6c1953e657 100644 --- a/src/include/storage/sql_table.h +++ b/src/include/storage/sql_table.h @@ -32,8 +32,6 @@ class SqlTable { BlockLayout layout; ColumnMap column_map; InverseColumnMap inverse_column_map; - // TODO(Sai): Do we really need a defaultValueMap for every version? One for SqlTable should work? - DefaultValueMap default_value_map; }; /** @@ -322,6 +320,10 @@ class SqlTable { const catalog::table_oid_t oid_; common::ConcurrentMap tables_; + // NOTE: This map only keeps track of the default values specified at column creation + // For columns which don't have default value or added later, just set to null + // Populating default values into the ProjectedRow inserted later is taken care of by the execution engine + DefaultValueMap default_value_map_; /** * Given a set of col_oids, return a vector of corresponding col_ids to use for ProjectionInitialization diff --git a/src/include/storage/storage_defs.h b/src/include/storage/storage_defs.h index ed94f4b111..e2aa397b1e 100644 --- a/src/include/storage/storage_defs.h +++ b/src/include/storage/storage_defs.h @@ -175,8 +175,11 @@ using ColumnMap = std::unordered_map; /** * Used by execution and storage layers to map between col_oids and offsets within a ProjectedRow */ -using InverseColumnMap = std::unordered_map; using ProjectionMap = std::unordered_map; +using InverseColumnMap = std::unordered_map; +/** + * Used by SqlTable to map between col_oids in Schema and their {default_value, attribute_size} + */ using DefaultValueMap = std::unordered_map>; /** diff --git a/src/storage/projected_row.cpp b/src/storage/projected_row.cpp index 8e41e295ba..08314f42c4 100644 --- a/src/storage/projected_row.cpp +++ b/src/storage/projected_row.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index f6bce993b4..59bb7ba50a 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -1,5 +1,6 @@ #include "storage/sql_table.h" #include +#include #include #include @@ -75,12 +76,13 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { } // Populate the default value map - DefaultValueMap default_value_map; for (const auto &column : schema.GetColumns()) { + auto col_oid = column.GetOid(); byte *default_value = column.GetDefault(); - uint8_t attr_size = column.GetAttrSize(); - if (default_value != nullptr) { - default_value_map[column.GetOid()] = {default_value, attr_size}; + // Only populate the default values of the columns which are new and have a default value + if (default_value_map_.count(col_oid) == 0) { + uint8_t attr_size = column.GetAttrSize(); + default_value_map_[column.GetOid()] = {default_value, attr_size}; } } @@ -93,7 +95,7 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { // for this allocation is in the destructor for SqlTable. clang-analyzer-cplusplus.NewDeleteLeaks identifies this // as a potential leak and throws an error incorrectly. // NOLINTNEXTLINE - tables_.Insert(schema.GetVersion(), {dt, layout, col_map, inv_col_map, default_value_map}); + tables_.Insert(schema.GetVersion(), {dt, layout, col_map, inv_col_map}); } bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlot slot, ProjectedRow *const out_buffer, @@ -111,6 +113,9 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo return tables_.Find(version_num)->second.data_table->Select(txn, slot, out_buffer); } + // TODO(Sai): Instead of directly calling header mangling for version mismatch, + // is it better to verify that the columns in the PR required are exactly equal to the older version? + auto old_dt_version = tables_.Find(old_version_num)->second; auto curr_dt_version = tables_.Find(version_num)->second; @@ -122,17 +127,26 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo bool result = old_dt_version.data_table->Select(txn, slot, out_buffer); std::memcpy(out_buffer->ColumnIds(), original_column_ids, sizeof(col_id_t) * out_buffer->NumColumns()); - // Get the DefaultValueMap visible to this transaction - DefaultValueMap default_val_map = curr_dt_version.default_value_map; + // Default values should only be filled into the columns that are missing in the old_dt_version + // Do not fill default values for the columns which already exist in the old_dt_version - // Populate default values into the empty columns of the ProjectedRow (if any) - STORAGE_LOG_DEBUG("Loading default values") - for (uint16_t i = 0; i < out_buffer->NumColumns(); i++) { - catalog::col_oid_t col_oid = curr_dt_version.inverse_column_map.at(out_buffer->ColumnIds()[i]); - // Fill in the default value if the entry is not null and the col_oid is in the map - if (default_val_map.count(col_oid) > 0 && out_buffer->IsNull(i)) { - auto dv_pair = default_val_map.at(col_oid); - storage::StorageUtil::CopyWithNullCheck(dv_pair.first, out_buffer, dv_pair.second, i); + // Calculate the col_oids of out_buffer which are missing from the old version of datatable + std::unordered_set missing_col_oids; + for (const auto &[col_oid, offset] : pr_map) { + missing_col_oids.emplace(col_oid); + } + for (const auto &[col_oid, col_id] : old_dt_version.column_map) { + // Remove the col_oid from the missing_col_oids + missing_col_oids.erase(col_oid); + } + + // Fill in the default values for the missing columns + for (catalog::col_oid_t col_oid : missing_col_oids) { + TERRIER_ASSERT(default_value_map_.count(col_oid) > 0, "Every column in schema must exist in default_value_map"); + const auto &[default_value, attr_size] = default_value_map_.at(col_oid); + // If the default value for this column exists, copy it in to the PR + if (default_value != nullptr) { + storage::StorageUtil::CopyWithNullCheck(default_value, out_buffer, attr_size, pr_map.at(col_oid)); } } @@ -263,6 +277,8 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt layout_version_t version_num) const { layout_version_t dt_version_num = start_pos->curr_version_; + // TODO(Sai): Add version check before calling header mangling + // Also possibly checking for same columns instead in the PR and the old_dt_version? TERRIER_ASSERT(out_buffer->NumColumns() <= tables_.Find(version_num)->second.column_map.size(), "The output buffer never returns the version pointer columns, so it should have " "fewer attributes."); @@ -282,17 +298,21 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt if (filled > 0) { // Since we are populating multiple tuples, calculate the columns matching with default value map auto curr_dt_version = tables_.Find(version_num)->second; - DefaultValueMap default_val_map = curr_dt_version.default_value_map; + // DefaultValueMap default_val_map = curr_dt_version.default_value_map; // Find out the columns which contain default values and their projection list index std::vector> matching_cols; for (uint16_t i = 0; i < out_buffer->NumColumns(); i++) { catalog::col_oid_t col_oid = curr_dt_version.inverse_column_map.at(out_buffer->ColumnIds()[i]); // Fill in the default value if the entry is not null and the col_oid is in the map - if (default_val_map.count(col_oid) > 0) { + if (default_value_map_.count(col_oid) > 0) { matching_cols.emplace_back(col_oid, i); } } + // TODO(Sai): Make the change of the default value as in Select + // TODO(Sai): Try an efficient way of filling in default values for the ProjectedColumns? + // Scan returns ProjectedColumns from a single (older) version of DataTable. So, just copy the default values + // of the matching columns calculated above. // Fill in the matching default values for each of the rows // Since each row could have different columns unfilled, using RowView to iterate over ProjectedColumns for (uint32_t row_idx = 0; row_idx < filled; row_idx++) { @@ -301,7 +321,7 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt auto col_oid = col_oid_idx_pair.first; auto idx = col_oid_idx_pair.second; if (row.IsNull(idx)) { - auto dv_pair = default_val_map.at(col_oid); + auto dv_pair = default_value_map_.at(col_oid); storage::StorageUtil::CopyWithNullCheck(dv_pair.first, &row, dv_pair.second, idx); } } diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index 9a931969b2..2b48e10fd4 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -39,9 +39,8 @@ class SqlTableTestRW { cols_.emplace_back(name, type, 255, nullable, oid); } - // TODO: Change the function signature to make default_value as an optional argument void AddColumn(transaction::TransactionContext *txn, std::string name, type::TypeId type, bool nullable, - catalog::col_oid_t oid, byte *default_value=nullptr) { + catalog::col_oid_t oid, byte *default_value = nullptr) { // update columns, schema and layout cols_.emplace_back(name, type, nullable, oid, default_value); delete schema_; @@ -270,9 +269,7 @@ class SqlTableTestRW { * Set the attribute of col_oid to null * @param col_oid col_oid of the column in the schema */ - void SetNullInRow(catalog::col_oid_t col_oid) { - pr_->SetNull(pr_map_->at(col_oid)); - } + void SetNullInRow(catalog::col_oid_t col_oid) { pr_->SetNull(pr_map_->at(col_oid)); } /** * Read a string from a row @@ -837,7 +834,6 @@ TEST_F(SqlTableTests, MultipleColumnWidths) { // NOLINTNEXTLINE TEST_F(SqlTableTests, BasicDefaultValuesTest) { - // TODO: Test a typical Default value condition // Have 3 types of columns, 1 with default value at creation, one without and one with // default value added later explicitly. // Insert rows and check the output @@ -859,8 +855,7 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { // Explicitly set the layout version number table.version_ = storage::layout_version_t(1); int col2_default = 42; - table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2), - table.intToByteArray(col2_default)); + table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2), table.intToByteArray(col2_default)); // Insert (2, NULL, 890) table.StartInsertRow(); @@ -879,8 +874,7 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { // Add another column with a default value and insert a row table.version_ = storage::layout_version_t(2); int col3_default = 1729; - table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), - table.intToByteArray(col3_default)); + table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), table.intToByteArray(col3_default)); // Insert (3, 300, NULL, NULL) // NOTE: The default values for new rows are filled in by the execution engine @@ -956,7 +950,10 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { // Now set the default value of the column to something int col2_default = 42; - table.SetColumnDefault(catalog::col_oid_t(2), table.intToByteArray(col2_default)); + byte* col2_default_bytes = table.intToByteArray(col2_default); + table.SetColumnDefault(catalog::col_oid_t(2), col2_default_bytes); + delete[] col2_default_bytes; + // Add a new column - to trigger the UpdateSchema table.version_ = storage::layout_version_t(2); table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3)); @@ -999,7 +996,7 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { col3_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row3_slot); EXPECT_TRUE(col3_is_null); - // TODO: Test the following cases + // TODO(Sai): Test the following cases // 2. Column has no default value but is then added later // - Handled by the execution engine. Just make sure values are as expected // 3. Column has default value during creation, but removed later From b6631cf5015364fb4afc145cd1a79df1971dfc9e Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 16:17:20 -0400 Subject: [PATCH 13/19] Default value insertion for Scan. Fixed the Scan test case. --- src/include/catalog/schema.h | 1 + src/storage/data_table.cpp | 1 - src/storage/sql_table.cpp | 71 ++++++++++++++------------------- test/storage/sql_table_test.cpp | 23 ++++++----- 4 files changed, 44 insertions(+), 52 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 30dd5e7e9f..03a8abf571 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -25,6 +25,7 @@ class Schema { * the reliance on these classes */ class Column { + // TODO(Sai): DON'T use default parameters for function arguments. Everyone should explicitly pass in nullptr. public: /** * Instantiates a Column object, primary to be used for building a Schema object (non VARLEN attributes) diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 9040a4f3e5..9e53c7423b 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -199,7 +199,6 @@ bool DataTable::SelectIntoBuffer(transaction::TransactionContext *const txn, con // can potentially happen, and chase the version chain before returning anyway, for (uint16_t i = 0; i < out_buffer->NumColumns(); i++) { if (out_buffer->ColumnIds()[i] == VERSION_POINTER_COLUMN_ID) { - out_buffer->SetNull(i); // If we don't have this column, make sure its marked NULL continue; } StorageUtil::CopyAttrIntoProjection(accessor_, slot, out_buffer, i); diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 59bb7ba50a..8e7ed1e8b7 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -113,9 +113,6 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo return tables_.Find(version_num)->second.data_table->Select(txn, slot, out_buffer); } - // TODO(Sai): Instead of directly calling header mangling for version mismatch, - // is it better to verify that the columns in the PR required are exactly equal to the older version? - auto old_dt_version = tables_.Find(old_version_num)->second; auto curr_dt_version = tables_.Find(version_num)->second; @@ -132,22 +129,20 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo // Calculate the col_oids of out_buffer which are missing from the old version of datatable std::unordered_set missing_col_oids; - for (const auto &[col_oid, offset] : pr_map) { - missing_col_oids.emplace(col_oid); + for (const auto &it : pr_map) { + missing_col_oids.emplace(it.first); } - for (const auto &[col_oid, col_id] : old_dt_version.column_map) { + for (const auto &it : old_dt_version.column_map) { // Remove the col_oid from the missing_col_oids - missing_col_oids.erase(col_oid); + missing_col_oids.erase(it.first); } // Fill in the default values for the missing columns for (catalog::col_oid_t col_oid : missing_col_oids) { TERRIER_ASSERT(default_value_map_.count(col_oid) > 0, "Every column in schema must exist in default_value_map"); const auto &[default_value, attr_size] = default_value_map_.at(col_oid); - // If the default value for this column exists, copy it in to the PR - if (default_value != nullptr) { - storage::StorageUtil::CopyWithNullCheck(default_value, out_buffer, attr_size, pr_map.at(col_oid)); - } + // TODO(Sai): If this becomes a performance bottleneck, we can move this logic to ModifyProjectionHeaderForVersion + storage::StorageUtil::CopyWithNullCheck(default_value, out_buffer, attr_size, pr_map.at(col_oid)); } return result; @@ -275,55 +270,51 @@ std::pair SqlTable::Update(transaction::TransactionCon void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIterator *start_pos, ProjectedColumns *const out_buffer, const ProjectionMap &pr_map, layout_version_t version_num) const { - layout_version_t dt_version_num = start_pos->curr_version_; + layout_version_t old_version_num = start_pos->curr_version_; // TODO(Sai): Add version check before calling header mangling - // Also possibly checking for same columns instead in the PR and the old_dt_version? TERRIER_ASSERT(out_buffer->NumColumns() <= tables_.Find(version_num)->second.column_map.size(), "The output buffer never returns the version pointer columns, so it should have " "fewer attributes."); col_id_t original_column_ids[out_buffer->NumColumns()]; - ModifyProjectionHeaderForVersion(out_buffer, tables_.Find(version_num)->second, tables_.Find(dt_version_num)->second, + ModifyProjectionHeaderForVersion(out_buffer, tables_.Find(version_num)->second, tables_.Find(old_version_num)->second, original_column_ids); DataTable::SlotIterator *dt_slot = start_pos->GetDataTableSlotIterator(); - tables_.Find(dt_version_num)->second.data_table->Scan(txn, dt_slot, out_buffer); + tables_.Find(old_version_num)->second.data_table->Scan(txn, dt_slot, out_buffer); start_pos->AdvanceOnEndOfDatatable_(); uint32_t filled = out_buffer->NumTuples(); std::memcpy(out_buffer->ColumnIds(), original_column_ids, sizeof(col_id_t) * out_buffer->NumColumns()); out_buffer->SetNumTuples(filled); - // Populate the default values + auto old_dt_version = tables_.Find(old_version_num)->second; + auto curr_dt_version = tables_.Find(version_num)->second; + + // Populate the default values to all the scanned tuples if (filled > 0) { - // Since we are populating multiple tuples, calculate the columns matching with default value map - auto curr_dt_version = tables_.Find(version_num)->second; - // DefaultValueMap default_val_map = curr_dt_version.default_value_map; - // Find out the columns which contain default values and their projection list index - std::vector> matching_cols; - for (uint16_t i = 0; i < out_buffer->NumColumns(); i++) { - catalog::col_oid_t col_oid = curr_dt_version.inverse_column_map.at(out_buffer->ColumnIds()[i]); - // Fill in the default value if the entry is not null and the col_oid is in the map - if (default_value_map_.count(col_oid) > 0) { - matching_cols.emplace_back(col_oid, i); - } + // TODO(Sai): Abstract this out into a common function + // Calculate the col_oids of out_buffer which are missing from the old version of datatable + std::unordered_set missing_col_oids; + for (const auto &it : pr_map) { + missing_col_oids.emplace(it.first); + } + for (const auto &it : old_dt_version.column_map) { + // Remove the col_oid from the missing_col_oids + missing_col_oids.erase(it.first); } - // TODO(Sai): Make the change of the default value as in Select - // TODO(Sai): Try an efficient way of filling in default values for the ProjectedColumns? - // Scan returns ProjectedColumns from a single (older) version of DataTable. So, just copy the default values - // of the matching columns calculated above. - // Fill in the matching default values for each of the rows - // Since each row could have different columns unfilled, using RowView to iterate over ProjectedColumns + // TODO(Sai): The default values can be populated directly into the ProjectedColumns, making it faster than the + // case of column being present. Need to handle the bitmask operations correctly for null default values. + // Fill in the default values for the missing columns for (uint32_t row_idx = 0; row_idx < filled; row_idx++) { ProjectedColumns::RowView row = out_buffer->InterpretAsRow(row_idx); - for (auto col_oid_idx_pair : matching_cols) { - auto col_oid = col_oid_idx_pair.first; - auto idx = col_oid_idx_pair.second; - if (row.IsNull(idx)) { - auto dv_pair = default_value_map_.at(col_oid); - storage::StorageUtil::CopyWithNullCheck(dv_pair.first, &row, dv_pair.second, idx); - } + for (catalog::col_oid_t col_oid : missing_col_oids) { + TERRIER_ASSERT(default_value_map_.count(col_oid) > 0, "Every column in schema must exist in default_value_map"); + const auto &[default_value, attr_size] = default_value_map_.at(col_oid); + // TODO(Sai): If this becomes a performance bottleneck, we can move this logic to + // ModifyProjectionHeaderForVersion + storage::StorageUtil::CopyWithNullCheck(default_value, &row, attr_size, pr_map.at(col_oid)); } } } diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index 2b48e10fd4..a6b8077982 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -321,7 +321,7 @@ class SqlTableTestRW { * @param n an integer * @return byte array */ - byte *intToByteArray(int n) { + byte *IntToByteArray(int n) { auto byteArray = new byte[sizeof(n)]; memcpy(byteArray, reinterpret_cast(&n), sizeof(n)); return byteArray; @@ -399,7 +399,7 @@ TEST_F(SqlTableTests, SelectTest) { int default_val = 42; // Add a new column with a default value table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), - table.intToByteArray(default_val)); + table.IntToByteArray(default_val)); id = table.GetIntColInRow(txn, catalog::col_oid_t(0), row1_slot); EXPECT_EQ(100, id); @@ -626,7 +626,7 @@ TEST_F(SqlTableTests, ScanTest) { // new_col_map[100] = NULL (created before column added) // new_col_map[200] = NULL (created before column added) - // new_col_map[300] = NULL (inserted without value) + // new_col_map[300] = NULL (Not explicitly specified); new_col_map[400] = 42; seen_map[100] = false; @@ -655,12 +655,13 @@ TEST_F(SqlTableTests, ScanTest) { // manually set the version of the transaction to be 1 table.version_ = storage::layout_version_t(1); table.AddColumn(txn, "new_col", type::TypeId::INTEGER, true, catalog::col_oid_t(2), - table.intToByteArray(new_col_default_value)); + table.IntToByteArray(new_col_default_value)); - // insert (300, 10002, 1729) + // insert (300, 10002, 1729) - Default value populated by the execution engine table.StartInsertRow(); table.SetIntColInRow(catalog::col_oid_t(0), 300); table.SetIntColInRow(catalog::col_oid_t(1), datname_map[300]); + table.SetIntColInRow(catalog::col_oid_t(2), new_col_default_value); table.EndInsertRow(txn); // insert (400, 10003, 42) @@ -834,9 +835,9 @@ TEST_F(SqlTableTests, MultipleColumnWidths) { // NOLINTNEXTLINE TEST_F(SqlTableTests, BasicDefaultValuesTest) { - // Have 3 types of columns, 1 with default value at creation, one without and one with - // default value added later explicitly. - // Insert rows and check the output + // TODO(Sai): Merge the sql_table_test and sql_table_concurrent_test frameworks to avoid repetition for adding in + // columns. Have 3 types of columns, 1 with default value at creation, one without and one with default value added + // later explicitly. Insert rows and check the output SqlTableTestRW table(catalog::table_oid_t(2)); auto txn = txn_manager_.BeginTransaction(); @@ -855,7 +856,7 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { // Explicitly set the layout version number table.version_ = storage::layout_version_t(1); int col2_default = 42; - table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2), table.intToByteArray(col2_default)); + table.AddColumn(txn, "col2", type::TypeId::INTEGER, true, catalog::col_oid_t(2), table.IntToByteArray(col2_default)); // Insert (2, NULL, 890) table.StartInsertRow(); @@ -874,7 +875,7 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { // Add another column with a default value and insert a row table.version_ = storage::layout_version_t(2); int col3_default = 1729; - table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), table.intToByteArray(col3_default)); + table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), table.IntToByteArray(col3_default)); // Insert (3, 300, NULL, NULL) // NOTE: The default values for new rows are filled in by the execution engine @@ -950,7 +951,7 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { // Now set the default value of the column to something int col2_default = 42; - byte* col2_default_bytes = table.intToByteArray(col2_default); + byte *col2_default_bytes = table.IntToByteArray(col2_default); table.SetColumnDefault(catalog::col_oid_t(2), col2_default_bytes); delete[] col2_default_bytes; From 66330acb47d45407e523ec7edd74e05f6f78f6de Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 16:23:33 -0400 Subject: [PATCH 14/19] Added check for version match in Scan --- src/storage/sql_table.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 8e7ed1e8b7..da62b2baac 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -276,11 +276,20 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt TERRIER_ASSERT(out_buffer->NumColumns() <= tables_.Find(version_num)->second.column_map.size(), "The output buffer never returns the version pointer columns, so it should have " "fewer attributes."); + + DataTable::SlotIterator *dt_slot = start_pos->GetDataTableSlotIterator(); + + // Check for version match + if (old_version_num == version_num) { + tables_.Find(version_num)->second.data_table->Scan(txn, dt_slot, out_buffer); + start_pos->AdvanceOnEndOfDatatable_(); + return; + } + col_id_t original_column_ids[out_buffer->NumColumns()]; ModifyProjectionHeaderForVersion(out_buffer, tables_.Find(version_num)->second, tables_.Find(old_version_num)->second, original_column_ids); - DataTable::SlotIterator *dt_slot = start_pos->GetDataTableSlotIterator(); tables_.Find(old_version_num)->second.data_table->Scan(txn, dt_slot, out_buffer); start_pos->AdvanceOnEndOfDatatable_(); From 033f0cc3f012a658887686c39a1197384dbebffe Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 17:58:36 -0400 Subject: [PATCH 15/19] Handle null case in setting a default value --- src/include/catalog/schema.h | 30 ++++++++++++++---------------- test/storage/sql_table_test.cpp | 6 +----- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 03a8abf571..0e04cdec9c 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -114,27 +114,23 @@ class Schema { */ byte *GetDefault() const { return default_; } - /** - * Clear the default value of the column - */ - void ClearDefault() { - if (default_ != nullptr) { - // Free the memory allocate to the default value - delete default_; - default_ = nullptr; - } - } - /** * Set the default value of the column - * @param default_value default_value as a bytes array + * @param default_value default_value as a bytes array. Could be nullptr */ void SetDefault(byte *default_value) { - if (default_ == nullptr) { - default_ = new byte[attr_size_]; + // If explicitly setting the default value to null + if (default_value == nullptr && default_ != nullptr) { + // Free the memory allocated to the default value + delete default_; + default_ = nullptr; + } else { + if (default_ == nullptr) { + default_ = new byte[attr_size_]; + } + // Copy the new default value + std::memcpy(default_, default_value, attr_size_); } - // Copy the new default value - std::memcpy(default_, default_value, attr_size_); } private: @@ -144,6 +140,8 @@ class Schema { uint16_t max_varlen_size_; const bool nullable_; const col_oid_t oid_; + // TODO(Sai): Consider having a DefaultValueObject containing isNull, 16-byte variable and attribute size + // This avoids handling memory explicitly for default values byte *default_; }; diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index a6b8077982..18811b718e 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -998,12 +998,8 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { EXPECT_TRUE(col3_is_null); // TODO(Sai): Test the following cases - // 2. Column has no default value but is then added later - // - Handled by the execution engine. Just make sure values are as expected - // 3. Column has default value during creation, but removed later + // 1. Column has default value during creation, but removed later // - Old rows should return default value but new ones shouldn't - // 4. Column with a default value, but the value passed in is forced to be NULL. - // - Default values shouldn't be filled in for these. txn_manager_.Commit(txn, TestCallbacks::EmptyCallback, nullptr); delete txn; From 8ba1635786e85e0b9409bd10c6d250038d63e3ae Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 19:24:39 -0400 Subject: [PATCH 16/19] Refactoring missing_cols calculation --- src/include/storage/sql_table.h | 11 +++++++++++ src/storage/sql_table.cpp | 35 +++++++++++++++------------------ test/storage/sql_table_test.cpp | 4 ---- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/include/storage/sql_table.h b/src/include/storage/sql_table.h index c792df4e56..8483e412be 100644 --- a/src/include/storage/sql_table.h +++ b/src/include/storage/sql_table.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include "catalog/schema.h" @@ -356,5 +357,15 @@ class SqlTable { template void ModifyProjectionHeaderForVersion(RowType *out_buffer, const DataTableVersion &curr_dt_version, const DataTableVersion &old_dt_version, col_id_t *original_col_id_store) const; + + /** + * Calculate the columns of the ProjectionMap that are missing from the old version of datatable + * Used to find out the columns for which default values must be filled in + * @param pr_map ProjectionMap of the ProjectedRow passed into Select/Scan + * @param old_dt_version old version of the datatable + * @return Unordered set of missing column oids + */ + std::unordered_set GetMissingColumnOidsForVersion(const ProjectionMap &pr_map, + const DataTableVersion &old_dt_version) const; }; } // namespace terrier::storage diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 34619fce73..1d0d1c8728 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -129,14 +129,7 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo // Do not fill default values for the columns which already exist in the old_dt_version // Calculate the col_oids of out_buffer which are missing from the old version of datatable - std::unordered_set missing_col_oids; - for (const auto &it : pr_map) { - missing_col_oids.emplace(it.first); - } - for (const auto &it : old_dt_version.column_map) { - // Remove the col_oid from the missing_col_oids - missing_col_oids.erase(it.first); - } + std::unordered_set missing_col_oids = GetMissingColumnOidsForVersion(pr_map, old_dt_version); // Fill in the default values for the missing columns for (catalog::col_oid_t col_oid : missing_col_oids) { @@ -283,7 +276,6 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt layout_version_t version_num) const { layout_version_t old_version_num = start_pos->curr_version_; - // TODO(Sai): Add version check before calling header mangling TERRIER_ASSERT(out_buffer->NumColumns() <= tables_.Find(version_num)->second.column_map.size(), "The output buffer never returns the version pointer columns, so it should have " "fewer attributes."); @@ -313,16 +305,8 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt // Populate the default values to all the scanned tuples if (filled > 0) { - // TODO(Sai): Abstract this out into a common function // Calculate the col_oids of out_buffer which are missing from the old version of datatable - std::unordered_set missing_col_oids; - for (const auto &it : pr_map) { - missing_col_oids.emplace(it.first); - } - for (const auto &it : old_dt_version.column_map) { - // Remove the col_oid from the missing_col_oids - missing_col_oids.erase(it.first); - } + std::unordered_set missing_col_oids = GetMissingColumnOidsForVersion(pr_map, old_dt_version); // TODO(Sai): The default values can be populated directly into the ProjectedColumns, making it faster than the // case of column being present. Need to handle the bitmask operations correctly for null default values. @@ -332,7 +316,6 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt for (catalog::col_oid_t col_oid : missing_col_oids) { TERRIER_ASSERT(default_value_map_.count(col_oid) > 0, "Every column in schema must exist in default_value_map"); const auto &[default_value, attr_size] = default_value_map_.at(col_oid); - // TODO(Sai): If this becomes a performance bottleneck, we can move this logic to // ModifyProjectionHeaderForVersion storage::StorageUtil::CopyWithNullCheck(default_value, &row, attr_size, pr_map.at(col_oid)); } @@ -418,4 +401,18 @@ template void SqlTable::ModifyProjectionHeaderForVersion(Proje const DataTableVersion &old_dt_version, col_id_t *original_col_id_store) const; +std::unordered_set SqlTable::GetMissingColumnOidsForVersion( + const ProjectionMap &pr_map, const DataTableVersion &old_dt_version) const { + // Calculate the col_oids of out_buffer which are missing from the old version of datatable + std::unordered_set missing_col_oids; + for (const auto &it : pr_map) { + missing_col_oids.emplace(it.first); + } + for (const auto &it : old_dt_version.column_map) { + // Remove the col_oid from the missing_col_oids + missing_col_oids.erase(it.first); + } + return missing_col_oids; +} + } // namespace terrier::storage diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index cb6946365f..df3fe78bae 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -997,10 +997,6 @@ TEST_F(SqlTableTests, ModifyDefaultValuesTest) { col3_is_null = table.IsNullColInRow(txn, catalog::col_oid_t(3), row3_slot); EXPECT_TRUE(col3_is_null); - // TODO(Sai): Test the following cases - // 1. Column has default value during creation, but removed later - // - Old rows should return default value but new ones shouldn't - txn_manager_.Commit(txn, TestCallbacks::EmptyCallback, nullptr); delete txn; } From a9a45650428669215cf2e722920ec643d6287ea8 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 22:22:11 -0400 Subject: [PATCH 17/19] Updated test comments --- test/storage/sql_table_test.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/storage/sql_table_test.cpp b/test/storage/sql_table_test.cpp index df3fe78bae..26633bc143 100644 --- a/test/storage/sql_table_test.cpp +++ b/test/storage/sql_table_test.cpp @@ -833,11 +833,11 @@ TEST_F(SqlTableTests, MultipleColumnWidths) { delete txn; } +// TODO(Sai): Merge the sql_table_test and sql_table_concurrent_test frameworks to avoid repetition // NOLINTNEXTLINE TEST_F(SqlTableTests, BasicDefaultValuesTest) { - // TODO(Sai): Merge the sql_table_test and sql_table_concurrent_test frameworks to avoid repetition for adding in - // columns. Have 3 types of columns, 1 with default value at creation, one without and one with default value added - // later explicitly. Insert rows and check the output + // Test for adding new columns with default values + // Default value for a column should only be filled in for the versions that don't have that column SqlTableTestRW table(catalog::table_oid_t(2)); auto txn = txn_manager_.BeginTransaction(); @@ -878,7 +878,6 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { table.AddColumn(txn, "col3", type::TypeId::INTEGER, true, catalog::col_oid_t(3), table.IntToByteArray(col3_default)); // Insert (3, 300, NULL, NULL) - // NOTE: The default values for new rows are filled in by the execution engine table.StartInsertRow(); table.SetIntColInRow(catalog::col_oid_t(0), 3); table.SetIntColInRow(catalog::col_oid_t(1), 300); @@ -923,7 +922,7 @@ TEST_F(SqlTableTests, BasicDefaultValuesTest) { TEST_F(SqlTableTests, ModifyDefaultValuesTest) { // Testing the case of adding a column without a default value // and then setting the default value for that column. - // This should not retro-actively populate the default value for older tuples + // This should NOT retro-actively populate the default value for older tuples SqlTableTestRW table(catalog::table_oid_t(2)); auto txn = txn_manager_.BeginTransaction(); From 4fa1ee198506a14364b0fa6603ed5994c555fd07 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 23:08:02 -0400 Subject: [PATCH 18/19] Changed DefaultValueMap from unordered_map to ConcurrentMap. --- src/include/storage/storage_defs.h | 4 +++- src/storage/sql_table.cpp | 19 ++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/include/storage/storage_defs.h b/src/include/storage/storage_defs.h index e2aa397b1e..a3a1b5d086 100644 --- a/src/include/storage/storage_defs.h +++ b/src/include/storage/storage_defs.h @@ -10,6 +10,7 @@ #include "catalog/catalog_defs.h" #include "common/constants.h" #include "common/container/bitmap.h" +#include "common/container/concurrent_map.h" #include "common/hash_util.h" #include "common/macros.h" #include "common/object_pool.h" @@ -180,7 +181,8 @@ using InverseColumnMap = std::unordered_map; /** * Used by SqlTable to map between col_oids in Schema and their {default_value, attribute_size} */ -using DefaultValueMap = std::unordered_map>; +// using DefaultValueMap = std::unordered_map>; +using DefaultValueMap = common::ConcurrentMap>; /** * Denote whether a record modifies the logical delete column, used when DataTable inspects deltas diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index 1d0d1c8728..eecab41d87 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -80,9 +80,9 @@ void SqlTable::UpdateSchema(const catalog::Schema &schema) { auto col_oid = column.GetOid(); byte *default_value = column.GetDefault(); // Only populate the default values of the columns which are new and have a default value - if (default_value_map_.count(col_oid) == 0) { + if (default_value_map_.Find(col_oid) == default_value_map_.End()) { uint8_t attr_size = column.GetAttrSize(); - default_value_map_[column.GetOid()] = {default_value, attr_size}; + default_value_map_.Insert(column.GetOid(), {default_value, attr_size}); } } @@ -133,8 +133,11 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo // Fill in the default values for the missing columns for (catalog::col_oid_t col_oid : missing_col_oids) { - TERRIER_ASSERT(default_value_map_.count(col_oid) > 0, "Every column in schema must exist in default_value_map"); - const auto &[default_value, attr_size] = default_value_map_.at(col_oid); + TERRIER_ASSERT(default_value_map_.Find(col_oid) != default_value_map_.CEnd(), + "Every column in schema must exist in default_value_map"); + auto pair = default_value_map_.Find(col_oid)->second; + auto default_value = pair.first; + auto attr_size = pair.second; // TODO(Sai): If this becomes a performance bottleneck, we can move this logic to ModifyProjectionHeaderForVersion storage::StorageUtil::CopyWithNullCheck(default_value, out_buffer, attr_size, pr_map.at(col_oid)); } @@ -314,9 +317,11 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt for (uint32_t row_idx = 0; row_idx < filled; row_idx++) { ProjectedColumns::RowView row = out_buffer->InterpretAsRow(row_idx); for (catalog::col_oid_t col_oid : missing_col_oids) { - TERRIER_ASSERT(default_value_map_.count(col_oid) > 0, "Every column in schema must exist in default_value_map"); - const auto &[default_value, attr_size] = default_value_map_.at(col_oid); - // ModifyProjectionHeaderForVersion + TERRIER_ASSERT(default_value_map_.Find(col_oid) != default_value_map_.CEnd(), + "Every column in schema must exist in default_value_map"); + auto pair = default_value_map_.Find(col_oid)->second; + auto default_value = pair.first; + auto attr_size = pair.second; storage::StorageUtil::CopyWithNullCheck(default_value, &row, attr_size, pr_map.at(col_oid)); } } From 9384ea0229999882822569a6971e73e951369ad4 Mon Sep 17 00:00:00 2001 From: Sai Kiriti Badam Date: Sat, 4 May 2019 23:53:19 -0400 Subject: [PATCH 19/19] Refactoring for default value filling --- src/include/storage/sql_table.h | 10 ++++++++++ src/storage/sql_table.cpp | 33 ++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/include/storage/sql_table.h b/src/include/storage/sql_table.h index 8483e412be..eefb481d7b 100644 --- a/src/include/storage/sql_table.h +++ b/src/include/storage/sql_table.h @@ -367,5 +367,15 @@ class SqlTable { */ std::unordered_set GetMissingColumnOidsForVersion(const ProjectionMap &pr_map, const DataTableVersion &old_dt_version) const; + + /** + * Fill in the default value for the given column i.e. col_oid. The default value being filled in can be null + * @tparam RowType ProjectedRow or ProjectedColumns::RowView + * @param out_buffer ProjectedRow or ProjectedColumns::RowView + * @param col_oid OID of the column + * @param pr_map ProjectionMap of the RowType + */ + template + void FillDefaultValue(RowType *out_buffer, catalog::col_oid_t col_oid, const ProjectionMap &pr_map) const; }; } // namespace terrier::storage diff --git a/src/storage/sql_table.cpp b/src/storage/sql_table.cpp index eecab41d87..61916e9b1c 100644 --- a/src/storage/sql_table.cpp +++ b/src/storage/sql_table.cpp @@ -133,13 +133,7 @@ bool SqlTable::Select(transaction::TransactionContext *const txn, const TupleSlo // Fill in the default values for the missing columns for (catalog::col_oid_t col_oid : missing_col_oids) { - TERRIER_ASSERT(default_value_map_.Find(col_oid) != default_value_map_.CEnd(), - "Every column in schema must exist in default_value_map"); - auto pair = default_value_map_.Find(col_oid)->second; - auto default_value = pair.first; - auto attr_size = pair.second; - // TODO(Sai): If this becomes a performance bottleneck, we can move this logic to ModifyProjectionHeaderForVersion - storage::StorageUtil::CopyWithNullCheck(default_value, out_buffer, attr_size, pr_map.at(col_oid)); + FillDefaultValue(out_buffer, col_oid, pr_map); } return result; @@ -317,12 +311,7 @@ void SqlTable::Scan(transaction::TransactionContext *const txn, SqlTable::SlotIt for (uint32_t row_idx = 0; row_idx < filled; row_idx++) { ProjectedColumns::RowView row = out_buffer->InterpretAsRow(row_idx); for (catalog::col_oid_t col_oid : missing_col_oids) { - TERRIER_ASSERT(default_value_map_.Find(col_oid) != default_value_map_.CEnd(), - "Every column in schema must exist in default_value_map"); - auto pair = default_value_map_.Find(col_oid)->second; - auto default_value = pair.first; - auto attr_size = pair.second; - storage::StorageUtil::CopyWithNullCheck(default_value, &row, attr_size, pr_map.at(col_oid)); + FillDefaultValue(&row, col_oid, pr_map); } } } @@ -420,4 +409,22 @@ std::unordered_set SqlTable::GetMissingColumnOidsForVersion( return missing_col_oids; } +template +void SqlTable::FillDefaultValue(RowType *out_buffer, const catalog::col_oid_t col_oid, + const ProjectionMap &pr_map) const { + TERRIER_ASSERT(default_value_map_.Find(col_oid) != default_value_map_.CEnd(), + "Every column in schema must exist in default_value_map"); + auto pair = default_value_map_.Find(col_oid)->second; + auto default_value = pair.first; + auto attr_size = pair.second; + // TODO(Sai): If this becomes a performance bottleneck, we can move this logic to ModifyProjectionHeaderForVersion + storage::StorageUtil::CopyWithNullCheck(default_value, out_buffer, attr_size, pr_map.at(col_oid)); +} + +template void SqlTable::FillDefaultValue(ProjectedRow *, const catalog::col_oid_t, + const ProjectionMap &) const; +template void SqlTable::FillDefaultValue(ProjectedColumns::RowView *, + const catalog::col_oid_t, + const ProjectionMap &) const; + } // namespace terrier::storage