Skip to content

Commit

Permalink
Use a Python list instead of std::vector when returning compressed columns.
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Feb 22, 2014
1 parent 4b8664f commit eab3d91
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 4 deletions.
2 changes: 1 addition & 1 deletion cuda_hamming.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def alloc_device_memory_for_cols(self, compressed_columns_vec, vlq_mode, max_len
concate_col = numpy.zeros(max_length * 2 * 4).astype(numpy.uint8)
else:
concate_col = numpy.zeros(max_length * 2).astype(numpy.uint8)

gpu_alloc_objs = []
for col_idx in range(0, 64):

Expand Down
71 changes: 71 additions & 0 deletions fastdict/fastdict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,39 @@ class FastCompressDict: public FastDict<IdType> {
dict_status = 2;
}

// Build runtime_python_dict from column_dict, then empty column_dict.
//
// For every entry of column_dict, each compressed column is copied into a
// freshly allocated BitCountType array and exposed to Python through a buffer
// object. The buffers of one entry are collected in a boost::python::list and
// stored under the same key as
//   (column lengths, (list of column buffers, ids)).
// dict_status is set to 2 once all entries have been converted.
//
// Raises the pending Python error (boost::python::error_already_set) if a
// buffer object cannot be created.
void init_runtime_python_dict() {

    std::pair<std::vector<uint8_t>, std::pair<std::vector<std::vector<BitCountType> >, std::vector<IdType> > > entry;

    BOOST_FOREACH(entry, column_dict) {
        boost::python::list column_buffers;
        std::vector<uint32_t> column_lengths;
        column_lengths.reserve(entry.second.first.size());

        // Iterate by const reference: the column is only read while it is
        // copied into the array handed to Python.
        BOOST_FOREACH(const std::vector<BitCountType>& column, entry.second.first) {
            const size_t element_count = column.size();

            // PyBuffer_FromMemory neither copies nor takes ownership of the
            // memory, so this array has to outlive the buffer object and is
            // deliberately not released on the success path.
            BitCountType* column_as_array = new BitCountType[element_count];
            for (size_t i = 0; i < element_count; ++i) {
                column_as_array[i] = column[i];
            }

            PyObject* buffer_obj = PyBuffer_FromMemory(
                static_cast<void*>(column_as_array),
                static_cast<Py_ssize_t>(element_count * sizeof(BitCountType)));
            if (buffer_obj == NULL) {
                // No buffer object refers to the array, so it is safe to free.
                delete[] column_as_array;
                boost::python::throw_error_already_set();
            }

            // handle<> adopts the new reference returned by PyBuffer_FromMemory;
            // after the append the list is the sole owner of the buffer object.
            column_buffers.append(boost::python::object(boost::python::handle<>(buffer_obj)));
            column_lengths.push_back(static_cast<uint32_t>(element_count));
        }

        std::pair<boost::python::list, std::vector<IdType> > buffers_and_ids(column_buffers, entry.second.second);
        std::pair<std::vector<uint32_t>, std::pair<boost::python::list, std::vector<IdType> > > runtime_entry(column_lengths, buffers_and_ids);
        runtime_python_dict[entry.first] = runtime_entry;
    }
    column_dict.clear();

    dict_status = 2;
}

// initiate runtime dict for VLQ base64 column dict
void init_runtime_VLQ_base64_dict() {

Expand Down Expand Up @@ -638,6 +671,17 @@ class FastCompressDict: public FastDict<IdType> {
return buffers;
}

// Return the Python list of column buffers stored in runtime_python_dict for
// `key` (after translating it with super::actual_key), or a fresh empty list
// when the key is not present. The returned pointer carries its own reference.
PyObject* get_python_cols_as_buffer(uint32_t key) {
    typedef std::map<std::vector<uint8_t>, std::pair<std::vector<uint32_t>, std::pair<boost::python::list, std::vector<IdType> > > > RuntimePythonDict;

    std::vector<uint8_t> lookup_key = super::actual_key(key);
    typename RuntimePythonDict::iterator found = runtime_python_dict.find(lookup_key);

    if (found == runtime_python_dict.end()) {
        // Unknown key: hand back an empty list; the incref keeps it alive
        // after the local wrapper goes out of scope.
        boost::python::list empty_columns;
        return boost::python::incref(empty_columns.ptr());
    }

    return boost::python::incref(found->second.second.first.ptr());
}

// for non VLQ base64 runtime dict
std::vector<std::vector<PyObject*> > mget_cols_as_buffer(boost::python::list& keys) {

Expand All @@ -648,7 +692,17 @@ class FastCompressDict: public FastDict<IdType> {
}
return return_vector;
}

// Multi-key variant of get_python_cols_as_buffer().
//
// `keys` is a Python list whose items are extracted as uint32_t. The result is
// a new Python list holding, in the same order, whatever
// get_python_cols_as_buffer() returns for each key. The returned pointer
// carries its own reference.
PyObject* mget_python_cols_as_buffer(boost::python::list& keys) {

    boost::python::list per_key_columns;

    // Query the length once instead of on every iteration.
    const long key_count = boost::python::len(keys);

    for (long i = 0; i < key_count; i++) {
        const uint32_t key = boost::python::extract<uint32_t>(keys[i]);

        // get_python_cols_as_buffer() returns a new reference. handle<> adopts
        // it and releases it at the end of the iteration, so the only
        // remaining reference is the one held by per_key_columns.
        boost::python::handle<> columns(get_python_cols_as_buffer(key));
        per_key_columns.append(boost::python::object(columns));
    }
    return boost::python::incref(per_key_columns.ptr());
}


// for VLQ base64 runtime dict
std::vector<PyObject*> get_VLQ_base64_cols_as_buffer(uint32_t key) {
Expand Down Expand Up @@ -690,6 +744,8 @@ class FastCompressDict: public FastDict<IdType> {

if (runtime_dict.count(bool_key) > 0)
return runtime_dict[bool_key].second.second;
else if (runtime_python_dict.count(bool_key) > 0)
return runtime_python_dict[bool_key].second.second;
else {
std::vector<IdType> id_vector(0);
return id_vector;
Expand Down Expand Up @@ -1074,6 +1130,9 @@ class FastCompressDict: public FastDict<IdType> {

// Runtime dict whose columns are kept as raw BitCountType* arrays. The mapped
// value nests a vector of uint32_t with a pair of (column pointers, ids).
// NOTE(review): the code that fills this map is outside this view; the
// uint32_t vector presumably holds the per-column lengths -- confirm.
std::map<std::vector<uint8_t>, std::pair<std::vector<uint32_t>, std::pair<std::vector<BitCountType*>, std::vector<IdType> > > > runtime_dict;

// Runtime dict that stores the columns in a boost::python::list instead of a
// std::vector. Filled by init_runtime_python_dict() as
//   key -> (column lengths, (list of column buffers, ids)).
std::map<std::vector<uint8_t>, std::pair<std::vector<uint32_t>, std::pair<boost::python::list, std::vector<IdType> > > > runtime_python_dict;

// Runtime dict whose columns are kept as char* values.
// NOTE(review): judging by the name this backs the VLQ base64 mode; the code
// that fills it is outside this view -- confirm.
std::map<std::vector<uint8_t>, std::pair<std::vector<uint32_t>, std::pair<std::vector<char*>, std::vector<IdType> > > > runtime_vlq_dict;

};
Expand Down Expand Up @@ -1226,14 +1285,17 @@ BOOST_PYTHON_MODULE(fastdict)
.def("get_binary_codes", &FastCompressDict<uint8_t, uint32_t>::get_binary_codes)
.def("mget_binary_codes", &FastCompressDict<uint8_t, uint32_t>::mget_binary_codes)
.def("get_cols_as_buffer", &FastCompressDict<uint8_t, uint32_t>::get_cols_as_buffer)
.def("get_python_cols_as_buffer", &FastCompressDict<uint8_t, uint32_t>::get_python_cols_as_buffer)
.def("mget_cols_as_buffer", &FastCompressDict<uint8_t, uint32_t>::mget_cols_as_buffer)
.def("mget_python_cols_as_buffer", &FastCompressDict<uint8_t, uint32_t>::mget_python_cols_as_buffer)
.def("get_image_ids", &FastCompressDict<uint8_t, uint32_t>::get_image_ids)
.def("mget_image_ids", &FastCompressDict<uint8_t, uint32_t>::mget_image_ids)
.def("get_image_ids_before_runtime_init", &FastCompressDict<uint8_t, uint32_t>::get_image_ids_before_runtime_init)
.def("mget_image_ids_before_runtime_init", &FastCompressDict<uint8_t, uint32_t>::mget_image_ids_before_runtime_init)
.def("get_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint8_t, uint32_t>::get_VLQ_base64_image_ids_before_runtime_init)
.def("mget_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint8_t, uint32_t>::mget_VLQ_base64_image_ids_before_runtime_init)
.def("init_runtime_dict", &FastCompressDict<uint8_t, uint32_t>::init_runtime_dict)
.def("init_runtime_python_dict", &FastCompressDict<uint8_t, uint32_t>::init_runtime_python_dict)
.def("base64VLQ_encode", &FastCompressDict<uint8_t, uint32_t>::base64VLQ_encode<uint8_t>)
.def("base64VLQ_decode", &FastCompressDict<uint8_t, uint32_t>::base64VLQ_decode<uint8_t>)
.def("to_VLQ_base64_dict", &FastCompressDict<uint8_t, uint32_t>::to_VLQ_base64_dict)
Expand Down Expand Up @@ -1311,14 +1373,17 @@ BOOST_PYTHON_MODULE(fastdict)
.def("get_binary_codes", &FastCompressDict<uint32_t, uint32_t>::get_binary_codes)
.def("mget_binary_codes", &FastCompressDict<uint32_t, uint32_t>::mget_binary_codes)
.def("get_cols_as_buffer", &FastCompressDict<uint32_t, uint32_t>::get_cols_as_buffer)
.def("get_python_cols_as_buffer", &FastCompressDict<uint32_t, uint32_t>::get_python_cols_as_buffer)
.def("mget_cols_as_buffer", &FastCompressDict<uint32_t, uint32_t>::mget_cols_as_buffer)
.def("mget_python_cols_as_buffer", &FastCompressDict<uint32_t, uint32_t>::mget_python_cols_as_buffer)
.def("get_image_ids", &FastCompressDict<uint32_t, uint32_t>::get_image_ids)
.def("mget_image_ids", &FastCompressDict<uint32_t, uint32_t>::mget_image_ids)
.def("get_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint32_t>::get_image_ids_before_runtime_init)
.def("mget_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint32_t>::mget_image_ids_before_runtime_init)
.def("get_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint32_t>::get_VLQ_base64_image_ids_before_runtime_init)
.def("mget_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint32_t>::mget_VLQ_base64_image_ids_before_runtime_init)
.def("init_runtime_dict", &FastCompressDict<uint32_t, uint32_t>::init_runtime_dict)
.def("init_runtime_python_dict", &FastCompressDict<uint32_t, uint32_t>::init_runtime_python_dict)
.def("base64VLQ_encode", &FastCompressDict<uint32_t, uint32_t>::base64VLQ_encode<uint32_t>)
.def("base64VLQ_decode", &FastCompressDict<uint32_t, uint32_t>::base64VLQ_decode<uint32_t>)
.def("to_VLQ_base64_dict", &FastCompressDict<uint32_t, uint32_t>::to_VLQ_base64_dict)
Expand Down Expand Up @@ -1388,14 +1453,17 @@ BOOST_PYTHON_MODULE(fastdict)
.def("get_binary_codes", &FastCompressDict<uint32_t, uint8_t>::get_binary_codes)
.def("mget_binary_codes", &FastCompressDict<uint32_t, uint8_t>::mget_binary_codes)
.def("get_cols_as_buffer", &FastCompressDict<uint32_t, uint8_t>::get_cols_as_buffer)
.def("get_python_cols_as_buffer", &FastCompressDict<uint32_t, uint8_t>::get_python_cols_as_buffer)
.def("mget_cols_as_buffer", &FastCompressDict<uint32_t, uint8_t>::mget_cols_as_buffer)
.def("mget_python_cols_as_buffer", &FastCompressDict<uint32_t, uint8_t>::mget_python_cols_as_buffer)
.def("get_image_ids", &FastCompressDict<uint32_t, uint8_t>::get_image_ids)
.def("mget_image_ids", &FastCompressDict<uint32_t, uint8_t>::mget_image_ids)
.def("get_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint8_t>::get_image_ids_before_runtime_init)
.def("mget_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint8_t>::mget_image_ids_before_runtime_init)
.def("get_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint8_t>::get_VLQ_base64_image_ids_before_runtime_init)
.def("mget_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint32_t, uint8_t>::mget_VLQ_base64_image_ids_before_runtime_init)
.def("init_runtime_dict", &FastCompressDict<uint32_t, uint8_t>::init_runtime_dict)
.def("init_runtime_python_dict", &FastCompressDict<uint32_t, uint8_t>::init_runtime_python_dict)
.def("base64VLQ_encode", &FastCompressDict<uint32_t, uint8_t>::base64VLQ_encode<uint32_t>)
.def("base64VLQ_decode", &FastCompressDict<uint32_t, uint8_t>::base64VLQ_decode<uint32_t>)
.def("to_VLQ_base64_dict", &FastCompressDict<uint32_t, uint8_t>::to_VLQ_base64_dict)
Expand Down Expand Up @@ -1466,14 +1534,17 @@ BOOST_PYTHON_MODULE(fastdict)
.def("get_binary_codes", &FastCompressDict<uint32_t, std::string>::get_binary_codes)
.def("mget_binary_codes", &FastCompressDict<uint32_t, std::string>::mget_binary_codes)
.def("get_cols_as_buffer", &FastCompressDict<uint32_t, std::string>::get_cols_as_buffer)
.def("get_python_cols_as_buffer", &FastCompressDict<uint32_t, std::string>::get_python_cols_as_buffer)
.def("mget_cols_as_buffer", &FastCompressDict<uint32_t, std::string>::mget_cols_as_buffer)
.def("mget_python_cols_as_buffer", &FastCompressDict<uint32_t, std::string>::mget_python_cols_as_buffer)
.def("get_image_ids", &FastCompressDict<uint32_t, std::string>::get_image_ids)
.def("mget_image_ids", &FastCompressDict<uint32_t, std::string>::mget_image_ids)
.def("get_image_ids_before_runtime_init", &FastCompressDict<uint32_t, std::string>::get_image_ids_before_runtime_init)
.def("mget_image_ids_before_runtime_init", &FastCompressDict<uint32_t, std::string>::mget_image_ids_before_runtime_init)
.def("get_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint32_t, std::string>::get_VLQ_base64_image_ids_before_runtime_init)
.def("mget_VLQ_base64_image_ids_before_runtime_init", &FastCompressDict<uint32_t, std::string>::mget_VLQ_base64_image_ids_before_runtime_init)
.def("init_runtime_dict", &FastCompressDict<uint32_t, std::string>::init_runtime_dict)
.def("init_runtime_python_dict", &FastCompressDict<uint32_t, std::string>::init_runtime_python_dict)
.def("base64VLQ_encode", &FastCompressDict<uint32_t, std::string>::base64VLQ_encode<uint32_t>)
.def("base64VLQ_decode", &FastCompressDict<uint32_t, std::string>::base64VLQ_decode<uint32_t>)
.def("to_VLQ_base64_dict", &FastCompressDict<uint32_t, std::string>::to_VLQ_base64_dict)
Expand Down
150 changes: 150 additions & 0 deletions fastdict/fastdict_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,57 @@ def test_runtimedict(self):
self.assertEqual(data[0], 0)
index += 1
buffer_index += 1

def test_runtime_python_dict(self):
    # Fill three keys, build the Python-list runtime dict, then read the
    # columns back through get_python_cols_as_buffer / mget_python_cols_as_buffer.
    f_dict = fastdict.FastCompressUInt32IntDict(self.dimension)
    f_dict.set(123, 6794572984750169060, 0)
    f_dict.append(123, 678, 1)
    f_dict.batch_append([123, 123], [456, 123123], [3, 4])
    f_dict.set(456, 789, 2)
    f_dict.set(789, 123, 3)

    f_dict.go_index()
    f_dict.init_runtime_python_dict()

    # every value is unpacked with format 'I' from 4 consecutive raw bytes
    word_size = 4

    # single key: 64 columns
    columns = f_dict.get_python_cols_as_buffer(123)
    self.assertEqual(len(columns), 64)

    # expected leading words of column 0, keyed by word position
    first_column_words = {0: 2, 1: 1}
    for col_no, col_buffer in enumerate(columns):
        if col_no == 0:
            self.assertEqual(len(col_buffer), 16)
        for word_no in range(len(col_buffer) // word_size):
            start = word_no * word_size
            raw = ''.join(col_buffer[pos] for pos in range(start, start + word_size))
            word = struct.unpack('I', raw)[0]
            if col_no == 0 and word_no in first_column_words:
                self.assertEqual(word, first_column_words[word_no])

    # several keys at once: one list of columns per requested key
    per_key_columns = f_dict.mget_python_cols_as_buffer([123, 456])
    self.assertEqual(len(per_key_columns), 2)

    # expected leading words of column 63 of the second key
    last_column_words = {0: 1, 1: 0}
    for key_no, columns in enumerate(per_key_columns):
        for col_no, col_buffer in enumerate(columns):
            for word_no in range(len(col_buffer) // word_size):
                start = word_no * word_size
                raw = ''.join(col_buffer[pos] for pos in range(start, start + word_size))
                word = struct.unpack('I', raw)[0]
                if col_no == 63 and key_no == 1 and word_no in last_column_words:
                    self.assertEqual(word, last_column_words[word_no])

def test_VLQ_base64(self):
vlq_dict = fastdict.FastCompressUInt32IntDict(8)
self.assertEqual(vlq_dict.base64VLQ_encode(123123), 'zn4D')
Expand Down Expand Up @@ -541,7 +591,57 @@ def test_runtimedict(self):
self.assertEqual(data[0], 0)
index += 1
buffer_index += 1

def test_runtime_python_dict(self):
    # Fill three keys, build the Python-list runtime dict, then read the
    # columns back through get_python_cols_as_buffer / mget_python_cols_as_buffer.
    f_dict = fastdict.FastCompressUInt32Int8Dict(self.dimension)
    f_dict.set(123, 6794572984750169060, 0)
    f_dict.append(123, 678, 1)
    f_dict.batch_append([123, 123], [456, 123123], [3, 4])
    f_dict.set(456, 789, 2)
    f_dict.set(789, 123, 3)

    f_dict.go_index()
    f_dict.init_runtime_python_dict()

    # every value is unpacked with format 'I' from 4 consecutive raw bytes
    word_size = 4

    # single key: 64 columns
    columns = f_dict.get_python_cols_as_buffer(123)
    self.assertEqual(len(columns), 64)

    # expected leading words of column 0, keyed by word position
    first_column_words = {0: 2, 1: 1}
    for col_no, col_buffer in enumerate(columns):
        if col_no == 0:
            self.assertEqual(len(col_buffer), 16)
        for word_no in range(len(col_buffer) // word_size):
            start = word_no * word_size
            raw = ''.join(col_buffer[pos] for pos in range(start, start + word_size))
            word = struct.unpack('I', raw)[0]
            if col_no == 0 and word_no in first_column_words:
                self.assertEqual(word, first_column_words[word_no])

    # several keys at once: one list of columns per requested key
    per_key_columns = f_dict.mget_python_cols_as_buffer([123, 456])
    self.assertEqual(len(per_key_columns), 2)

    # expected leading words of column 63 of the second key
    last_column_words = {0: 1, 1: 0}
    for key_no, columns in enumerate(per_key_columns):
        for col_no, col_buffer in enumerate(columns):
            for word_no in range(len(col_buffer) // word_size):
                start = word_no * word_size
                raw = ''.join(col_buffer[pos] for pos in range(start, start + word_size))
                word = struct.unpack('I', raw)[0]
                if col_no == 63 and key_no == 1 and word_no in last_column_words:
                    self.assertEqual(word, last_column_words[word_no])

def test_VLQ_base64(self):
vlq_dict = fastdict.FastCompressUInt32Int8Dict(8)
self.assertEqual(vlq_dict.base64VLQ_encode(123123), 'zn4D')
Expand Down Expand Up @@ -864,7 +964,57 @@ def test_runtimedict(self):
self.assertEqual(data[0], 0)
index += 1
buffer_index += 1

def test_runtime_python_dict(self):
    # Fill three keys, build the Python-list runtime dict, then read the
    # columns back through get_python_cols_as_buffer / mget_python_cols_as_buffer.
    f_dict = fastdict.FastCompressUInt32StringDict(self.dimension)
    f_dict.set(123, 6794572984750169060, "0")
    f_dict.append(123, 678, "1")
    f_dict.batch_append([123, 123], [456, 123123], ["3", "4"])
    f_dict.set(456, 789, "2")
    f_dict.set(789, 123, "3")

    f_dict.go_index()
    f_dict.init_runtime_python_dict()

    # every value is unpacked with format 'I' from 4 consecutive raw bytes
    word_size = 4

    # single key: 64 columns
    columns = f_dict.get_python_cols_as_buffer(123)
    self.assertEqual(len(columns), 64)

    # expected leading words of column 0, keyed by word position
    first_column_words = {0: 2, 1: 1}
    for col_no, col_buffer in enumerate(columns):
        if col_no == 0:
            self.assertEqual(len(col_buffer), 16)
        for word_no in range(len(col_buffer) // word_size):
            start = word_no * word_size
            raw = ''.join(col_buffer[pos] for pos in range(start, start + word_size))
            word = struct.unpack('I', raw)[0]
            if col_no == 0 and word_no in first_column_words:
                self.assertEqual(word, first_column_words[word_no])

    # several keys at once: one list of columns per requested key
    per_key_columns = f_dict.mget_python_cols_as_buffer([123, 456])
    self.assertEqual(len(per_key_columns), 2)

    # expected leading words of column 63 of the second key
    last_column_words = {0: 1, 1: 0}
    for key_no, columns in enumerate(per_key_columns):
        for col_no, col_buffer in enumerate(columns):
            for word_no in range(len(col_buffer) // word_size):
                start = word_no * word_size
                raw = ''.join(col_buffer[pos] for pos in range(start, start + word_size))
                word = struct.unpack('I', raw)[0]
                if col_no == 63 and key_no == 1 and word_no in last_column_words:
                    self.assertEqual(word, last_column_words[word_no])

def test_VLQ_base64(self):
vlq_dict = fastdict.FastCompressUInt32StringDict(8)
self.assertEqual(vlq_dict.base64VLQ_encode(123123), 'zn4D')
Expand Down
3 changes: 2 additions & 1 deletion lshash.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,9 @@ def query_in_compressed_domain(self, query_point, num_results=None, expand_level

return self.sorting(image_ids, hamming_distances)

except:
except Exception as e:
print "Exception found in computing hamming distance."
print e
return []


Expand Down
4 changes: 2 additions & 2 deletions storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def init_runtime(self):
if not self.inited_runtime:
print "init rumtime dict..."
if self.storage.get_dict_status() == 0:
self.storage.init_runtime_dict()
self.storage.init_runtime_python_dict()
else:
print "Incorrect dict mode."
print "done."
Expand Down Expand Up @@ -356,7 +356,7 @@ def get_compressed_cols(self, reference_key, level = 0):

if self.storage.get_dict_status() == 2:
print "compressed runtime dict"
cols = self.storage.mget_cols_as_buffer(self.actual_keys(reference_key, level).tolist())
cols = self.storage.mget_python_cols_as_buffer(self.actual_keys(reference_key, level).tolist())
image_ids = self.storage.mget_image_ids(self.actual_keys(reference_key, level).tolist())
elif self.storage.get_dict_status() == 3:
print "VLQ base64 compressed runtime dict"
Expand Down

0 comments on commit eab3d91

Please sign in to comment.