Skip to content

Commit

Permalink
add data type stringArray
Browse files (browse the repository at this point in the history)
  • Loading branch information
zcdb committed Apr 16, 2024
1 parent 08f1fd8 commit fb001df
Show file tree
Hide file tree
Showing 25 changed files with 424 additions and 271 deletions.
43 changes: 21 additions & 22 deletions internal/client/client.go
Expand Up @@ -297,7 +297,7 @@ func (r *routerRequest) Execute() []*vearchpb.Item {
spacePro := r.space.SpaceProperties
for field, pro := range spacePro {
format := pro.Format
if pro.FieldType == entity.FieldType_VECTOR && format != nil && (*format == "normalization" || *format == "normal") {
if pro.FieldType == vearchpb.FieldType_VECTOR && format != nil && (*format == "normalization" || *format == "normal") {
normalField[field] = field
}
}
Expand Down Expand Up @@ -634,7 +634,7 @@ func (r *routerRequest) SearchFieldSortExecute(sortOrder sortorder.SortOrder) *v
spacePro := r.space.SpaceProperties
for field, pro := range spacePro {
format := pro.Format
if pro.FieldType == entity.FieldType_VECTOR && format != nil && (*format == "normalization" || *format == "normal") {
if pro.FieldType == vearchpb.FieldType_VECTOR && format != nil && (*format == "normalization" || *format == "normal") {
normalField[field] = field
}
}
Expand Down Expand Up @@ -1104,25 +1104,24 @@ func GetSource(doc *vearchpb.ResultItem, space *entity.Space, sortFieldMap map[s
continue
}
switch field.FieldType {
case entity.FieldType_STRING:
case vearchpb.FieldType_STRING:
tempValue := string(fv.Value)
if field.Array {
source[name] = strings.Split(tempValue, string([]byte{'\001'}))
} else {
source[name] = tempValue
if sortFieldMap != nil && sortFieldMap[name] != "" {
for i, v := range sortFields {
if v.Field == name {
sortValues[i] = &sortorder.StringSortValue{
Val: tempValue,
SortName: name,
}
break
source[name] = tempValue
if sortFieldMap != nil && sortFieldMap[name] != "" {
for i, v := range sortFields {
if v.Field == name {
sortValues[i] = &sortorder.StringSortValue{
Val: tempValue,
SortName: name,
}
break
}
}
}
case entity.FieldType_INT:
case vearchpb.FieldType_STRINGARRAY:
tempValue := string(fv.Value)
source[name] = strings.Split(tempValue, string([]byte{'\001'}))
case vearchpb.FieldType_INT:
intVal := cbbytes.Bytes2Int32(fv.Value)
source[name] = intVal
if sortFieldMap != nil && sortFieldMap[name] != "" {
Expand All @@ -1136,7 +1135,7 @@ func GetSource(doc *vearchpb.ResultItem, space *entity.Space, sortFieldMap map[s
}
}
}
case entity.FieldType_LONG:
case vearchpb.FieldType_LONG:
longVal := cbbytes.Bytes2Int(fv.Value)
source[name] = longVal
if sortFieldMap != nil && sortFieldMap[name] != "" {
Expand All @@ -1150,16 +1149,16 @@ func GetSource(doc *vearchpb.ResultItem, space *entity.Space, sortFieldMap map[s
}
}
}
case entity.FieldType_BOOL:
case vearchpb.FieldType_BOOL:
if cbbytes.Bytes2Int(fv.Value) == 0 {
source[name] = false
} else {
source[name] = true
}
case entity.FieldType_DATE:
case vearchpb.FieldType_DATE:
u := cbbytes.Bytes2Int(fv.Value)
source[name] = time.Unix(u/1e6, u%1e6)
case entity.FieldType_FLOAT:
case vearchpb.FieldType_FLOAT:
floatVal := cbbytes.ByteToFloat64(fv.Value)
source[name] = floatVal

Expand All @@ -1174,7 +1173,7 @@ func GetSource(doc *vearchpb.ResultItem, space *entity.Space, sortFieldMap map[s
}
}
}
case entity.FieldType_DOUBLE:
case vearchpb.FieldType_DOUBLE:
floatVal := cbbytes.ByteToFloat64(fv.Value)
source[name] = floatVal
if sortFieldMap != nil && sortFieldMap[name] != "" {
Expand All @@ -1189,7 +1188,7 @@ func GetSource(doc *vearchpb.ResultItem, space *entity.Space, sortFieldMap map[s
}
}

case entity.FieldType_VECTOR:
case vearchpb.FieldType_VECTOR:
if space.Index.Type == "BINARYIVF" {
featureByteC := fv.Value
dimension := field.Dimension
Expand Down
10 changes: 1 addition & 9 deletions internal/engine/c_api/api_data/table.h
Expand Up @@ -13,19 +13,11 @@
#include "idl/fbs-gen/c/doc_generated.h"
#include "idl/fbs-gen/c/response_generated.h"
#include "idl/fbs-gen/c/table_generated.h"
#include "idl/fbs-gen/c/types_generated.h"
#include "raw_data.h"

namespace vearch {

enum class DataType : std::uint16_t {
INT = 0,
LONG,
FLOAT,
DOUBLE,
STRING,
VECTOR
};

struct VectorInfo {
std::string name;
DataType data_type;
Expand Down
17 changes: 13 additions & 4 deletions internal/engine/idl/fbs-gen/c/types_generated.h
Expand Up @@ -12,17 +12,23 @@ enum DataType {
FLOAT = 2,
DOUBLE = 3,
STRING = 4,
VECTOR = 5
VECTOR = 5,
BOOL = 6,
DATE = 7,
STRINGARRAY = 8
};

inline const DataType (&EnumValuesDataType())[6] {
inline const DataType (&EnumValuesDataType())[9] {
static const DataType values[] = {
INT,
LONG,
FLOAT,
DOUBLE,
STRING,
VECTOR
VECTOR,
BOOL,
DATE,
STRINGARRAY
};
return values;
}
Expand All @@ -35,13 +41,16 @@ inline const char * const *EnumNamesDataType() {
"DOUBLE",
"STRING",
"VECTOR",
"BOOL",
"DATE",
"STRINGARRAY",
nullptr
};
return names;
}

inline const char *EnumNameDataType(DataType e) {
if (e < INT || e > VECTOR) return "";
if (e < INT || e > STRINGARRAY) return "";
const size_t index = static_cast<size_t>(e);
return EnumNamesDataType()[index];
}
Expand Down
6 changes: 6 additions & 0 deletions internal/engine/idl/fbs-gen/go/gamma_api/DataType.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions internal/engine/idl/fbs-gen/python/DataType.py
Expand Up @@ -9,4 +9,7 @@ class DataType(object):
DOUBLE = 3
STRING = 4
VECTOR = 5
BOOL = 6
DATE = 7
STRINGARRAY = 8

2 changes: 1 addition & 1 deletion internal/engine/idl/fbs/types.fbs
@@ -1 +1 @@
enum DataType : byte { INT = 0, LONG, FLOAT, DOUBLE, STRING, VECTOR }
enum DataType : byte { INT = 0, LONG, FLOAT, DOUBLE, STRING, VECTOR, BOOL, DATE, STRINGARRAY }
4 changes: 2 additions & 2 deletions internal/engine/index/impl/gpu/gamma_index_ivfpq_gpu.cc
Expand Up @@ -640,7 +640,7 @@ int ParseFilters(SearchCondition *condition,
return -1;
}

if (type == DataType::STRING) {
if (type == DataType::STRING || type == DataType::STRINGARRAY) {
LOG(ERROR) << range.field << " can't be range filter";
return -1;
}
Expand All @@ -656,7 +656,7 @@ int ParseFilters(SearchCondition *condition,
return -1;
}

if (type != DataType::STRING) {
if (type != DataType::STRING && type != DataType::STRINGARRAY) {
LOG(ERROR) << term.field << " can't be term filter";
return -1;
}
Expand Down
16 changes: 9 additions & 7 deletions internal/engine/sdk/go/gamma/table.go
Expand Up @@ -15,12 +15,15 @@ import (
type DataType int8

const (
INT DataType = 0
LONG DataType = 1
FLOAT DataType = 2
DOUBLE DataType = 3
STRING DataType = 4
VECTOR DataType = 5
INT DataType = 0
LONG DataType = 1
FLOAT DataType = 2
DOUBLE DataType = 3
STRING DataType = 4
VECTOR DataType = 5
BOOL DataType = 6
DATE DataType = 7
STRINGARRAY DataType = 8
)

type VectorInfo struct {
Expand Down Expand Up @@ -56,7 +59,6 @@ func (table *Table) Serialize(out *[]byte) int {
field := table.Fields[i]
fieldNames[i] = builder.CreateString(field.Name)
}

fieldInfos := make([]flatbuffers.UOffsetT, len(table.Fields))
for i := 0; i < len(table.Fields); i++ {
field := table.Fields[i]
Expand Down
4 changes: 2 additions & 2 deletions internal/engine/table/field_range_index.cc
Expand Up @@ -416,7 +416,7 @@ FieldRangeIndex::FieldRangeIndex(std::string &path, int field_idx,
main_mgr_->maxleaves = bt_param.maxleaves;
#endif

if (field_type == DataType::STRING) {
if (field_type == DataType::STRING || field_type == DataType::STRINGARRAY) {
is_numeric_ = false;
} else {
is_numeric_ = true;
Expand Down Expand Up @@ -807,7 +807,7 @@ int FieldRangeIndex::Search(const string &tags, RangeQueryResult *result) {
bt_close(bt);

if (ret < 0) {
LOG(INFO) << "find node failed, key=" << item;
LOG(WARNING) << "find node failed, key=" << item;
continue;
}
if (p_node == nullptr) {
Expand Down
12 changes: 6 additions & 6 deletions internal/engine/table/table.cc
Expand Up @@ -213,7 +213,7 @@ int Table::FTypeSize(DataType fType) {
length = sizeof(float);
} else if (fType == DataType::DOUBLE) {
length = sizeof(double);
} else if (fType == DataType::STRING) {
} else if (fType == DataType::STRING || fType == DataType::STRINGARRAY) {
length = 0;
}
return length;
Expand Down Expand Up @@ -290,7 +290,7 @@ int Table::Add(const std::string &key,

for (size_t i = 0; i < attrs_.size(); i++) {
DataType data_type = attrs_[i];
if (data_type != DataType::STRING) {
if (data_type != DataType::STRING && data_type != DataType::STRINGARRAY) {
continue;
}

Expand All @@ -306,7 +306,7 @@ int Table::Add(const std::string &key,

DataType attr = attr_type_map_[name];

if (attr != DataType::STRING) {
if (attr != DataType::STRING && attr != DataType::STRINGARRAY) {
int type_size = FTypeSize(attr);
memcpy(doc_value.data() + offset, field.value.c_str(), type_size);
} else {
Expand Down Expand Up @@ -351,7 +351,7 @@ int Table::Update(const std::unordered_map<std::string, struct Field> &fields,
int field_id = it->second;
int offset = idx_attr_offset_[field_id];

if (field.datatype == DataType::STRING) {
if (field.datatype == DataType::STRING || field.datatype == DataType::STRINGARRAY) {
int len = field.value.size();
storage_mgr_->UpdateString(docid, name, field.value.c_str(), len);
} else {
Expand Down Expand Up @@ -489,7 +489,7 @@ int Table::GetFieldRawValue(int docid, int field_id, std::string &value,
DataType data_type = attrs_[field_id];
size_t offset = idx_attr_offset_[field_id];

if (data_type == DataType::STRING) {
if (data_type == DataType::STRING || data_type == DataType::STRINGARRAY) {
storage_mgr_->GetString(docid, field_name, value);
} else {
int value_len = FTypeSize(data_type);
Expand Down Expand Up @@ -520,7 +520,7 @@ int Table::GetFieldRawValue(int docid, int field_id,
DataType data_type = attrs_[field_id];
size_t offset = idx_attr_offset_[field_id];

if (data_type == DataType::STRING) {
if (data_type == DataType::STRING || data_type == DataType::STRINGARRAY) {
const auto iter = idx_attr_map_.find(field_id);
if (iter == idx_attr_map_.end()) {
LOG(ERROR) << name_ << " cannot find field [" << field_id << "]";
Expand Down

0 comments on commit fb001df

Please sign in to comment.