## 图像embedding

In [12]:
from towhee import AutoPipes ,pipe

# Build the built-in text_image_embedding pipeline with its default configuration
# (no custom config object is passed here).
image_embedding = AutoPipes.pipeline('text_image_embedding')

# Embed a single image file and unwrap the pipeline result with .get().
embedding = image_embedding('./xx1.jpg').get()

# Embed several image files in one batch call, unwrapping every result with .get().
embeddings = [
    result.get()
    for result in image_embedding.batch(['./xx1.jpg', './xx2.jpg'])
]


## 文字embedding

In [6]:
from towhee import AutoPipes, AutoConfig

# Load the default config of text_image_embedding, then switch it to the text
# modality and pin the model name explicitly.
text_conf = AutoConfig.load_config('text_image_embedding')
text_conf.model = 'clip_vit_base_patch16'
text_conf.modality = 'text'
text_pipe = AutoPipes.pipeline('text_image_embedding', text_conf)

# Embed a single sentence and unwrap the pipeline result with .get().
embedding = text_pipe('A running dog.').get()

# Embed several sentences in one batch call, unwrapping every result with .get().
embeddings = [
    result.get()
    for result in text_pipe.batch(['A running dog.', 'Puppy Corgi.'])
]

## 建立milvus的collection

In [7]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

def create_milvus_collection(collection_name, dim):
    """Create a fresh Milvus collection for text-image search and index it.

    Connects to the Milvus server at 127.0.0.1:19530. If a collection named
    ``collection_name`` already exists it is DROPPED first, so any data stored
    in it is lost.

    Args:
        collection_name: Name of the collection to (re)create.
        dim: Dimension of the vectors stored in the ``embedding`` field.

    Returns:
        The newly created ``Collection``, with an IVF_FLAT index (L2 metric,
        nlist=512) built on the ``embedding`` field.
    """
    connections.connect(host='127.0.0.1', port='19530')

    # Start from a clean slate: remove any existing collection with this name.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    # NOTE: the keyword must be spelled ``description``; a misspelled keyword is
    # not the parameter FieldSchema declares, so the description would not be set.
    fields = [
        # auto_id=True lets Milvus generate the primary keys, which is more convenient.
        FieldSchema(name='id', dtype=DataType.INT64, description='ids', is_primary=True, auto_id=True),
        FieldSchema(name='url', dtype=DataType.VARCHAR, description='urls', max_length=512),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='embedding vectors', dim=dim),
    ]
    schema = CollectionSchema(fields=fields, description='text image search')
    collection = Collection(name=collection_name, schema=schema)

    # Create an IVF_FLAT index on the vector field.
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 512},
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection

# (Re)create the collection that the insert and search cells refer to by name.
# 512 is the vector dimension of the embedding field; it has to equal the size
# of the embeddings inserted later.
collection = create_milvus_collection(collection_name='text_image_search_lx', dim=512)

## 往数据库加入图片项，包括embedding和url等

In [8]:
from towhee import AutoPipes, AutoConfig

# Load the default config of the built-in insert_milvus pipeline and point it
# at the collection created earlier.
insert_conf = AutoConfig.load_config('insert_milvus')
insert_conf.collection_name = 'text_image_search_lx'

insert_pipe = AutoPipes.pipeline('insert_milvus', insert_conf)

# Embed the image; [0] takes the first element of the pipeline result.
embedding = image_embedding('./xx1.jpg').get()[0]

# Insert the image path together with its embedding into Milvus.
# The order must not be swapped: path first, then embedding
# (presumably matching the url / embedding field order of the collection schema).
insert_pipe(['./xx1.jpg', embedding])

<towhee.runtime.data_queue.DataQueue at 0x244ab34a4b0>

In [9]:
# Embed the second image; [0] takes the first element of the pipeline result.
embedding = image_embedding('./xx2.jpg').get()[0]

# Insert the image path together with its embedding into Milvus.
# The order must not be swapped: path first, then embedding.
insert_pipe(['./xx2.jpg', embedding])

<towhee.runtime.data_queue.DataQueue at 0x244ab3496d0>

## 建立搜索pipeline

In [17]:
from towhee import AutoPipes, AutoConfig

# Load the default config of the built-in search_milvus pipeline.
search_conf = AutoConfig.load_config('search_milvus')

# Request the url field with every hit and search the collection populated above.
search_conf.search_params = {'output_fields': ['url']}
search_conf.collection_name = 'text_image_search_lx'

search_pipe = AutoPipes.pipeline('search_milvus', search_conf)

# Embed the text query with the text pipeline; [0] takes the first element of the result.
embedding = text_pipe('stair').get()[0]

# Run the vector search in Milvus; as the cell's last expression, the result
# rows are displayed as a dict.
search_pipe(embedding).get_dict()

{'rows': [[457091581108815342, 1.4777711629867554, './xx1.jpg'],
  [457091581108815344, 1.6302437782287598, './xx2.jpg']]}