In [28]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain_community.vectorstores import Typesense
import typesense
import os

load_dotenv()

embedding_service = OpenAIEmbeddings(model='text-embedding-ada-002')

node = {
    "host": os.getenv("TYPESENSE_HOST"),  # For Typesense Cloud use xxx.a1.typesense.net
    "port": "443",       # For Typesense Cloud use 443
    "protocol": "https"    # For Typesense Cloud use https
}
typesense_client = typesense.Client(
    {
        "nodes": [node],
        "api_key": os.getenv("TYPESENSE_API_KEY"),
        "connection_timeout_seconds": 5
    }
)
typesense_collection_name = "nea-items-with-vector"

# vectorstore = Typesense(
#     typesense_client=typesense_client,
#     embedding=embedding_service,
#     typesense_collection_name=typesense_collection_name,
#     text_key="item",
# )

In [29]:
COLLECTION_NAME = os.getenv("TYPESENSE_COLLECTION_NAME")

'nea-items-with-vector'

In [9]:
embedded_query = [str(x) for x in embedding_service.embed_query("bank card")]
len(embedded_query)

1536

In [10]:
search_parameters = {
    'collection': COLLECTION_NAME,
    'q'         : '*',
    "vector_query": f'vec:([{",".join(embedded_query)}], k:{3})',
    "exclude_fields": "vec"
}

docs = []
response = typesense_client.multi_search.perform(
    {"searches": [search_parameters]}, {}
)
# for hit in response["results"][0]["hits"]:
#     document = hit["document"]
#     metadata = document["metadata"]
#     text = document["item"]
#     score = hit["vector_distance"]
#     docs.append((Document(page_content=text, metadata=metadata), score))

In [11]:
response['results'][0]['hits']

[{'document': {'id': 'K8dgLoEWXFj5RTolBfsm',
   'instructions': 'Should be disposed of as general waste.',
   'item': 'bank card',
   'material': 'Plastic'},
  'highlight': {},
  'highlights': [],
  'vector_distance': 0.15616685152053833},
 {'document': {'id': 'CNUeuF9SfghzfAMxGC1Z',
   'instructions': 'Should be disposed of as general waste.',
   'item': 'debit card',
   'material': 'Plastic'},
  'highlight': {},
  'highlights': [],
  'vector_distance': 0.16572457551956177},
 {'document': {'id': 'MKT17mhgvCorMVzVL4mF',
   'instructions': 'Should be disposed of as general waste.',
   'item': 'atm card',
   'material': 'Plastic'},
  'highlight': {},
  'highlights': [],
  'vector_distance': 0.17570042610168457}]

## [Semantic search using auto embedding](https://typesense.org/docs/26.0/api/vector-search.html#semantic-search)

In [13]:

search_parameters = {
'q'                          : 'wallet',
'query_by'                   : 'vec',
"prefix": "false"
}

typesense_client.collections[COLLECTION_NAME].documents.search(search_parameters)


{'facet_counts': [],
 'found': 10,
 'hits': [{'document': {'id': 'T8hOYOidpSMr92gkjZhB',
    'instructions': 'Donate if it can be reused.',
    'item': 'diary',
    'material': 'Paper',
    'vec': [-0.015015119686722755,
     -0.008541453629732132,
     -0.004581571090966463,
     -0.023070035502314568,
     -0.0007762654568068683,
     0.01091332919895649,
     -0.025867631658911705,
     -0.02200235240161419,
     -0.01072411984205246,
     -0.02134012058377266,
     0.023516029119491577,
     0.04324786737561226,
     0.012717576697468758,
     0.018893912434577942,
     -0.012994633056223392,
     -0.0052809701301157475,
     0.01938045024871826,
     4.40028052253183e-05,
     -0.0002034634817391634,
     -0.011568805202841759,
     -0.028381412848830223,
     0.0062979706563055515,
     -0.02246186137199402,
     0.004736992996186018,
     -0.011575561948120594,
     0.005720206536352634,
     0.025381091982126236,
     -0.003027350874617696,
     0.02147527039051056,
     -0.000

### [Hybrid search](https://typesense.org/docs/26.0/api/vector-search.html#hybrid-search)

In [39]:
query_obj = {
    "collection"                 : COLLECTION_NAME,
    'q'                          : 'wallet',
    'query_by'                   : 'vec,item,instructions',
    "prefix"                     : "false",
    "vector_query"               : "vec:([], alpha: 0.3)",  # alpha is weight given to semantic (vector) search, (1 - alpha) is weight given to keyword search
    "exclude_fields"             : "vec",
    "limit"                      : 3,
}

common_search_params = {}

response = typesense_client.multi_search.perform({"searches": [query_obj]}, common_search_params)
response


{'results': [{'facet_counts': [],
   'found': 100,
   'hits': [{'document': {'id': 'T8hOYOidpSMr92gkjZhB',
      'instructions': 'Donate if it can be reused.',
      'item': 'diary',
      'material': 'Paper'},
     'highlight': {},
     'highlights': [],
     'hybrid_search_info': {'rank_fusion_score': 0.30000001192092896},
     'text_match': 0,
     'text_match_info': {'best_field_score': '0',
      'best_field_weight': 0,
      'fields_matched': 0,
      'num_tokens_dropped': 1,
      'score': '0',
      'tokens_matched': 0,
      'typo_prefix_score': 255},
     'vector_distance': 0.20012575387954712},
    {'document': {'id': 'lio7Tdlxe43EVWJ1KkW9',
      'instructions': 'Donate if it can be reused.',
      'item': 'jotter book',
      'material': 'Paper'},
     'highlight': {},
     'highlights': [],
     'hybrid_search_info': {'rank_fusion_score': 0.15000000596046448},
     'text_match': 0,
     'text_match_info': {'best_field_score': '0',
      'best_field_weight': 0,
      'fiel

In [40]:
doc = response['results'][0]['hits'][0]
doc

{'document': {'id': 'T8hOYOidpSMr92gkjZhB',
  'instructions': 'Donate if it can be reused.',
  'item': 'diary',
  'material': 'Paper'},
 'highlight': {},
 'highlights': [],
 'hybrid_search_info': {'rank_fusion_score': 0.30000001192092896},
 'text_match': 0,
 'text_match_info': {'best_field_score': '0',
  'best_field_weight': 0,
  'fields_matched': 0,
  'num_tokens_dropped': 1,
  'score': '0',
  'tokens_matched': 0,
  'typo_prefix_score': 255},
 'vector_distance': 0.20012575387954712}

In [45]:
doc['hybrid_search_info']['rank_fusion_score']

0.30000001192092896

In [48]:
from langchain_core.documents import Document
import json

docs = []
for hit in response["results"][0]["hits"]:
    document = hit["document"]
    content = json.dumps({
        "item": document["item"],
        "instructions": document["instructions"],
        "material": document["material"]
    })
    # score = hit['hybrid_search_info']['rank_fusion_score']
    # docs.append((Document(page_content=content), score))
    docs.append(Document(page_content=content))
len(docs)

3

In [49]:
docs[0]

Document(page_content='{"item": "diary", "instructions": "Donate if it can be reused.", "material": "Paper"}')