In [1]:
! pip install faiss-cpu==1.7.4 mistralai

Defaulting to user installation because normal site-packages is not writeable


In [2]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import requests
import numpy as np
import faiss
import os
from getpass import getpass

# Use a key already exported as MISTRAL_API_KEY when there is one, so the notebook
# can be re-run unattended (Restart & Run All); otherwise prompt for it without
# echoing the value. The key is never written into the notebook itself.
api_key = os.environ.get("MISTRAL_API_KEY") or getpass("Type your API Key")
client = MistralClient(api_key=api_key)



## Get Data

In [4]:
# Load the source document as text. UTF-8 is tried first; if the bytes are not
# valid UTF-8 the file is re-read as ISO-8859-1, which maps every byte value and
# therefore cannot raise UnicodeDecodeError itself.
file_path = 'Human_Nutrition.txt'

for encoding in ('utf-8', 'ISO-8859-1'):
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            text = file.read()
        break
    except UnicodeDecodeError:
        continue

# Report how many characters were read
print(len(text))


6753


## Split document into chunks

In [5]:
# Cut the text into consecutive, non-overlapping windows of `chunk_size`
# characters; the final window holds whatever remains and may be shorter.
chunk_size = 2048
chunk_starts = range(0, len(text), chunk_size)
chunks = [text[start:start + chunk_size] for start in chunk_starts]
len(chunks)

4

In [None]:
def get_text_embedding(input):
    """Embed ``input`` with the "mistral-embed" model and return one vector.

    ``input`` is forwarded unchanged to ``client.embeddings``. The response's
    ``data`` attribute is indexed at position 0 and that entry's ``embedding``
    attribute is returned; any further entries in ``data`` are ignored.
    """
    response = client.embeddings(model="mistral-embed", input=input)
    first_entry = response.data[0]
    return first_entry.embedding


In [None]:
# One embedding per chunk, stacked into a 2-D matrix (one row per chunk).
# FAISS works on 32-bit float matrices, so the array is created as float32
# directly instead of NumPy's default float64 for Python floats.
text_embeddings = np.array(
    [get_text_embedding(chunk) for chunk in chunks],
    dtype=np.float32,
)

In [None]:
# Sanity check: (number of chunks, embedding dimension)
np.shape(text_embeddings)

(4, 1024)

In [None]:
# Show the chunk embedding matrix (NumPy abbreviates long rows with "...")
text_embeddings

array([[-0.00406265,  0.01577759,  0.03512573, ...,  0.00774002,
         0.04968262, -0.02374268],
       [-0.01263428,  0.01436615,  0.0242157 , ...,  0.01223755,
         0.04141235, -0.01473999],
       [-0.02081299,  0.01425171,  0.03396606, ..., -0.00672913,
         0.04904175, -0.02232361],
       [-0.01119232,  0.0196228 ,  0.02223206, ...,  0.01515198,
         0.04684448, -0.02009583]])

### Load into a vector database


In [None]:
# Width of one embedding vector, i.e. the dimensionality the index is built for
d = np.shape(text_embeddings)[1]

# Flat (exhaustive) L2 index: vectors are stored as-is and compared directly
index = faiss.IndexFlatL2(d)

# Row i of the matrix becomes vector id i, matching the position in `chunks`
index.add(text_embeddings)

### Create embeddings for a question

In [None]:
# Embed the question with the same model as the chunks. The vector is wrapped in
# a list so the result is a 2-D (1, dim) matrix, the layout `index.search` takes,
# and it is created as float32 because FAISS works on 32-bit float matrices.
question = "What are Lipids? "
question_embeddings = np.array([get_text_embedding(question)], dtype=np.float32)
question_embeddings.shape


(1, 1024)

In [None]:
# Show the question embedding (a single-row matrix)
question_embeddings

array([[ 0.00132465,  0.01548767,  0.05230713, ..., -0.00120831,
         0.03089905, -0.03286743]])

### Retrieve similar chunks from the vector database

In [None]:
# Look up the closest stored vectors for the question. Per FAISS's search API,
# D receives the distances and I the ids (row positions) of the matches.
top_k = 2
D, I = index.search(question_embeddings, k=top_k)
print(I)


[[2 1]]


In [None]:
# Map the ids returned for the (single) query back to their chunk text.
# FAISS fills the result with -1 when the index holds fewer than k vectors; used
# as a list index, -1 would silently return the last chunk, so skip those ids.
retrieved_chunk = [chunks[i] for i in I.tolist()[0] if i >= 0]
print(retrieved_chunk)

['s). In addition to energy storage, lipids serve as a major component of\ncell membranes, surround and protect organs (in fat-storing tissues), provide insulation\nto aid in temperature regulation, and regulate many other functions in the body.\n\nProteins\nProteins are macromolecules composed of chains of subunits called amino acids.\nAmino acids are simple subunits composed of carbon, oxygen, hydrogen, and nitrogen.\nFood sources of proteins include meats, dairy products, seafood, and a variety of\ndifferent plant- based foods, most notably soy. The word protein comes from a Greek\nword meaning “of primary importance,” which is an apt description of these\nmacronutrients; they are also known colloquially as the “workhorses” of life. Proteins\nprovide four kilocalories of energy per gram; however providing energy is not protein’s\nmost important function. Proteins provide structure to bones, muscles and skin, and\n\n\x0cplay a role in conducting most of the chemical reactions that ta


  
### Combine context and question in a prompt and generate response


In [None]:
# `retrieved_chunk` is a list of strings. Interpolating the list itself would put
# its Python repr (brackets, quotes, escaped "\n") into the prompt, so the chunks
# are joined into plain text, separated by a blank line, before formatting.
context = "\n\n".join(retrieved_chunk)

prompt = f"""
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

In [None]:
def run_mistral(user_message, model="mistral-medium-latest"):
    """Send one user message to a Mistral chat model and return the reply text.

    Args:
        user_message: Content of the single ``user``-role message to send.
        model: Model name passed through to ``client.chat``.

    Returns:
        The ``content`` of the message in the first choice of the response.
    """
    conversation = [ChatMessage(role="user", content=user_message)]
    reply = client.chat(model=model, messages=conversation)
    first_choice = reply.choices[0]
    return first_choice.message.content

In [None]:
# Ask the model to answer the question using the retrieved context in `prompt`
run_mistral(prompt)

'Lipids are a family of molecules composed of carbon, hydrogen, and oxygen, but unlike carbohydrates, they are insoluble in water. They are found predominantly in butter, oils, meats, dairy products, nuts, seeds, and many processed foods. The three main types of lipids are triglycerides (triacylglycerols), phospholipids, and sterols. The main job of lipids is to provide or store energy. Lipids provide more energy per gram than carbohydrates (nine kilocalories per gram of lipids versus four kilocalories per gram of carbohydrate). In addition to energy storage, lipids serve as a major component of cell membranes, surround and protect organs (in fat-storing tissues), provide insulation to aid in temperature regulation, and regulate many other functions in the body.'