## Hands-on 6
#### In this hands-on, an LLM will be installed and served.
#### Retrieval Augmented Generation (RAG) will be used to produce more accurate and contextually relevant outputs.

In [None]:
!curl https://ollama.ai/install.sh | sh

In [None]:
!ollama --version

In [None]:
pip install colab-xterm ollama chromadb

In [None]:
%load_ext colabxterm
%xterm

In [None]:
!ollama pull phi3

In [None]:
!ollama pull mxbai-embed-large

In [None]:
import ollama

def chat_with_user(model='phi3'):
    """Run an interactive, multi-turn chat with a model served by Ollama.

    Prompts are read from standard input until the user types "exit" or
    "quit" (case-insensitive, surrounding whitespace ignored), or until the
    input stream is closed or interrupted. Every user message and model
    reply is appended to a running history that is sent with each request,
    so the model sees the whole conversation rather than only the latest
    message.

    Args:
        model: Name of the Ollama model to send the conversation to.
    """
    # Full conversation so far, alternating 'user' / 'assistant' messages
    history = []

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or interrupted cell: leave without a traceback
            print("\nEnding chat. Goodbye!")
            break

        # Exit the chat if the user types "exit" or "quit"
        if user_input.lower() in ("exit", "quit"):
            print("Ending chat. Goodbye!")
            break

        # Nothing to send for a blank line; ask again
        if not user_input:
            continue

        # Send the whole conversation, including the new message, to the model
        history.append({'role': 'user', 'content': user_input})
        response = ollama.chat(model=model, messages=history)

        # Remember the reply so later turns can refer back to it
        reply = response['message']['content']
        history.append({'role': 'assistant', 'content': reply})

        # Print the model's response
        print(f"Chatbot: {reply}")

# Start the chat
chat_with_user()

In [None]:
import chromadb
from ollama import Client

# Initialize the LLM client and ChromaDB client
llm = Client(host="http://localhost:11434")
client = chromadb.Client()

# Define the documents and create a collection
documents = [
    "Ahmed Bin Careem is a professor at Imaginative University.",
    "Ahmed Bin Careem holds a Ph.D. in Artificial Intelligence.",
    "Ahmed Bin Careem is a highly qualified and experienced educator with expertise in cutting-edge technologies such as Machine Learning, Deep Learning, and Large Language Models.",
]

# get_or_create_collection (rather than create_collection) keeps this cell
# re-runnable: creating "docs" a second time in the same kernel would fail
# because the collection already exists.
collection = client.get_or_create_collection(name="docs")

# Store each document in the vector embedding database
for i, d in enumerate(documents):
    response = llm.embeddings(model="mxbai-embed-large", prompt=d)
    embedding = response["embedding"]
    # upsert (rather than add) so that re-running the cell overwrites the
    # entry stored under the same id instead of colliding with it
    collection.upsert(ids=[str(i)], embeddings=[embedding], documents=[d])

# Function to generate response based on user prompt
def generate_response(prompt, n_results=1):
    """Answer *prompt* with phi3, grounded in the closest stored documents.

    The prompt is embedded with mxbai-embed-large, the nearest entries are
    looked up in the module-level ``collection``, and their text is placed
    in front of the prompt before it is sent to the model through the
    module-level ``llm`` client.

    Args:
        prompt: The user's question or instruction.
        n_results: How many documents to retrieve as context. The default of
            1 uses only the single closest document.

    Returns:
        The text of the model's reply.
    """
    # Generate an embedding for the prompt and retrieve the most relevant docs
    query = llm.embeddings(prompt=prompt, model="mxbai-embed-large")
    results = collection.query(
        query_embeddings=[query["embedding"]], n_results=n_results
    )
    # Only one query embedding was sent, so its matches are in the first row
    retrieved = results["documents"][0]

    if retrieved:
        # With a single match this is exactly that document's text
        data = " ".join(retrieved)
        full_prompt = f"Using this data: {data}. Respond to this prompt: {prompt}"
    else:
        # Nothing came back from retrieval (e.g. an empty collection): fall
        # back to the bare prompt instead of failing on a missing index
        full_prompt = prompt

    # Generate a response combining the prompt and retrieved data
    output = llm.generate(model="phi3", prompt=full_prompt)
    return output["response"]

# Continuously chat with the user until they type "exit" or "quit"
while True:
    try:
        user_prompt = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted cell: leave without a traceback
        print("\nEnding chat. Goodbye!")
        break
    if user_prompt.lower() in ["exit", "quit"]:
        print("Ending chat. Goodbye!")
        break
    if not user_prompt:
        # A blank line gives nothing to embed or retrieve against; ask again
        continue
    response = generate_response(user_prompt)
    print(f"Chatbot: {response}")
