# In [None]:
import os
import requests
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader
import chromadb
import gradio as gr

# Hugging Face Hub credentials.
# setdefault keeps a token that is already exported in the environment; the
# placeholder below is only a fallback and is meant to be replaced locally.
# NOTE(review): prefer exporting the token over committing a real one here.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "your_actual_token_here")
# An explicitly empty value in the environment is treated as "not set".
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise ValueError("HUGGINGFACEHUB_API_TOKEN is not set in the environment variables.")

def initialize_embeddings(model_name="sentence-transformers/all-mpnet-base-v2"):
    """Create the Hugging Face embedding model used to vectorise documents.

    Args:
        model_name: Model identifier handed to ``HuggingFaceEmbeddings``.
            Defaults to the model this script has always used, so existing
            zero-argument calls behave exactly as before.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

def process_and_embed_docs(dir_path, hf_model, chunk_size=500, chunk_overlap=0):
    """Load documents from a directory, split them, and index them in Chroma.

    Args:
        dir_path: Directory handed to ``DirectoryLoader``.
        hf_model: Embedding object passed to Chroma as ``embedding``.
        chunk_size: Maximum chunk size given to the text splitter.
            Defaults to 500, the value previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks given to the text
            splitter. Defaults to 0, the value previously hard-coded.

    Returns:
        The ``Chroma`` vector store built from the split documents.
    """
    loaded_docs = DirectoryLoader(dir_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    split_docs = splitter.split_documents(loaded_docs)
    # No explicit chromadb client is passed here: the store is created purely
    # from the documents and the embedding model.
    return Chroma.from_documents(documents=split_docs, embedding=hf_model)

def concatenate_documents(document_list):
    """Return the ``page_content`` of every document joined into one string.

    The pieces are joined in order with no separator between them; an empty
    ``document_list`` yields an empty string.
    """
    pieces = []
    for document in document_list:
        pieces.append(document.page_content)
    return "".join(pieces)

# Build the embedding model once, at import time.
hf_model = initialize_embeddings()
# Placeholder: replace with the directory that holds the documents to index.
example_path = r"PATH_TO_YOUR_DATASET_DIRECTORY"
# Module-level vector store; process_query reads it to retrieve context.
vector_database = process_and_embed_docs(example_path, hf_model)

def query_llama2_EP(context, query, endpoint_url, timeout=120):
    """Send a context-grounded question to a Llama 2 endpoint and return its answer.

    Args:
        context: Text inserted after ``Context:`` in the prompt.
        query: The user's question, inserted after ``Question:``.
        endpoint_url: URL the request is POSTed to.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled endpoint cannot block the caller forever.

    Returns:
        The generated text, decoded from the JSON reply (escape sequences such
        as ``\\n`` arrive as real characters).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no reply arrives within ``timeout`` seconds.
        ValueError: If the reply is not shaped like
            ``[{"generated_text": "..."}]``.
    """
    # NOTE(review): the [INST] tag is opened but never closed with [/INST];
    # left untouched because changing the prompt changes model behaviour.
    template = f"""
    <s>[INST] <<SYS>>
    Confine your answer within the given context and do not generate the next context.
    Answer truthful answers, don't try to make up an answer.
    <</SYS>>
    Context: {context}
    Question: {query}
    Answer: """

    headers = {
        'Authorization': 'YOUR_AUTHORIZATION_KEY_HERE',
        'Content-Type': 'application/json',
    }

    config = {
        "max_new_tokens": 200,
        "temperature": 0.01,
        "return_full_text": False,
        "early_stopping": False,
        "stop_sequence": "***",
        "do_sample": True,
        "top_p": 0.9,
        "num_return_sequences": 1
    }

    json_data = {
        'inputs': template,
        'parameters': config
    }

    response = requests.post(
        endpoint_url,
        headers=headers,
        json=json_data,
        timeout=timeout,
    )
    # Surface HTTP failures directly instead of failing later while parsing.
    response.raise_for_status()

    # Parse the body as JSON rather than splitting the raw text, which breaks
    # as soon as the generated text contains an escaped quote.
    # Assumes the text-generation reply shape [{"generated_text": "..."}].
    payload = response.json()
    try:
        return payload[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(f"Unexpected response from endpoint: {payload!r}") from err

def process_query(query):
    """Answer a user question using documents retrieved from the vector store.

    Args:
        query: The question text entered by the user.

    Returns:
        The endpoint's answer, with literal backslash-n sequences turned into
        real newlines.
    """
    # LangChain vector stores expose ``similarity_search``; there is no
    # ``search_similar`` method, so the old call raised AttributeError.
    retrieved_docs = vector_database.similarity_search(query)
    combined_context = concatenate_documents(retrieved_docs)
    answer = query_llama2_EP(combined_context, query, 'YOUR_ENDPOINT_URL_HERE')
    # Harmless when the answer already contains real newlines.
    return answer.replace("\\n", "\n")

# Gradio UI: a single textbox in and a single textbox out, backed by
# process_query.
alula_smart_speaker = gr.Interface(
    fn=process_query,
    inputs="textbox",
    outputs="textbox",
    title="Smart Speaker"
)

# Starts the web app as soon as this file runs.
# NOTE(review): share=True requests a publicly reachable link -- confirm that
# exposing the app outside the local machine is intended.
alula_smart_speaker.launch(share=True)
