In [1]:
import numpy as np
import llama_index
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import google.generativeai as palm
from llama_index.llms.palm import PaLM
import math
from flask import app
import json
from openai import OpenAI
import chromadb
from chromadb.config import Settings
import os

  from .autonotebook import tqdm as notebook_tqdm


In [31]:
# Persistent Chroma client; allow_reset=True is passed through Chroma's Settings.
chroma_client = chromadb.PersistentClient(
    settings=Settings(allow_reset=True),
)
# Liveness probe; as the cell's last expression its return value is displayed.
chroma_client.heartbeat()


True

In [32]:
from chromadb.utils import embedding_functions

# The OpenAI key is read from the OPENAI_API_KEY environment variable so the
# credential is never stored in the notebook. A missing variable fails here
# with a KeyError instead of later with an authentication error.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-ada-002",
    api_key=os.environ["OPENAI_API_KEY"],
)

# Fetch the "deptName" collection, creating it on first use, and attach the
# OpenAI embedding function to it.
collection = chroma_client.get_or_create_collection(
    name="deptName",
    embedding_function=openai_ef,
)

In [4]:
from elasticsearch import Elasticsearch
import faiss

In [5]:
# OpenAI client used for chat completions. The key comes from the
# OPENAI_API_KEY environment variable rather than being hard-coded here.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [6]:


def read_text_file(file_path):
    """Return the entire contents of the UTF-8 text file at `file_path`.

    Args:
        file_path: Path of the file to open for reading.

    Returns:
        The file's full text as a single string.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read()

def chunk_text(text, chunk_size):
    """Split `text` into consecutive pieces of at most `chunk_size` characters.

    The pieces do not overlap and are cut purely by character count, so a
    boundary may fall in the middle of a word. The last piece may be shorter
    than `chunk_size`.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per piece; must be positive.

    Returns:
        A list of substrings that concatenate back to `text` (empty for "").

    Raises:
        ValueError: If `chunk_size` is zero or negative.
    """
    # A negative step makes range() empty, which would silently drop the whole
    # text; reject it explicitly, alongside zero.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]



In [21]:
# Read the whole book into memory, then slice it into 250-character chunks.
text_file = read_text_file("harry-potter-deathly-hallows.txt")
chunks = chunk_text(text=text_file, chunk_size=250)

In [None]:

# Chroma ids are "id0", "id1", ... — the zero-based position of each chunk.
chunk_ids = [f"id{position}" for position in range(len(chunks))]

# Add in bounded batches instead of one call with every chunk. The OpenAI
# embeddings endpoint accepts at most 2048 inputs per request.
# NOTE(review): presumably the collection's embedding function forwards each
# add() batch in a single request — confirm against the installed chromadb.
ADD_BATCH_SIZE = 1000
for batch_start in range(0, len(chunks), ADD_BATCH_SIZE):
    batch_stop = batch_start + ADD_BATCH_SIZE
    collection.add(
        documents=chunks[batch_start:batch_stop],
        ids=chunk_ids[batch_start:batch_stop],
    )

In [14]:
# One record per chunk; ids count from 1 rather than 0.
documents = [
    {"id": position, "text": piece}
    for position, piece in enumerate(chunks, start=1)
]

In [16]:
# Address of the Elasticsearch node used for keyword search.
ES_NODES = "http://localhost:9200"

es = Elasticsearch(hosts=ES_NODES)

# Index every record under its own id in the "documents" index, storing only
# the chunk text as the document body.
for entry in documents:
    es.index(
        index="documents",
        id=entry['id'],
        document={"text": entry['text']},
    )

In [86]:
def _chroma_id_to_doc_id(chroma_id):
    """Translate a Chroma id such as "id41" into the Elasticsearch id "42".

    The collection is populated with zero-based ids of the form "id<n>", while
    the same chunk is indexed in Elasticsearch under the one-based id n + 1.
    Ids that do not follow the "id<n>" pattern are returned unchanged.
    """
    suffix = chroma_id[2:]
    if chroma_id.startswith("id") and suffix.isdecimal():
        return str(int(suffix) + 1)
    return chroma_id


def hybrid_search(query_text, alpha=0.3, n_results=5):
    """Fuse keyword (Elasticsearch) and vector (Chroma) results for a query.

    Args:
        query_text: The user's search string.
        alpha: Weight of the keyword score; the vector score gets 1 - alpha.
        n_results: How many hits to request from each backend.

    Returns:
        A dict mapping document id (the Elasticsearch `_id`, a string) to its
        combined score. A document found by only one backend contributes 0
        for the other.
    """
    # Keyword side: raw Elasticsearch relevance scores keyed by document id.
    response = es.search(index="documents", query={"match": {"text": query_text}}, size=n_results)
    keyword_results = {hit['_id']: hit['_score'] for hit in response['hits']['hits']}
    print("Keyword Search Results:")
    for doc_id, score in keyword_results.items():
        print(f"Doc ID: {doc_id}, Score: {score} ")

    # Vector side: query with the function's own argument. A single query is
    # sent, so the first (only) row of ids holds the hits, best first.
    results = collection.query(query_texts=[query_text], n_results=n_results)
    vector_ids = results['ids'][0]

    # Score each hit by reciprocal rank, keyed by the real document id so that
    # it lines up with the keyword results instead of by its rank position.
    vector_results = {
        _chroma_id_to_doc_id(chroma_id): 1 / (rank + 1)
        for rank, chroma_id in enumerate(vector_ids)
    }

    print("Vector Search Results:")
    for doc_id, score in vector_results.items():
        print(f"Doc ID: {doc_id}, Score: {score}")

    # Weighted sum over the union of ids. The reciprocal-rank score (at most 1)
    # is multiplied by 10 before weighting, as in the original formula.
    combined_scores = {}
    for doc_id in set(keyword_results.keys()).union(vector_results.keys()):
        combined_scores[doc_id] = (
            alpha * keyword_results.get(doc_id, 0)
            + (1 - alpha) * vector_results.get(doc_id, 0) * 10
        )

    return combined_scores

In [95]:


def runPrompt(query, k=5):
    """Answer `query` with gpt-4o using the best-scoring retrieved chunks.

    Args:
        query: The user's question.
        k: Maximum number of top-scoring document ids to use as context.

    Returns:
        A dict with "context" (the list of chunk texts placed in the prompt)
        and "response" (the text of the model's first choice).
    """
    # Rank every candidate id by its combined score, best first, keep at most k.
    scores_by_id = hybrid_search(query_text=query)
    ranked = sorted(scores_by_id.items(), key=lambda pair: pair[1], reverse=True)
    best_ids = [doc_id for doc_id, _ in ranked[:k]]

    # Resolve each id to its chunk text via the first record whose "id" matches;
    # ids with no matching record are skipped.
    relevant_info = []
    for doc_id in best_ids:
        for entry in documents:
            if entry["id"] == int(doc_id):
                relevant_info.append(entry["text"])
                break

    # The context list is interpolated as-is, i.e. as its Python list repr.
    prompt = "".join([
        "You are a smart agent. A question will be asked to you along with relevant information. ",
        "Your task is to answer the question using the information provided. ",
        f"Question: {query}. Relevant Information: {relevant_info}",
    ])

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(model="gpt-4o", messages=conversation)

    return {
        "context": relevant_info,
        "response": completion.choices[0].message.content,
    }


In [14]:
from flask import Flask, request

# Flask application object that the route handlers below are registered on.
app = Flask(__name__)

In [None]:
@app.route('/context', methods=['POST'])
def context_output():
    """Handle POST /context: pass the request's 'query' to runContext.

    Expects a JSON object body containing a 'query' field.

    Returns:
        {'response': <runContext output>} on success, or an error object with
        HTTP status 400 when the body is not a JSON object or lacks 'query'.
    """
    # silent=True yields None for a missing or malformed JSON body, so bad
    # input is answered with a 400 below instead of an unhandled exception.
    body = request.get_json(silent=True)
    print(body)
    if not isinstance(body, dict) or 'query' not in body:
        return {'error': "Request body must be a JSON object with a 'query' field."}, 400
    # NOTE(review): runContext is not defined in any cell visible in this
    # notebook — confirm it exists before serving this route.
    output = runContext(body['query'])
    return {'response': output}
@app.route('/rag', methods=['POST'])
def rag_output():
    """Handle POST /rag: answer the request's 'query' with runPrompt.

    Expects a JSON object body containing a 'query' field.

    Returns:
        {'response': <runPrompt output>} on success, or an error object with
        HTTP status 400 when the body is not a JSON object or lacks 'query'.
    """
    # silent=True yields None for a missing or malformed JSON body, so bad
    # input is answered with a 400 below instead of an unhandled exception.
    body = request.get_json(silent=True)
    print(body)
    if not isinstance(body, dict) or 'query' not in body:
        return {'error': "Request body must be a JSON object with a 'query' field."}, 400
    output = runPrompt(body['query'])
    return {'response': output}
    

# Entry point: start the Flask development server on port 5003 when this code
# runs as the main module.
if __name__ == "__main__":
    app.run(port=5003)