In [33]:
import os
import textwrap
import re

from llama_index.llms import HuggingFaceInferenceAPI
from llama_index import ServiceContext
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index import VectorStoreIndex
from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index import download_loader
from llama_hub.github_repo import GithubRepositoryReader, GithubClient

from dotenv import load_dotenv

load_dotenv()

llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2",  # replace with your model name
    context_window=2048,  # to use refine
    token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),  # replace with your HuggingFace token
)

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5", chunk_size=1024, chunk_overlap=64)

# from llama_index.embeddings import HuggingFaceInferenceAPIEmbedding

# embed_model1 = HuggingFaceInferenceAPIEmbedding(
#                 model_name="sentence-transformers/all-MiniLM-L6-v2")

# service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model1)

node_parser = service_context.node_parser

In [29]:
load_dotenv()

True

In [None]:
# llm = HuggingFaceHub(
#     repo_id="mistralai/Mistral-7B-Instruct-v0.2", 
#     model_kwargs={"temperature": 0.5, "max_length": 64,"max_new_tokens":512}
# )

# query = "Who is elon musk? from a scientist perspective"

# prompt = f"""
#  <|system|>
# You are an AI assistant that follows instruction extremely well.
# Please be truthful and give direct answers
# </s>
#  <|user|>
#  {query}
#  </s>
#  <|assistant|>
# """

# response = llm.predict(prompt)
# print(response)

In [2]:
#Fetch and set API keys
# active_loop_token = os.getenv("ACTIVELOOP_TOKEN")
# dataset_path = os.getenv("DATASET_PATH")
# github_token = os.getenv('GITHUB_TOKEN')

In [4]:
# active_loop_token, dataset_path, github_token

In [5]:
def parse_github_url(url):
    pattern = r"https:\/\/github\.com\/([^/]+)\/([^/]+)"
    match = re.match(pattern, url)
    return match.groups() if match else (None, None)

In [6]:
# active_loop_token

In [7]:
parse_github_url("https://github.com/zmusaddique/chatbot-restaurant")

('zmusaddique', 'chatbot-restaurant')

In [8]:
def validate_owner_repo(owner, repo):
    return bool(owner) and bool(repo)

In [9]:
def initialize_github_client():
    github_token = os.getenv("GITHUB_TOKEN")
    return GithubClient(github_token)

In [10]:
def generate_queries(llm, query_str: str, num_queries: int = 4):
    fmt_prompt = query_gen_prompt.format(
        num_queries=num_queries - 1, query=query_str
    )
    response = llm.complete(fmt_prompt)
    queries = response.text.split("\n")
    return queries

In [11]:
async def run_queries(queries, retrievers):
    """Run queries against retrievers."""
    tasks = []
    for query in queries:
        for i, retriever in enumerate(retrievers):
            tasks.append(retriever.aretrieve(query))

    task_results = await tqdm.gather(*tasks)

    results_dict = {}
    for i, (query, query_result) in enumerate(zip(queries, task_results)):
        results_dict[(query, i)] = query_result

    return results_dict

In [31]:
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index import PromptTemplate
# get retrievers
from llama_index.retrievers import BM25Retriever
from tqdm.asyncio import tqdm
from llama_index.response.notebook_utils import display_source_node
from llama_index import QueryBundle
from llama_index.retrievers import BaseRetriever
from typing import Any, List
from llama_index.schema import NodeWithScore
from llama_index.query_engine import RetrieverQueryEngine
import asyncio
from llama_index.postprocessor import SentenceTransformerRerank
from llama_index.response_synthesizers import get_response_synthesizer
import chromadb

In [13]:
def fuse_results(results_dict, similarity_top_k: int = 2):
    """Fuse results."""
    k = 60.0  # `k` is a parameter used to control the impact of outlier rankings.
    fused_scores = {}
    text_to_node = {}

    # compute reciprocal rank scores
    for nodes_with_scores in results_dict.values():
        for rank, node_with_score in enumerate(
            sorted(
                nodes_with_scores, key=lambda x: x.score or 0.0, reverse=True
            )
        ):
            text = node_with_score.node.get_content()
            text_to_node[text] = node_with_score
            if text not in fused_scores:
                fused_scores[text] = 0.0
            fused_scores[text] += 1.0 / (rank + k)

    # sort results
    reranked_results = dict(
        sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    )

    # adjust node scores
    reranked_nodes: List[NodeWithScore] = []
    for text, score in reranked_results.items():
        reranked_nodes.append(text_to_node[text])
        reranked_nodes[-1].score = score

    return reranked_nodes[:similarity_top_k]

In [14]:
class FusionRetriever(BaseRetriever):
    """Ensemble retriever with fusion."""

    def __init__(
        self,
        llm,
        retrievers: List[BaseRetriever],
        similarity_top_k: int = 2,
    ) -> None:
        """Init params."""
        self._retrievers = retrievers
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve."""
        queries = generate_queries(llm, query_str, num_queries=4)
        results = run_queries(queries, [vector_retriever, bm25_retriever])
        final_results = fuse_results(
            results_dict, similarity_top_k=self._similarity_top_k
        )

        return final_results

In [15]:
query_gen_prompt_str = (
        "You are a helpful assistant that generates multiple search queries based on a "
        "single input query. Generate {num_queries} search queries, one on each line, "
        "related to the following input query:\n"
        "Query: {query}\n"
        "Queries:\n"
    )
query_gen_prompt = PromptTemplate(query_gen_prompt_str)

In [16]:
# import os

# from llama_index import download_loader
# download_loader("GithubRepositoryReader")

# from llama_hub.github_repo import GithubRepositoryReader, GithubClient

# github_client = GithubClient('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')
# loader = GithubRepositoryReader(
#     github_client,
#     owner =                  "jerryjliu",
#     repo =                   "llama_index",
#     filter_directories =     (["llama_index", "docs"], GithubRepositoryReader.FilterType.INCLUDE),
#     filter_file_extensions = ([".py"], GithubRepositoryReader.FilterType.INCLUDE),
#     verbose =                True,
#     concurrent_requests =    10,
# )

# # docs = loader.load_data(branch="main")
# # alternatively, load from a specific commit:
# docs = loader.load_data(commit_sha="c4cbb7d361bbacbbddd93742e57fd9aaba20b65e")

# for doc in docs:
#     print(doc.extra_info)

In [34]:
async def main():
    huggingfacehub_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not huggingfacehub_api_token:
        raise EnvironmentError("HuggingFaceHub API key not found in enivronment variables")
        
    github_token = os.getenv('GITHUB_TOKEN')
    if not github_token:
        raise EnvironmentError("Github token not found in environment variables")
        
    active_loop_token = os.getenv("ACTIVELOOP_TOKEN")
    if not active_loop_token:
        raise EnvironmentError("Activloop token not found in environment variables")
        
    github_client = initialize_github_client()
    download_loader("GithubRepositoryReader")
    print("loader loaded")
    github_url = "https://github.com/zmusaddique/chatbot-restaurant"
    
    while True:
        owner, repo = parse_github_url(github_url)
        if validate_owner_repo(owner, repo):
            loader = GithubRepositoryReader(
                github_client,
                owner=owner,
                repo=repo,
                filter_file_extensions=([".py", ".js", ".ts", ".md", ".ipynb"],
                                       GithubRepositoryReader.FilterType.INCLUDE,),
                verbose=False,
                concurrent_requests=20,
            )
            print(f"Loading {repo} repository by {owner}")
            docs = loader.load_data(branch="main")
            
            print("Documeznts uploaded: ")
            for doc in docs:
                print(doc.metadata)
#             nodes = node_parser.get_nodes_from_documents(docs)
            nodes = service_context.node_parser.get_nodes_from_documents(docs)
            break # Exit the loop once the valid URL is processed
        else:
            print("Invalid Github URL. Please try again.")
            github_url = "https://github.com/zmusaddique/chatbot-restaurant"
            #github_url = input("Please enter the GitHub repository URL: ")
            

            
    print("Uploading to vector store...")
    
    
    
    # -------------Create vector store and upload data---------------
    
    chroma_client = chromadb.EphemeralClient()
    chroma_collection = chroma_client.get_or_create_collection("codechat")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    
    service_context = ServiceContext.from_defaults(llm = llm, embed_model=embed_model)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
#     vector_index = VectorStoreIndex.from_documents(
#         docs, 
#         storage_context=storage_context, 
#         service_context=service_context, 
#         show_progress=True,
#     )
    
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    storage_context.docstore.add_documents(nodes)
#     index = VectorStoreIndex.from_documents(docs, storage_context=storage_context,llm=llm, )
#     index = VectorStoreIndex.from_documents(documents=docs, service_context=service_context)
#     vector_index = VectorStoreIndex.from_documents(nodes, storage_context=storage_context, service_context=service_context, show_progress=True,)
    vector_index = VectorStoreIndex(
        docs, 
        storage_context=storage_context, 
        service_context=service_context, 
        show_progress=True,
    )
    print("vector_index created")
#     query_engine = vector_index.as_query_engine(streaming=True, similarity_top_k=10)
#     query_engine = vector_index.as_query_engine(similarity_top_k=10)
    
#     query_engine_tools = [
#         QueryEngineTool(
#             query_engine=query_engine,
#             metadata=ToolMetadata(
#                 name="GitHub Repo",
#                 description="The Github repo of a project",
#             ),
#         ),
#     ]
    
#     query_engine = SubQuestionQueryEngine.from_defaults(
#         query_engine_tools=query_engine_tools,
#         service_context=service_context,
#         use_async=True,
#     )
    #++++++++++++++++++++++++++++++++++++++++++++
    # Fusion
    print("Fusion starting")
    query_str = "How do the models developed in this work compare to open-source chat models based on the benchmarks tested?"
#     query_gen_prompt_str = (
#         "You are a helpful assistant that generates multiple search queries based on a "
#         "single input query. Generate {num_queries} search queries, one on each line, "
#         "related to the following input query:\n"
#         "Query: {query}\n"
#         "Queries:\n"
#     )
#     query_gen_prompt = PromptTemplate(query_gen_prompt_str)
    

    ## vector retriever
    vector_retriever = vector_index.as_retriever(similarity_top_k=2)

    ## bm25 retriever
    bm25_retriever = BM25Retriever.from_defaults(
        nodes=nodes,
        similarity_top_k=2
    )
    
    # will retrieve context from specific companies
    #nodes = bm25_retriever.retrieve("What happened at Viaweb and Interleaf?")
#     for node in nodes:
#         display_source_node(node)
    query_str = "What is the repository about and what is the tech stack?"
    queries = generate_queries(llm, query_str, num_queries=4)
    results_dict = await run_queries(queries, [vector_retriever, bm25_retriever])
    print(results_dict)
    final_results = fuse_results(results_dict)
    
    for n in final_results:
        display_source_node(n, source_length=500)
        
    fusion_retriever = FusionRetriever(
        llm, [vector_retriever, bm25_retriever], similarity_top_k=2
    )
    
    response_synthesizer = get_response_synthesizer(service_context=service_context)    
    query_engine = RetrieverQueryEngine(
        fusion_retriever,
        response_synthesizer=response_synthesizer,
    )
    
    response = query_engine.query(query_str)
    print(str(response))
    
    #++++++++++++++++++++++++++++++++++++++++++++
    
#     Include a simple question to test
    intro_question = "What is the repository about?"
    print(f"Test question: {intro_question}")
    print('=' * 50)
    streaming_response = query_engine.query(intro_question)
    #streaming_response.print_response_stream()
    
    print(f"Answer: {textwrap.fill(str(streaming_response),100)} \n")
    while True:
        user_question = input("Please enter your question (or type 'exit' to quit): ")
        if user_question.lower() == 'exit':
            print("Exiting, Thanks for chatting!")
            break
        print("=" * 50)
        print(f"Your question: {user_question}")
        
        streaming_response = query_engine.query(user_question)
        #streaming_response.print_response_stream()
        print(f"Answer: {textwrap.fill(str(streaming_response), 100)} \n")

if __name__ == "__main__":
    asyncio.run(main())
#     main()

loader loaded
Loading chatbot-restaurant repository by zmusaddique
Documeznts uploaded: 
{'file_path': 'FoodChatBot/db_helper.py', 'file_name': 'db_helper.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/db_helper.py'}
{'file_path': 'FoodChatBot/frontend/frontend_server.py', 'file_name': 'frontend_server.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/frontend/frontend_server.py'}
{'file_path': 'FoodChatBot/generic_helper.py', 'file_name': 'generic_helper.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/generic_helper.py'}
{'file_path': 'FoodChatBot/main.py', 'file_name': 'main.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/main.py'}
{'file_path': 'README.md', 'file_name': 'README.md', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/README.md'}
{'file_path': 'backend/db_helper.py', 'file_name': 'db_helper.py', 'url': 'htt

UnboundLocalError: local variable 'service_context' referenced before assignment

In [None]:
from llama_index.response_synthesizers import get_response_synthesizer

response_synthesizer = get_response_synthesizer(service_context=service_context)

# Fusion Retriever

In [None]:
github_client = initialize_github_client()
download_loader("GithubRepositoryReader")

github_url = "https://github.com/zmusaddique/chatbot-restaurant"
owner, repo = parse_github_url(github_url)

if validate_owner_repo(owner, repo):
    loader = GithubRepositoryReader(
        github_client,
        owner=owner,
        repo=repo,
        filter_file_extensions=([".py", ".js", ".ts", ".md", ".ipynb"],
                               GithubRepositoryReader.FilterType.INCLUDE,),
        verbose=False,
        concurrent_requests=10,
    )
    print(f"Loading {repo} repository by {owner}")
    docs = loader.load_data(branch="main")
    print("Documeznts uploaded: ")
    nodes = node_parser.get_nodes_from_documents(docs)

vector_store = DeepLakeVectorStore(
        dataset_path=dataset_path,
        overwrite=False,
        runtime={"tensor_db":True},
    )
    
storage_context = StorageContext.from_defaults(vector_store=vector_store)
storage_context.docstore.add_documents(nodes)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context, service_context=service_context, show_progress=True,)

## Query Generation/Rewriting

In [None]:
query_str = "How do the models developed in this work compare to open-source chat models based on the benchmarks tested?"
query_gen_prompt_str = (
    "You are a helpful assistant that generates multiple search queries based on a "
    "single input query. Generate {num_queries} search queries, one on each line, "
    "related to the following input query:\n"
    "Query: {query}\n"
    "Queries:\n"
)
query_gen_prompt = PromptTemplate(query_gen_prompt_str)

def generate_queries(llm, query_str: str, num_queries: int = 4):
    fmt_prompt = query_gen_prompt.format(
        num_queries=num_queries - 1, query=query_str
    )
    response = llm.complete(fmt_prompt)
    queries = response.text.split("\n")
    return queries

queries = generate_queries(llm, query_str, num_queries=4)

print(queries)

In [None]:
len(queries)

## Perform Vector Search for Each Query

In [None]:
from tqdm.asyncio import tqdm

async def run_queries(queries, retrievers):
    """Run queries against retrievers."""
    tasks = []
    for query in queries:
        for i, retriever in enumerate(retrievers):
            tasks.append(retriever.aretrieve(query))

    task_results = await tqdm.gather(*tasks)

    results_dict = {}
    for i, (query, query_result) in enumerate(zip(queries, task_results)):
        results_dict[(query, i)] = query_result

    return results_dict

In [None]:
# get retrievers
from llama_index.retrievers import BM25Retriever


## vector retriever
vector_retriever = vector_index.as_retriever(similarity_top_k=2)

## bm25 retriever
bm25_retriever = BM25Retriever.from_defaults(
    docstore=vector_index.docstore, similarity_top_k=2
)

In [None]:
results_dict = await run_queries(queries, [vector_retriever, bm25_retriever])

#### Context
reciprocal rank fusion: for each node, add up its reciprocal rank in every list where it’s retrieved.

Then reorder nodes by highest score to least.

In [None]:
def fuse_results(results_dict, similarity_top_k: int = 2):
    """Fuse results."""
    k = 60.0  # `k` is a parameter used to control the impact of outlier rankings.
    fused_scores = {}
    text_to_node = {}

    # compute reciprocal rank scores
    for nodes_with_scores in results_dict.values():
        for rank, node_with_score in enumerate(
            sorted(
                nodes_with_scores, key=lambda x: x.score or 0.0, reverse=True
            )
        ):
            text = node_with_score.node.get_content()
            text_to_node[text] = node_with_score
            if text not in fused_scores:
                fused_scores[text] = 0.0
            fused_scores[text] += 1.0 / (rank + k)

    # sort results
    reranked_results = dict(
        sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    )

    # adjust node scores
    reranked_nodes: List[NodeWithScore] = []
    for text, score in reranked_results.items():
        reranked_nodes.append(text_to_node[text])
        reranked_nodes[-1].score = score

    return reranked_nodes[:similarity_top_k]

In [None]:
final_results = fuse_results(results_dict)

In [None]:
!ping www.google.com