In [2]:
%load_ext jupyternotify

<IPython.core.display.Javascript object>

In [21]:
import os
import textwrap
from dotenv import load_dotenv
from llama_index import download_loader, ServiceContext
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.vector_stores import ChromaVectorStore
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
from llama_index import VectorStoreIndex
# from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.llms import HuggingFaceInferenceAPI
import chromadb
import re

# Load environment variables (the API tokens read below via os.getenv).
load_dotenv()

# LLM used to answer questions, served through the Hugging Face Inference API.
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.2",  # change to use a different hosted model
    context_window=2048,  # original note: "to use refine" -- TODO confirm the intended effect
    token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),  # read from the environment; never hardcode the token here
)
# Embedding model used when documents are indexed into the vector store.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Disabled settings for the commented-out DeepLake vector store; not used by the Chroma setup.
# active_loop_token = os.getenv("ACTIVELOOP_TOKEN")
# dataset_path = os.getenv("DATASET_PATH")


def parse_github_url(url):
    """Extract the owner and repository name from a GitHub repository URL.

    Surrounding whitespace is ignored, a trailing ``.git`` (clone URL) is
    removed from the repository name, and any query string, fragment or
    deeper path (``/tree/...``) after the repository segment is discarded.

    Args:
        url: Text expected to start with ``https://github.com/<owner>/<repo>``.

    Returns:
        tuple: ``(owner, repo)`` when the URL matches, otherwise ``(None, None)``.
        Non-string input also yields ``(None, None)``.
    """
    if not isinstance(url, str):
        return (None, None)

    # Stop each segment at "/", whitespace, "?" or "#" so that pasted URLs
    # with a query string, fragment or trailing newline still parse cleanly.
    pattern = r"https://github\.com/([^/\s?#]+)/([^/\s?#]+)"
    match = re.match(pattern, url.strip())
    if not match:
        return (None, None)

    owner, repo = match.groups()
    # Clone URLs end in ".git", which is not part of the repository name.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    if not repo:
        return (None, None)
    return (owner, repo)


def validate_owner_repo(owner, repo):
    """Tell whether both the owner and the repository name are present.

    Args:
        owner: Parsed repository owner, or a falsy value when missing.
        repo: Parsed repository name, or a falsy value when missing.

    Returns:
        bool: True only when both arguments are truthy.
    """
    if not owner:
        return False
    return bool(repo)


def initialize_github_client():
    """Create a GithubClient from the GITHUB_TOKEN environment variable.

    The variable's value (None when it is unset) is passed straight to the
    GithubClient constructor; no validation happens here.
    """
    return GithubClient(os.environ.get("GITHUB_TOKEN"))


def _prompt_for_repository():
    """Prompt until the entered URL parses to a non-empty owner and repo."""
    while True:
        github_url = input("Please enter the GitHub repository URL: ")
        owner, repo = parse_github_url(github_url)
        if validate_owner_repo(owner, repo):
            return owner, repo
        print("Invalid GitHub URL. Please try again.")


def _load_repository_documents(github_client, owner, repo, branch):
    """Load the repository's .py/.js/.ts/.md files and print their metadata."""
    loader = GithubRepositoryReader(
        github_client,
        owner=owner,
        repo=repo,
        filter_file_extensions=(
            [".py", ".js", ".ts", ".md"],
            GithubRepositoryReader.FilterType.INCLUDE,
        ),
        verbose=False,
        concurrent_requests=5,
    )
    print(f"Loading {repo} repository by {owner}")
    docs = loader.load_data(branch=branch)
    print("Documents uploaded:")
    for doc in docs:
        print(doc.metadata)
    return docs


def _build_query_engine(docs):
    """Index the documents in a Chroma collection and return a query engine."""
    chroma_client = chromadb.EphemeralClient()
    # NOTE(review): get_or_create reuses an existing "codechat" collection if
    # the client already has one -- confirm whether re-running main() in the
    # same kernel can mix documents from different repositories.
    chroma_collection = chroma_client.get_or_create_collection("codechat")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # `llm` and `embed_model` are the module-level models configured above.
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    index = VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
        service_context=service_context,
    )
    return index.as_query_engine()


def _answer_question(query_engine, question):
    """Run one query and print the answer wrapped at 100 columns."""
    print("=" * 50)
    answer = query_engine.query(question)
    print(f"Answer: {textwrap.fill(str(answer), 100)} \n")


def main(branch="main"):
    """Interactively index a GitHub repository and answer questions about it.

    Prompts for a repository URL, loads its source and markdown files, indexes
    them in an in-memory Chroma collection, asks one test question and then
    answers user questions until the user types 'exit'.

    Args:
        branch: Name of the branch to load. Defaults to "main"; pass another
            name for repositories whose default branch differs.

    Raises:
        EnvironmentError: If GITHUB_TOKEN is not set in the environment.
    """
    # Only the GitHub token is required: the vector store is Chroma, so no
    # Activeloop credentials are involved.
    if not os.getenv("GITHUB_TOKEN"):
        raise EnvironmentError("GitHub token not found in environment variables")

    # GithubRepositoryReader is imported directly from llama_hub, so no
    # runtime loader download is needed before using it.
    github_client = initialize_github_client()

    owner, repo = _prompt_for_repository()
    docs = _load_repository_documents(github_client, owner, repo, branch)
    if not docs:
        # Nothing matched the extension filter; there is nothing to index.
        print("No matching files were found in the repository; nothing to index.")
        return

    print("Uploading to vector store...")
    query_engine = _build_query_engine(docs)

    # Include a simple question to test.
    intro_question = "What is the repository about?"
    print(f"Test question: {intro_question}")
    _answer_question(query_engine, intro_question)

    while True:
        user_question = input(
            "Please enter your question (or type 'exit' to quit): "
        ).strip()
        if user_question.lower() == "exit":
            print("Exiting, thanks for chatting!")
            break
        if not user_question:
            # Skip blank input instead of sending an empty query to the LLM.
            continue

        print(f"Your question: {user_question}")
        _answer_question(query_engine, user_question)


# Start the interactive session when this code is executed directly
# (as a script, or by running the notebook cell).
if __name__ == "__main__":
    main()

Please enter the GitHub repository URL: https://github.com/zmusaddique/chatbot-restaurant
Loading chatbot-restaurant repository by zmusaddique
Documents uploaded:
{'file_path': 'FoodChatBot/db_helper.py', 'file_name': 'db_helper.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/db_helper.py'}
{'file_path': 'FoodChatBot/frontend/frontend_server.py', 'file_name': 'frontend_server.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/frontend/frontend_server.py'}
{'file_path': 'FoodChatBot/generic_helper.py', 'file_name': 'generic_helper.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/generic_helper.py'}
{'file_path': 'FoodChatBot/main.py', 'file_name': 'main.py', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/FoodChatBot/main.py'}
{'file_path': 'README.md', 'file_name': 'README.md', 'url': 'https://github.com/zmusaddique/chatbot-restaurant/blob/main/README.md'}
{'fi