In [None]:
# Git Repo Runner

In [9]:
import base64
import cohere
import datetime
import git
import json
import matplotlib.pyplot as plt
import numpy as np
import os
from dotenv import load_dotenv
from typing import Annotated, List, Literal, TypedDict

import ipywidgets as widgets
from IPython.display import Image, Markdown, clear_output, display
from ratelimit import limits, sleep_and_retry

from fastembed import TextEmbedding
from langchain.chains import RetrievalQA, create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from langchain.globals import set_debug, set_verbose
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import GitLoader, generic
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.vectorstores import FAISS
from langchain_core.messages import BaseMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_text_splitters import Language, MarkdownTextSplitter, RecursiveCharacterTextSplitter
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from llama_index.core import Settings, VectorStoreIndex

# Load environment variables (load_dotenv reads a local .env file, if any)
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
tavily_api_key = os.getenv("TAVILY_API_KEY")
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
langchain_endpoint = os.getenv("LANGCHAIN_ENDPOINT")

# os.environ only accepts str values: assigning the result of os.getenv()
# directly raises TypeError when COHERE_API_KEY is not set. Only re-export
# the key when it is actually present so the cell runs without it.
cohere_api_key = os.getenv("COHERE_API_KEY")
if cohere_api_key is not None:
    os.environ["COHERE_API_KEY"] = cohere_api_key
os.environ["LANGCHAIN_PROJECT"] = "classic_bot"

# Set global configurations (LangChain verbose/debug output switched off)
set_verbose(False)
set_debug(False)

# Markdown display function
def md(t):
    """Show ``t`` in the cell output as rendered Markdown.

    ``t`` is converted with ``str()`` first, so any object can be passed.
    Returns ``None``; the only effect is the displayed output.
    """
    rendered = Markdown(str(t))
    display(rendered)

# Shared chat model used by the retrieval chains in the cells below.
llm = ChatGroq(
    model_name="llama3-70b-8192",
    temperature=1,
    api_key=groq_api_key,
)

# Wall-clock time at which this cell was executed, as a display string.
timestamp = format(datetime.datetime.now(), "%Y-%m-%d %H:%M:%S")


In [8]:


# List of repositories to clone
repo_urls = [
    "https://github.com/microsoft/autogen",
    "https://github.com/vcappuccio/achainflow",
    # Add more repository URLs here
]

# Base directory to store all cloned repositories
base_repo_path = "docs"

# Create the base directory if it doesn't exist. exist_ok=True replaces the
# separate exists() check (no check-then-create race) and raises
# FileExistsError early if the path is occupied by a regular file.
os.makedirs(base_repo_path, exist_ok=True)

# Function to clone or update a repository
def clone_or_update_repo(repo_url, base_path):
    """Clone ``repo_url`` under ``base_path``, or pull if a checkout already exists.

    The checkout directory is named after the last path segment of the URL.
    Trailing slashes and a trailing ``.git`` suffix are ignored, so
    ``https://host/org/name``, ``https://host/org/name/`` and
    ``https://host/org/name.git`` all map to ``<base_path>/name``.

    Args:
        repo_url: URL of the remote repository.
        base_path: Directory that holds all checkouts.

    Returns:
        The ``git.Repo`` object for the local checkout.

    Raises:
        ValueError: If no repository name can be derived from ``repo_url``.
    """
    # Without rstrip, a URL ending in "/" yields an empty name, which makes
    # repo_path equal to base_path and sends us down the "pull" branch on a
    # directory that is not a repository.
    repo_name = repo_url.rstrip('/').split('/')[-1]
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not repo_name:
        raise ValueError(f"Cannot derive a repository name from URL: {repo_url!r}")

    repo_path = os.path.join(base_path, repo_name)

    # Clone or update the repository
    if os.path.exists(repo_path):
        repo = git.Repo(repo_path)
        repo.remotes.origin.pull()
    else:
        repo = git.Repo.clone_from(repo_url, repo_path)

    return repo

# Sync every configured repository, then report the commit its HEAD points at.
for repo_url in repo_urls:
    repo = clone_or_update_repo(repo_url=repo_url, base_path=base_repo_path)
    latest_commit = repo.head.commit
    print(f"Latest commit in {repo_url}: {latest_commit.hexsha}")

Latest commit in https://github.com/microsoft/autogen: 84577570ad113ea1add2c3f60f47d4ddb75b98bb
Latest commit in https://github.com/vcappuccio/achainflow: 673ba0eb5c2cfd538eb4a9edcb7a31b7f61c9131


In [13]:

def ingest_docs(question, index_name, repo_path, embedding_model=None) -> str:
    """Answer ``question`` about a local Git checkout via a FAISS-backed RetrievalQA chain.

    If ``index_name`` does not exist on disk, the repository at ``repo_path``
    is loaded, split into chunks, embedded, and saved as a FAISS index at
    ``index_name``; the freshly built index is then used directly. If
    ``index_name`` already exists, the index is loaded from disk and the
    repository is not read again.

    Note: this notebook defines ``ingest_docs`` in more than one cell; the
    definition from whichever cell ran last is the one in effect.

    Args:
        question: Query passed to the RetrievalQA chain.
        index_name: Path of the on-disk FAISS index to load or create.
        repo_path: Path of the local Git checkout to index.
        embedding_model: Optional embeddings object. Defaults to
            ``OllamaEmbeddings()``. An existing index must be loaded with the
            same embeddings it was built with.

    Returns:
        The chain's ``"result"`` string, or a short error message string when
        the repository cannot be loaded or yields no documents.
    """
    if os.path.exists(index_name):
        md("Loading index...")
        if embedding_model is None:
            embedding_model = OllamaEmbeddings()
        # NOTE(security): allow_dangerous_deserialization=True opts in to
        # unsafe deserialization of the stored index; only point index_name
        # at index directories created by this notebook.
        my_vectorstore = FAISS.load_local(
            index_name, embedding_model, allow_dangerous_deserialization=True
        )
    else:
        loader = GitLoader(repo_path=repo_path)
        try:
            raw_documents = loader.load()
        except Exception as e:
            md(f"Error loading documents from the repository: {e}")
            return "Error loading documents from the repository."

        if not raw_documents:
            md("No documents found in the repository.")
            return "No documents found in the repository."

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            separators=["\n\n", "\n", " ", ""]
        )
        documents = text_splitter.split_documents(documents=raw_documents)

        if not documents:
            md("No documents to split.")
            return "No documents to split."

        md(f"Split into {len(documents)} chunks")

        # Create the embeddings only once the documents are known to be usable.
        if embedding_model is None:
            embedding_model = OllamaEmbeddings()

        # Persist for later runs, but keep using the in-memory store instead
        # of immediately re-reading what was just written.
        my_vectorstore = FAISS.from_documents(documents, embedding_model)
        my_vectorstore.save_local(index_name)

    qa = RetrievalQA.from_chain_type(llm=llm, retriever=my_vectorstore.as_retriever(), chain_type="stuff")
    response = qa.invoke({"query": question})
    md("Done")

    return response["result"]

In [14]:
# Ask a high-level question about the achainflow checkout.
question = "What is this repo about?"

index_name = "faiss_achainflow"  # path of the FAISS index to load or create
repo_path = "docs/achainflow"    # local checkout to index

response = ingest_docs(question, index_name, repo_path)
md(response)

Loading index...

Done

This repo is about a Python script called `achainflow.py` that provides a Streamlit application to consult a chain of advisors using different AI models (Groq and Ollama) to solve coding problems and technical questions. It takes a user's problem statement as input, consults multiple AI models asynchronously, and generates a final answer based on the responses from the models.

In [15]:
# Using FastEmbed Embeddings


def ingest_docs(question, index_name, repo_path, embedding_model=None) -> str:
    """Answer ``question`` about a local Git checkout via a FAISS-backed RetrievalQA chain.

    FastEmbed variant: when no embeddings object is supplied, the
    ``BAAI/bge-base-en`` FastEmbed model is used.

    If ``index_name`` does not exist on disk, the repository at ``repo_path``
    is loaded, split into chunks, embedded, and saved as a FAISS index at
    ``index_name``; the freshly built index is then used directly. If
    ``index_name`` already exists, the index is loaded from disk and the
    repository is not read again.

    Note: this notebook defines ``ingest_docs`` in more than one cell; the
    definition from whichever cell ran last is the one in effect.

    Args:
        question: Query passed to the RetrievalQA chain.
        index_name: Path of the on-disk FAISS index to load or create.
        repo_path: Path of the local Git checkout to index.
        embedding_model: Optional embeddings object. Defaults to
            ``FastEmbedEmbeddings(model_name="BAAI/bge-base-en")``. An
            existing index must be loaded with the same embeddings it was
            built with.

    Returns:
        The chain's ``"result"`` string, or a short error message string when
        the repository cannot be loaded or yields no documents.
    """
    if os.path.exists(index_name):
        md("Loading index...")
        if embedding_model is None:
            embedding_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en")
        # NOTE(security): allow_dangerous_deserialization=True opts in to
        # unsafe deserialization of the stored index; only point index_name
        # at index directories created by this notebook.
        my_vectorstore = FAISS.load_local(
            index_name, embedding_model, allow_dangerous_deserialization=True
        )
    else:
        loader = GitLoader(repo_path=repo_path)
        try:
            raw_documents = loader.load()
        except Exception as e:
            md(f"Error loading documents from the repository: {e}")
            return "Error loading documents from the repository."

        if not raw_documents:
            md("No documents found in the repository.")
            return "No documents found in the repository."

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            separators=["\n\n", "\n", " ", ""]
        )
        documents = text_splitter.split_documents(documents=raw_documents)

        if not documents:
            md("No documents to split.")
            return "No documents to split."

        md(f"Split into {len(documents)} chunks")

        # Create the embeddings only once the documents are known to be usable.
        if embedding_model is None:
            embedding_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en")

        # Persist for later runs, but keep using the in-memory store instead
        # of immediately re-reading what was just written.
        my_vectorstore = FAISS.from_documents(documents, embedding_model)
        my_vectorstore.save_local(index_name)

    qa = RetrievalQA.from_chain_type(llm=llm, retriever=my_vectorstore.as_retriever(), chain_type="stuff")
    response = qa.invoke({"query": question})
    md("Done")

    return response["result"]


In [11]:
 
# Code-generation prompt answered from the AutoGen checkout.
question = """
Write python code for 3 agents, 2 assistant agents and another being a user agent, the assistant agents
will be a writer, and critic.  The user agent will just be simple. We also need to create a group chat and initiate
it.  Create a config_list based on examples from other Autogen code examples. Have all the correct imports,
the correct code for each agent, and initiate the group chat asking a message about the top 5 longest rivers in the
world. Look at example code from AutoGen in order to understand how to do this if needed.  I don't want a simplified
version, give me the full version.  Only return code, nothing else.  The agents should be AutoGen agents, not openai.
Make sure to use the UserAgent and AssistantAgent, GroupChat and GroupChatManager agents to create the group chat and
initiate the chat with.
"""

index_name = "faiss_autogen"  # path of the FAISS index to load or create
repo_path = "docs/autogen"    # local checkout to index

response = ingest_docs(question, index_name, repo_path)
md(response)


Loading index...

Done

```python
import os
from autogen.agent import AssistantAgent, UserAgent, GroupChat, GroupChatManager

# Define LLM config
config_list = [{"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")}]

# Create agents
writer = AssistantAgent("writer", llm_config={"config_list": config_list})
critic = AssistantAgent("critic", llm_config={"config_list": config_list})
user = UserAgent("user")

# Create group chat
group_chat = GroupChat("group_chat")
group_chat.add_agent(writer)
group_chat.add_agent(critic)
group_chat.add_agent(user)

# Create group chat manager
group_chat_manager = GroupChatManager()

# Initiate group chat
group_chat_manager.initiate_group_chat(group_chat, message="What are the top 5 longest rivers in the world?")
```