In [9]:
import os
import requests
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS # vector store
from langchain_openai import ChatOpenAI,OpenAIEmbeddings # tend to use this whenever gemini runs into quota limit
from langchain_google_vertexai import ChatVertexAI # runs into quota limit sometimes
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import Optional, Type
from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.tools.retriever import create_retriever_tool
from langchain_community.document_loaders import WebBaseLoader # web document loader
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage
from langgraph.checkpoint import MemorySaver  # an in-memory checkpointer
from langgraph.prebuilt import create_react_agent
load_dotenv()

True

In [4]:
# For tracing and debugging
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# credential.json is not required if you are working within your own GCP project's vertex AI workbench
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/workspaces/Transcendent/fleet-anagram-244304-7dafcc771b2f.json"

# if you are using text embedding model from google
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [5]:
# Let's define a custom tool by subclassing the BaseTool class (from langchain)

class GithubUserCredentials(BaseModel):
    username: str = Field(description="Github username")
    github_access_token: str = Field(description="Github access token")
    
class GithubActivityTool(BaseTool):
    name = "Github_Activity_Tool" # it cannot handle spaces
    description = "useful for when you need to track user's github activity / events"
    args_schema: Type[BaseModel] = GithubUserCredentials

    def _run(self, username, github_access_token):
        url = f"https://api.github.com/users/{username}/events"
        headers = {"Authorization": f"token {github_access_token}"}

        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an exception for HTTP errors

        events_data = response.json()
        return events_data

In [6]:
github_event_search = GithubActivityTool()
print(github_event_search.name)
print(github_event_search.description)
print(github_event_search.args)

Github_Activity_Tool
useful for when you need to track user's github activity / events
{'username': {'title': 'Username', 'description': 'Github username', 'type': 'string'}, 'github_access_token': {'title': 'Github Access Token', 'description': 'Github access token', 'type': 'string'}}


In [7]:
## create an RAG tool to scrape and summarize the trending repositories and developers on github
url_list = [
    "https://github.com/trending",
    "https://github.com/trending/developers"
]

docs = []
for path in url_list:
    loader = WebBaseLoader(web_paths=(path,))
    docs += loader.load()

In [8]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

In [10]:
tool = create_retriever_tool(
    retriever,
    "github_trending_repositories_and_developers_retriever",
    "Searches and returns trending repositories and developers on github that are similar to the given query",
)

In [11]:
llm = ChatOpenAI(model="gpt-4o")
tools = [GithubActivityTool(), tool]

In [12]:
system_message = "You are a helpful assistant."

memory = MemorySaver()
app = create_react_agent(
    llm, tools, messages_modifier=system_message, checkpointer=memory
)

In [13]:
config = {"configurable": {"thread_id": "test-thread"}}
print(
    app.invoke(
        {
            "messages": [
                ("user", f"whanyu1212, {GITHUB_ACCESS_TOKEN}. Return the summarized text as well as the frequency table of different types of events."),
            ]
        },
        config,
    )["messages"][-1].content)

### Summarized Text

**Recent GitHub Activities of whanyu1212:**

1. **PushEvent**: 
   - **Repository**: whanyu1212/Transcendent
   - **Branches**: hy-dev, Develop
   - **Commits**:
     - Added minor edits to the repo summary and a bullet point to the to-do list.
     - Added an example on retrieving GitHub events.
     - Added JSON into `.gitignore`.
     - Example on GitHub events summary.
     - Created an agent for GitHub event tracking.
     - Demo UI without refactoring.
     - Added an update log in README.
     - Merged a pull request testing a coding agent with custom tools.

2. **WatchEvent**:
   - **Repositories**: Eng-Elias/CrewAI-Visualizer, OthersideAI/self-operating-computer, chiphuyen/machine-learning-systems-design, chiphuyen/python-is-cool, chiphuyen/ml-interviews-book, ibis-project/ibis.

3. **PullRequestEvent**:
   - **Repository**: whanyu1212/Transcendent
   - **Pull Requests**:
     - Opened and closed a pull request titled "tested coding agent with custom tools

In [15]:
print(
    app.invoke(
        {
            "messages": [
                ("user", "What are the trending repositories that are related to machine learning, AI, and data science?"),
            ]
        },
        config,
    )["messages"][-1].content)

Here are some of the trending repositories related to machine learning, AI, and data science:

1. **[apple/axlearn](https://github.com/apple/axlearn)**
   - **Description**: An Extensible Deep Learning Library.
   - **Language**: Python
   - **Stars**: 1,292
   - **Forks**: 163
   - **Stars Today**: 108

2. **[karpathy/nanoGPT](https://github.com/karpathy/nanoGPT)**
   - **Description**: The simplest, fastest repository for training/finetuning medium-sized GPTs.
   - **Language**: Python
   - **Stars**: 33,696
   - **Forks**: 5,129
   - **Stars Today**: 327

3. **[xenova/transformers.js](https://github.com/xenova/transformers.js)**
   - **Description**: State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!
   - **Language**: JavaScript
   - **Stars**: 9,117
   - **Forks**: 525
   - **Stars Today**: 201

These repositories are currently trending and offer various tools and libraries for deep learning, GPT training, and run