In [24]:
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() before the API keys
# are read from the environment in the next cell. The call is the cell's last
# expression, so its return value is what the cell displays.
load_dotenv(find_dotenv())

True

In [25]:
import os

# LangSmith tracing settings for this session (always overwritten here).
os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
        'LANGCHAIN_PROJECT': 'cortex',
    }
)

# Read both API keys up front so the names exist even if a check below raises.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

# Fail fast on a missing/empty key; otherwise write the value back to the environment.
if not langchain_api_key:
    raise ValueError("LANGCHAIN_API_KEY is not set in the environment.")
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key

if not groq_api_key:
    raise ValueError("GROQ_API_KEY is not set in the environment.")
os.environ['GROQ_API_KEY'] = groq_api_key

PART 10 - LOGICAL AND SEMANTIC ROUTING

In [26]:
from typing import Literal, Optional

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

# Structured-output schema for the router: the model must answer with exactly
# one of the three datasource names.
# NOTE(review): the class docstring and the field description are presumably
# forwarded to the LLM as part of the schema, so they are left untouched here —
# confirm before rewording either of them.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""
    # Literal restricts the value to the three supported documentation sources.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,  # Ellipsis: the field is required, there is no default
        description="Given a user question, choose which datasource would be most relevant for answering their question",
    )

# Chat model (temperature 0) and a wrapper that returns RouteQuery objects
llm = ChatGroq(temperature=0)
structured_llm = llm.with_structured_output(RouteQuery)

# System instructions given to the routing model
system = (
    "You are an expert at routing a user question to the appropriate data source.\n"
    "Based on the programming language the question is referring to, route it to the relevant data source."
)

# Two-message chat prompt: fixed system text plus the user's {question}
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# The router: fill the prompt, then ask the structured model for a RouteQuery
router = prompt | structured_llm

# Convenience wrapper around the router that never raises
def route_query(question: str) -> Optional["RouteQuery"]:
    """Route a question to a datasource using the module-level ``router``.

    Args:
        question: The user question to classify.

    Returns:
        Whatever ``router.invoke`` returns for the question (a ``RouteQuery``
        in the normal case), or ``None`` when the question is empty/blank or
        when routing raises. Callers must handle the ``None`` case.
    """
    try:
        # A blank question gives the model nothing to classify; skip the LLM
        # call instead of letting it pick an arbitrary datasource.
        if not question or not question.strip():
            print("Error routing query: question is empty")
            return None
        # The router expects its input keyed by the prompt variable name.
        return router.invoke({"question": question})
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print(f"Error routing query: {e}")
        return None

# Example query: `result` is the router's output, or None if route_query
# caught an error while routing.
result = route_query("How do I create a list in Python?")
print(result)

datasource='python_docs'


In [27]:
# A multi-line question that embeds a LangChain/Python snippet; used as the
# routing input here and again by full_chain below.
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

# Calls the router directly rather than through route_query, so any exception
# propagates. This rebinds `result` from the previous cell.
result = router.invoke({"question": question})

In [28]:
def choose_route(result):
    """Map a routing result to the label of the chain that should handle it.

    Args:
        result: Object exposing a string ``datasource`` attribute, such as the
            ``RouteQuery`` produced by the router.

    Returns:
        ``"chain for python_docs"``, ``"chain for js_docs"`` or
        ``"chain for golang_docs"``. A datasource matching neither the Python
        nor the JS name falls through to the Go chain, as it did before; only
        the label of that fallback changed so all three follow one convention.
    """
    # Normalise once; matching stays a case-insensitive substring check.
    datasource = result.datasource.lower()
    if "python_docs" in datasource:
        ### Logic here
        return "chain for python_docs"
    if "js_docs" in datasource:
        ### Logic here
        return "chain for js_docs"
    ### Logic here
    return "chain for golang_docs"

from langchain_core.runnables import RunnableLambda

# Feed the router's structured output into choose_route, so invoking the chain
# yields the selected chain label instead of the raw routing object.
full_chain = router | RunnableLambda(choose_route)

In [29]:
# Route the multi-line `question` defined above; the returned label is the cell output.
full_chain.invoke({"question": question})

'chain for python_docs'

Semantic Routing

In [30]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Two candidate prompts; prompt_router picks whichever is closer to the question.
# Each ends with a {query} placeholder. A trailing backslash inside the
# triple-quoted strings joins that line with the next, so the persona text
# contains fewer newlines than the source layout suggests.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

# Embedding model configuration: BGE small (English) on CPU, normalised vectors
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Embed every template once up front; prompt_router compares questions against these
prompt_templates = [physics_template, math_template]
prompt_embeddings = hf_embeddings.embed_documents(prompt_templates)

# Semantic router: choose the prompt whose embedding best matches the question
def prompt_router(input):
    """Select the prompt template most similar to the incoming question.

    Args:
        input: Mapping with a ``"query"`` entry holding the question text.

    Returns:
        A ``PromptTemplate`` built from the entry of ``prompt_templates`` with
        the highest cosine similarity to the question.

    Prints which template was selected; anything other than ``math_template``
    is reported as PHYSICS.
    """
    question_vector = hf_embeddings.embed_query(input["query"])
    # One row of scores, one score per entry of prompt_templates
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_template = prompt_templates[scores.argmax()]
    if best_template == math_template:
        print("Using MATH")
    else:
        print("Using PHYSICS")
    return PromptTemplate.from_template(best_template)


# Pipeline: wrap the raw input string under "query", let prompt_router pick the
# prompt, send it to the chat model and reduce the reply to a plain string.
# Note that ChatGroq() here is a new instance with default settings, not the
# `llm` created with temperature=0 for the logical router above.
chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatGroq()
    | StrOutputParser()
)

# prompt_router prints the chosen template before the answer itself is printed.
print(chain.invoke("What's a black hole"))

Using PHYSICS
A black hole is a region of space where gravity is so strong that nothing can escape from it, not even light. This is why we call them "black" holes, because we can't see them directly. They are formed when a massive star collapses under its own gravity, creating a singularity, a point in space with infinite density and zero volume. Black holes can have different sizes, the smallest ones are as small as an atom but can have the mass of a mountain, the biggest ones can have the mass of a billion suns and can be as big as a whole galaxy. Black holes can also merge together and create even bigger black holes. They are fascinating objects that are still the subject of active research in astrophysics.


QUERY CONSTRUCTION

In [None]:
# PART 11 - QUERY STRUCTURING FOR METADATA FILTERS

In [33]:
from langchain_community.document_loaders import YoutubeLoader

# Fetch a YouTube video through YoutubeLoader and hand back its metadata
def load_and_refine_youtube_data(url: str):
    """Load a YouTube video and return the metadata of its first document.

    Args:
        url: URL of the video to load.

    Returns:
        The ``metadata`` of the first loaded document, or ``None`` when no
        documents were loaded or when any step raised. Status and error
        messages are printed rather than raised.
    """
    try:
        loaded = YoutubeLoader.from_youtube_url(url, add_video_info=True).load()

        # Nothing came back: report it and bail out early
        if not loaded:
            print("No documents were loaded.")
            return None

        metadata = loaded[0].metadata
        print("Metadata loaded successfully:")
        print(metadata)  # Echoed for debugging
        return metadata

    except Exception as error:
        # Any failure (including a missing optional dependency) is reported, not raised
        print(f"Error loading YouTube video: {error}")
        return None

# Define the YouTube video URL
video_url = "https://youtu.be/lOdXUVYT69I?si=OlaJFDgATKZE8VhN"

# Load the metadata; this is None when loading failed or returned no documents
video_metadata = load_and_refine_youtube_data(video_url)

# Project the raw metadata onto a small, readable summary
if video_metadata:
    refined_metadata = {
        "video_title": video_metadata.get("title", "Unknown Title"),
        # NOTE(review): the loader's metadata does not appear to include a
        # "url" key, so this lookup previously always produced the
        # "Unknown URL" placeholder — confirm against the YoutubeLoader docs.
        # Fall back to the URL that was actually requested.
        "video_url": video_metadata.get("url", video_url),
        "publish_date": video_metadata.get("publish_date", "Unknown Date"),
        "author": video_metadata.get("author", "Unknown Author"),
    }

    print("Refined Metadata:")
    print(refined_metadata)


Error loading YouTube video: Could not import "youtube_transcript_api" Python package. Please install it with `pip install youtube-transcript-api`.
