In [19]:
from dotenv import load_dotenv, find_dotenv
# Find a .env file and load the variables it defines into the process environment
load_dotenv(find_dotenv())

True

In [20]:
import os

# LangSmith tracing configuration for this notebook session
os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
        'LANGCHAIN_PROJECT': 'cortex',
    }
)

# API keys are expected to be present in the environment already
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

# Fail fast on the first missing (or empty) key, checking LangChain before Groq
if not langchain_api_key:
    raise ValueError("LANGCHAIN_API_KEY is not set in the environment.")
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key

if not groq_api_key:
    raise ValueError("GROQ_API_KEY is not set in the environment.")
os.environ['GROQ_API_KEY'] = groq_api_key

PART 10 - LOGICAL AND SEMANTIC ROUTING

In [21]:
from typing import Literal
from pydantic import BaseModel, Field  # Updated import
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

# Data model for routing queries: the structured output the router LLM must return.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the schema by with_structured_output, so
# treat both strings as prompt text rather than as plain documentation.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""
    # Restricted to the three listed datasource names; `...` marks the field as required.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question, choose which datasource would be most relevant for answering their question",
    )

# System instructions: choose the datasource from the language the question is about
system = """You are an expert at routing a user question to the appropriate data source.
Based on the programming language the question is referring to, route it to the relevant data source."""

# Chat prompt: the fixed system message followed by the user's question
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# Chat model whose output is constrained to the RouteQuery schema
llm = ChatGroq(temperature=0)
structured_llm = llm.with_structured_output(RouteQuery)

# Router: format the prompt, then ask the structured LLM for a RouteQuery
router = prompt | structured_llm

# Best-effort wrapper around the router
def route_query(question: str) -> "RouteQuery | None":
    """Route a question to a datasource, returning None when routing fails.

    Args:
        question: The user's question, passed to the router as ``{"question": ...}``.

    Returns:
        Whatever ``router.invoke`` returns for the question, or None when the
        question is empty/blank/not a string or when the router raises.
        Failures are reported with ``print`` rather than propagated.
    """
    # Nothing to route: report it and skip the LLM call entirely
    if not isinstance(question, str) or not question.strip():
        print("Error routing query: question is empty")
        return None

    try:
        # The prompt template expects its input under the "question" key
        return router.invoke({"question": question})
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller handle None
        print(f"Error routing query: {e}")
        return None

# Example query: route a Python question and print the structured result
# (prints None if routing failed, because route_query returns None on errors)
result = route_query("How do I create a list in Python?")
print(result)


datasource='python_docs'


In [22]:
# Sample question that embeds Python/LangChain code, used to exercise the router
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

# Calls the router directly (no error-handling wrapper); rebinds the earlier `result`
result = router.invoke({"question": question})

In [23]:
def choose_route(result):
    """Map a routing decision to the chain that should handle the question.

    Args:
        result: Object exposing a ``datasource`` string attribute (the
            router's structured output).

    Returns:
        A placeholder string naming the chain for the selected datasource.
        Any datasource that matches neither the Python nor the JS docs falls
        through to the Go chain.
    """
    # Lower-case once; the substring checks below run in order
    datasource = result.datasource.lower()

    if "python_docs" in datasource:
        ### Logic here
        return "chain for python_docs"
    if "js_docs" in datasource:
        ### Logic here
        return "chain for js_docs"

    ### Logic here
    # Named like the other branches (was the bare "golang_docs")
    return "chain for golang_docs"

from langchain_core.runnables import RunnableLambda

# Feed the router's structured output into choose_route to select the downstream chain
full_chain = router | RunnableLambda(choose_route)

In [24]:
# Run routing end to end on the sample question; the cell output is choose_route's return value
full_chain.invoke({"question": question})

'chain for python_docs'

Semantic Routing

In [25]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Two candidate prompt templates; prompt_router picks whichever is most similar to the question.
# Both expect a single {query} placeholder.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

# Embed the templates once up front so routing a question only needs one extra embedding call
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)
# Order matters: prompt_embeddings[i] is the embedding of prompt_templates[i]
prompt_templates = [physics_template, math_template]
prompt_embeddings = hf_embeddings.embed_documents(prompt_templates)

# Route question to prompt 
def prompt_router(input):
    """Pick the prompt template whose embedding is closest to the question.

    Args:
        input: Mapping with the question text under the "query" key.

    Returns:
        A PromptTemplate built from the best-matching template. As a side
        effect, prints which of the two templates was selected.
    """
    question_vector = hf_embeddings.embed_query(input["query"])

    # One row of similarity scores, one entry per candidate template
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_template = prompt_templates[scores.argmax()]

    # Report the choice
    if best_template == math_template:
        print("Using MATH")
    else:
        print("Using PHYSICS")

    return PromptTemplate.from_template(best_template)


# Pipeline: wrap the raw question as {"query": ...} -> pick a prompt -> chat model -> plain string.
# NOTE(review): ChatGroq() is created without an explicit model; confirm the installed
# langchain_groq version still provides a usable default.
chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatGroq()
    | StrOutputParser()
)

# prompt_router prints which template was chosen before the answer itself is printed
print(chain.invoke("What's a black hole"))

Using PHYSICS
A black hole is a region in space where gravity is so strong that nothing, not even light, can escape from it. They are formed when massive stars collapse under their own gravity after they have exhausted their nuclear fuel. The core of the star compresses and heats up, causing the star to explode in a supernova. What remains is an incredibly dense object called a stellar-mass black hole.

Black holes can also form in the center of galaxies, where huge amounts of matter have accumulated over time. These supermassive black holes can have masses millions or even billions of times greater than our sun.

Black holes are called "black" because they do not emit any light. They can only be detected indirectly, through their effects on surrounding matter. For example, matter that falls towards a black hole will be heated and will emit x-rays that can be detected by telescopes.

I hope that helps! Let me know if you have any other questions.


QUERY CONSTRUCTION

PART 11 - QUERY STRUCTURING FOR METADATA FILTERS

In [30]:
import os
# Set the TOKENIZERS_PARALLELISM switch to "false".
# NOTE(review): this runs after the embedding model was already used in an earlier
# cell; confirm the setting still takes effect when applied this late.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import yt_dlp

def get_youtube_metadata(url: str):
    """Fetch basic metadata for a video via yt_dlp without downloading it.

    Args:
        url: Address of the video to inspect.

    Returns:
        A dict with the keys ``video_title``, ``video_url``, ``publish_date``
        and ``author``, or None when extraction fails (the error is printed).
        A field that is missing, None or empty in the extracted info is
        replaced by its "Unknown ..." placeholder. ``publish_date`` is passed
        through exactly as yt_dlp reports it.
    """
    ydl_opts = {
        'quiet': True,
        'noplaylist': True,
        'extract_flat': True,
    }
    # (output key, key in the yt_dlp info dict, placeholder when unavailable)
    field_map = (
        ("video_title", "title", "Unknown Title"),
        ("video_url", "webpage_url", "Unknown URL"),
        ("publish_date", "upload_date", "Unknown Date"),
        ("author", "uploader", "Unknown Author"),
    )

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        # Report an empty result explicitly instead of via an AttributeError on None
        if not info:
            print("Error extracting YouTube metadata: no information returned")
            return None

        # `or` rather than a .get() default: the default alone is skipped when
        # the key exists but holds None or an empty string
        return {
            out_key: info.get(info_key) or placeholder
            for out_key, info_key, placeholder in field_map
        }

    except Exception as e:
        # Best-effort: report the failure and signal it with None
        print(f"Error extracting YouTube metadata: {e}")
        return None

# Example usage: fetch and print the metadata for one video
# (prints None if extraction failed, because get_youtube_metadata returns None on errors)
video_url = "https://www.youtube.com/watch?v=lOdXUVYT69I"
metadata = get_youtube_metadata(video_url)
print(metadata)




{'video_title': 'GraphRAG - The Most Advanced Futuristic RAG | Introduction, Setup, Working, Testing', 'video_url': 'https://www.youtube.com/watch?v=lOdXUVYT69I', 'publish_date': '20240703', 'author': 'Neural Hacks with Vasanth'}
