In [1]:
# Install necessary libraries
!pip install openai
!pip install azure-core
!pip install pinecone

# Mount Google Drive to access files
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Import userdata to get API keys securely
from google.colab import userdata

# Retrieve API keys and endpoint from Colab userdata secrets
Azure_api_key = userdata.get('AZURE_API')
Pinecone_api_key = userdata.get('PINECONE')
Azure_endpoint = userdata.get('AZURE_ENDPOINT')
Chat_endpoint = userdata.get('CHAT_ENDPOINT')
Chat_api = userdata.get('SUBSCRIPTION_KEY')

In [5]:
# Import Necessary Libraries
from openai import AzureOpenAI
from pinecone import Pinecone, ServerlessSpec
import pandas as pd
import numpy as np
from uuid import uuid4
from azure.core.credentials import AzureKeyCredential

# Define model parameters
model_name = "text-embedding-3-small"
deployment = "text-embedding-3-small"
api_version = "2024-02-01"

client = AzureOpenAI(
    azure_endpoint = Azure_endpoint,
    api_version = api_version,
    api_key = Azure_api_key
)

pc = Pinecone(api_key=Pinecone_api_key)

pc.create_index(
    name='youtube-rag-data',
    dimension=1536,
    spec=ServerlessSpec(cloud='aws',region='us-east-1')
)

print(pc.list_indexes())

[{
    "name": "dotproduct-index",
    "metric": "dotproduct",
    "host": "dotproduct-index-nlsfoxv.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "region": "us-east-1",
            "cloud": "aws",
            "read_capacity": {
                "mode": "OnDemand",
                "status": {
                    "state": "Ready",
                    "current_shards": null,
                    "current_replicas": null
                }
            }
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1536,
    "deletion_protection": "disabled",
    "tags": null
}, {
    "name": "practice",
    "metric": "cosine",
    "host": "practice-nlsfoxv.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "region": "us-east-1",
            "cloud": "aws",
            "read_capacity": {
                "mode": "OnDemand",
                "status": {
    

In [6]:
index = pc.Index('youtube-rag-data')

In [7]:
df = pd.read_csv('/content/drive/MyDrive/youtube_rag_data.csv')
df.head(5)

Unnamed: 0,id,blob,channel_id,end,published,start,text,title,url
0,35Pdoyi6ZoQ-t0.0,"{'channel_id': 'UCv83tO5cePwHMt1952IVVHw', 'en...",UCv83tO5cePwHMt1952IVVHw,74,2021-07-06 13:00:03 UTC,0,"Hi, welcome to the video. So this is the fourt...",Training and Testing an Italian BERT - Transfo...,https://youtu.be/35Pdoyi6ZoQ
1,35Pdoyi6ZoQ-t18.48,"{'channel_id': 'UCv83tO5cePwHMt1952IVVHw', 'en...",UCv83tO5cePwHMt1952IVVHw,94,2021-07-06 13:00:03 UTC,18,So we got some data. We built a tokenizer with...,Training and Testing an Italian BERT - Transfo...,https://youtu.be/35Pdoyi6ZoQ
2,35Pdoyi6ZoQ-t32.36,"{'channel_id': 'UCv83tO5cePwHMt1952IVVHw', 'en...",UCv83tO5cePwHMt1952IVVHw,108,2021-07-06 13:00:03 UTC,32,So let's move over to the code. And we see her...,Training and Testing an Italian BERT - Transfo...,https://youtu.be/35Pdoyi6ZoQ
3,35Pdoyi6ZoQ-t51.519999999999996,"{'channel_id': 'UCv83tO5cePwHMt1952IVVHw', 'en...",UCv83tO5cePwHMt1952IVVHw,125,2021-07-06 13:00:03 UTC,51,"PyTorch data loader, ready. And we can begin t...",Training and Testing an Italian BERT - Transfo...,https://youtu.be/35Pdoyi6ZoQ
4,35Pdoyi6ZoQ-t67.28,"{'channel_id': 'UCv83tO5cePwHMt1952IVVHw', 'en...",UCv83tO5cePwHMt1952IVVHw,140,2021-07-06 13:00:03 UTC,67,So when we're training a model for mass langua...,Training and Testing an Italian BERT - Transfo...,https://youtu.be/35Pdoyi6ZoQ


In [8]:
batch_limit = 100

for batch in np.array_split(df, len(df)/batch_limit):
  metadatas = [
      {
          'text_id' : row['id'],
          'text' : row['text'],
          'title' : row['title'],
          'url' : row['url'],
          'published' : row['published']
      }
      for _, row in batch.iterrows()
  ]

  texts = batch['text'].tolist()
  ids = [str(uuid4()) for _ in range(len(texts))]

  response = client.embeddings.create(
      input=texts,
      model=model_name
  )

  embeds = [np.array(x.embedding) for x in response.data]

  index.upsert(
      vectors = zip(ids, embeds, metadatas),
      namespace = 'youtube-data'
  )

  return bound(*args, **kwds)


In [9]:
def retrieve(query, top_k, namespace, emb_model):
  """
    Retrieve relevant documents for a query.

    Args:
        query: User's question
        top_k: Number of documents to retrieve
        namespace: Pinecone namespace to search
        emb_model: Embedding model name

    Returns:
        retrieved_docs: List of document texts
        sources: List of (title, url) tuples
    """

  query_response = client.embeddings.create(
        input = query,
        model = emb_model
    )

  query_emb = query_response.data[0].embedding

  docs = index.query(
        vector = query_emb,
        top_k = top_k,
        namespace = namespace,
        include_metadata = True
    )

  retrieved_docs = []
  sources = []

  for doc in docs['matches']:
    retrieved_docs.append(doc['metadata']['text'])
    sources.append((
        doc['metadata']['title'],
        doc['metadata']['url']
    ))

  return retrieved_docs, sources

query = "How to build next-level Q&A with OpenAI?"
documents, sources = retrieve(
    query=query,
    top_k = 3,
    namespace = 'youtube-data',
    emb_model = model_name
)

print(f"Retrieved {len(documents)} documents")
for i, (doc, source) in enumerate(zip(documents, sources)):
    print(f"\nDocument {i+1}:")
    print(f"Source: {source[0]}, {source[1]}")
    print(f"Text: {doc[:100]}...")

Retrieved 3 documents

Document 1:
Source: How to build a Q&A AI in Python (Open-domain Question-Answering), https://youtu.be/w1dMEWm7jBc
Text: to use for Open Domain Question Answering. We're going to start with a few examples. Over here we ha...

Document 2:
Source: How to build a Q&A AI in Python (Open-domain Question-Answering), https://youtu.be/w1dMEWm7jBc
Text: There are places where you do want to keep traditional search. But particularly for unstructured tex...

Document 3:
Source: How to build next-level Q&A with OpenAI, https://youtu.be/coaaSxys5so
Text: So let's go with, let's restrict everything to streamlit and we'll ask about OpenAI Clip. Maybe. Let...


In [12]:
def prompt_with_context_builder(query,docs):

  """
  Build a prompt with retrieved context.

  Args:
        query: User's question
        docs: List of retrieved document texts

  Returns:
        Formatted prompt string
  """

  delim = '\n\n--\n\n'
  prompt_start = 'Answer the question based on the context below. \n\nContext:\n'
  prompt_end = f'\n\nQuestion: {query}\nAnswer:'

  prompt = prompt_start + delim.join(docs) + prompt_end
  return prompt

query = "How to build next-level Q&A with OpenAI"
context_prompt = prompt_with_context_builder(query,documents)

print(context_prompt[:500])

Answer the question based on the context below. 

Context:
to use for Open Domain Question Answering. We're going to start with a few examples. Over here we have Google and we can ask Google questions like we would a normal person. So we can say, how do I tie my shoelaces? So what we have right here is three components to the question and answer. And I want you to remember these because these are relevant for what we are going to be building. We have the query at the top. We have what we can ref


In [13]:
endpoint = Chat_endpoint
model_name = "gpt-5.2-chat"
deployment = "gpt-5.2-chat-2"

subscription_key = Chat_api
api_version = "2024-12-01-preview"

client1 = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
)

def question_answering(prompt,sources,chat_model):
  """
    Generate answer using LLM with retrieved context.

    Args:
        prompt: Prompt with context and question
        sources: List of (title, url) tuples
        chat_model: OpenAI model name

    Returns:
        Answer with source citations
    """

  sys_prompt = "You are a helpful assistant that always answers questions"

  res = client1.chat.completions.create(
      model = chat_model,
      messages = [
          {'role':'system','content':sys_prompt},
          {'role':'user','content':prompt}
      ]
  )

  answer = res.choices[0].message.content.strip()

  answer += '\n\nSources:'
  for source in sources:
    answer += f'\n{source[0]}:{source}[1]'
  return answer


query = "How to build next-level Q&A with OpenAI"
answer = question_answering(
    context_prompt,
    sources,
    chat_model=deployment
)

print(answer)

**Answer:**

To build next-level Q&A with OpenAI, you use an **Open Domain Question Answering (ODQA) pipeline**. This involves:

1. **Taking a user query** and converting it into a vector using an **embedding (retrieval) model**.  
2. **Retrieving relevant context** (text, video transcripts, audio, or other unstructured data) by comparing vector similarity, which captures semantic meaning rather than just keywords.  
3. **Passing the retrieved context plus the question** to a **generative language model** (like OpenAI’s models).  
4. **Prompting the model to answer strictly based on the provided context**, so it can give accurate answers or say “I don’t know” when the information isn’t present.

This combination of semantic retrieval + generative answering enables powerful, intelligent Q&A systems beyond traditional keyword-based search.

Sources:
How to build a Q&A AI in Python (Open-domain Question-Answering):('How to build a Q&A AI in Python (Open-domain Question-Answering)', 'https

In [15]:
model_name = "text-embedding-3-small"

def rag_pipeline(query, top_k=3, namespace = 'youtube-data'):
  """
    Complete RAG pipeline: retrieve → build prompt → generate answer.

    Args:
        query: User's question
        top_k: Number of documents to retrieve
        namespace: Pinecone namespace

    Returns:
        Generated answer with sources
    """

  documents,sources = retrieve(
      query,
      top_k = top_k,
      namespace = namespace,
      emb_model = model_name
  )

  prompt_with_context = prompt_with_context_builder(query,documents)

  answer = question_answering(
      prompt_with_context,
      sources,
      chat_model = deployment
  )

  return answer

query = "How to build next-level Q&A with OpenAI?"
answer = rag_pipeline(query)
print(answer)

To build next‑level Q&A with OpenAI, you use an **Open Domain Question Answering (ODQA) pipeline**. Based on the context, the key steps are:

1. **Take a user query**  
   Start with a natural‑language question.

2. **Retrieve relevant context**  
   - Convert the question into a vector using an **embedding model**.  
   - Compare it against vectors of unstructured data (text, video transcripts, audio, docs) stored in a vector database.  
   - Retrieve the most semantically relevant chunks, not just keyword matches.

3. **Provide context to a generative model**  
   - Pass the retrieved text as *context* along with the question.  
   - Prompt the model to answer **only using the given context** (or say “I don’t know” if the answer isn’t present).

4. **Generate the answer**  
   - Use an OpenAI completion / generative model to produce a clear, natural answer grounded in that context.

In short:  
**Question → Embeddings → Semantic Retrieval → Context → OpenAI Generative Model → Answer*