In [None]:
# Imports
import os
import time

import requests
import gradio as gr
from pinecone import Pinecone

# Pinecone setup
# Secrets are read from environment variables so they are never saved inside
# the notebook; each lookup falls back to an empty string when the variable is unset.
pinecone_api_key = os.environ.get("PINECONE_API_KEY", "")  # Your pinecone api key
pc = Pinecone(api_key=pinecone_api_key)

# Pinecone index and namespace (same as your previous notebook)
index_name = os.environ.get("PINECONE_INDEX_NAME", "")  # pinecone index name
namespace = os.environ.get("PINECONE_NAMESPACE", "")  # pinecone namespace
dense_index = pc.Index(index_name)

# Huggingface setup
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY", "")  # your huggingface api token
HUGGINGFACE_API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

In [None]:
# Query Huggingface LLM
def query_huggingface(prompt, timeout=60):
    """Send ``prompt`` to the Hugging Face endpoint and return the decoded JSON reply.

    Args:
        prompt: Text sent as the ``inputs`` field of the request payload.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled endpoint cannot block the caller forever.

    Returns:
        The parsed JSON body of the response, or ``None`` when the request
        fails (connection error, timeout, ...) or the body is not valid JSON.
        HTTP error statuses are not raised here; whatever JSON the server
        sent is returned so the caller can inspect it.
    """
    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256
        }
    }

    try:
        response = requests.post(
            HUGGINGFACE_API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as e:
        # Network-level failure: report it the same way as a decode failure.
        print("Error calling Huggingface API:", e)
        return None

    try:
        return response.json()
    except (ValueError, requests.RequestException) as e:
        # Body was not JSON (e.g. an HTML error page from a gateway).
        print("Error decoding JSON:", e)
        return None

# Build LLM prompt
def build_llm_prompt(top_chunks, user_question):
    """Assemble the prompt text sent to the LLM.

    Args:
        top_chunks: Iterable of resume text chunks (strings); they are joined
            with a blank line between consecutive chunks.
        user_question: The question to place after the resume section.

    Returns:
        A single string: an instruction line, the chunks wrapped between
        BEGIN/END RESUME markers, the question, and a trailing ``Answer:``
        cue. The string starts and ends with a newline.
    """
    resume_excerpt = "\n\n".join(top_chunks)

    # Each tuple entry is one line of the prompt; empty strings are blank lines
    # (the first and last ones produce the leading and trailing newline).
    prompt_lines = (
        "",
        "You are an AI assistant. Here is information from Victor's resume:",
        "",
        "=== BEGIN RESUME ===",
        f"{resume_excerpt}",
        "=== END RESUME ===",
        "",
        "Now answer the following question concisely:",
        "",
        f"Question: {user_question}",
        "",
        "Answer:",
        "",
    )
    return "\n".join(prompt_lines)

In [None]:
def answer_question(user_question):
    """Answer a question about the resume using Pinecone retrieval plus the LLM.

    Steps: search the Pinecone index for the question, collect the
    ``chunk_text`` field of up to five hits, build a prompt from them, send it
    to the LLM, and pull the answer out of the generated text.

    Args:
        user_question: The question typed by the user.

    Returns:
        The answer string. When the question is blank, nothing is retrieved,
        or the LLM call fails, a short explanatory message is returned
        instead of raising.
    """
    # Nothing to search for; skip the remote calls entirely.
    if not user_question or not user_question.strip():
        return "Please enter a question."

    # 1. Run Pinecone search
    results = dense_index.search(
        namespace=namespace,
        query={
            "top_k": 5,
            "inputs": {
                "text": user_question
            }
        }
    )

    # 2. Extract top 5 chunks
    top_chunks = [
        hit['fields']['chunk_text']
        for hit in results['result']['hits'][:5]
    ]
    if not top_chunks:
        return "I couldn't find anything relevant in the resume for that question."

    # 3. Build LLM prompt
    prompt = build_llm_prompt(top_chunks, user_question)

    # 4. Call LLM
    response = query_huggingface(prompt)

    # 5. Extract clean answer
    # query_huggingface returns None on failure, and the API may answer with
    # a dict carrying an "error" key instead of a list of generations.
    if isinstance(response, dict) and "error" in response:
        return f"The language model returned an error: {response['error']}"
    try:
        generated_text = response[0]['generated_text']
    except (TypeError, IndexError, KeyError):
        return "Sorry, the language model did not return a usable answer. Please try again."

    if generated_text.startswith(prompt):
        # The model echoed the prompt: everything after it is the answer.
        # Cutting here keeps answers that themselves contain "Answer:" intact.
        answer = generated_text[len(prompt):].strip()
    elif "Answer:" in generated_text:
        answer = generated_text.split("Answer:")[-1].strip()
    else:
        answer = generated_text.strip()

    # 6. Return the answer
    return answer

In [None]:
# Build Gradio UI
# Input and output widgets are created first so the Interface call stays short.
question_box = gr.Textbox(
    label="Ask a question about Victor Jong",
    placeholder="e.g. What frameworks does Victor know?",
)
answer_box = gr.Textbox(label="LLM Answer")

# Wire answer_question between the two text boxes.
demo = gr.Interface(
    fn=answer_question,
    inputs=question_box,
    outputs=answer_box,
    title="Ask Anything about Victor Jong",
    description="Ask any question about Victor's resume. Example: 'What frameworks does Victor know?', 'What is Victor's educational background?', 'What programming languages does Victor know?'.",
)

# Launch the app
demo.launch(share=True)  # share=True gives you a public link