In [None]:
import os
import time

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from pinecone import Pinecone, ServerlessSpec

In [None]:
# Read the Pinecone key from the environment so the secret is never saved in
# the notebook source or its outputs. Set it before starting Jupyter, e.g.
#   export PINECONE_API_KEY="..."
pinecone_api_key = os.environ.get("PINECONE_API_KEY", "")
if not pinecone_api_key:
    # Fail here with a clear message rather than deeper inside the client.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it before running this notebook."
    )

# Client handle used by every Pinecone call in the cells below.
pc = Pinecone(api_key=pinecone_api_key)

In [None]:
# Name of the Pinecone index that the following cells create (if it does not
# exist yet) and then write to and query. Placeholder: fill in before running.
index_name = ""

In [None]:
# Create the index on first run only; subsequent runs reuse the existing one.
if pc.has_index(index_name):
    print(f"Index {index_name} already exists.")
else:
    # Integrated-embedding settings: the model name, plus a field map pointing
    # the model's "text" input at the "chunk_text" field of each record.
    embed_config = {
        "model": "llama-text-embed-v2",
        "field_map": {"text": "chunk_text"},
    }
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed=embed_config,
    )
    print(f"Creating index {index_name}... waiting 10 sec")
    # Fixed pause after requesting creation, before the index is used.
    time.sleep(10)

In [None]:
# Inputs for this run -- fill both in before executing the cell.
document_path = ""  # your files e.g., resume, certificate etc.
source_name = ""  # pinecone metadata

# Splitter configuration: chunk_size=500 with a chunk_overlap of 50.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Load the PDF, then cut the loaded pages into overlapping chunks.
loader = PyPDFLoader(document_path)
pages = loader.load()
chunks = text_splitter.split_documents(pages)

print(chunks)

In [None]:
# Pinecone documents a per-request cap on text records sent to an
# integrated-embedding index (96 at the time of writing), so the upload is
# sliced into batches instead of being sent as one oversized request.
UPSERT_BATCH_SIZE = 96

# One record per chunk. "chunk_text" is the field the index embeds (see the
# field_map used when the index was created); the remaining keys are metadata.
records = [
    {
        "_id": f"{source_name}_chunk_{i}",
        "chunk_text": chunk.page_content,
        "source": source_name,
        # -1 marks chunks whose loader metadata carries no page entry.
        "page_number": chunk.metadata.get("page", -1),
    }
    for i, chunk in enumerate(chunks)
]

dense_index = pc.Index(index_name)
namespace = ""  # pinecone namespace

# An empty `records` list results in no request at all.
for start in range(0, len(records), UPSERT_BATCH_SIZE):
    dense_index.upsert_records(namespace, records[start:start + UPSERT_BATCH_SIZE])
print(f"Uploaded {len(records)} records.")

In [None]:
# Pause before reading stats; presumably this gives Pinecone time to finish
# indexing the records upserted earlier -- TODO confirm the delay is sufficient.
time.sleep(5)
# Fetch and print the index statistics as a quick sanity check on the upload.
stats = dense_index.describe_index_stats()
print(stats)

In [None]:
import requests

In [None]:
def query_huggingface(prompt, api_key=None, timeout=60):
    """Send ``prompt`` to the hosted zephyr-7b-beta model and return its JSON reply.

    Args:
        prompt: Complete prompt text, sent as the ``inputs`` field.
        api_key: Hugging Face API token. When omitted, the value of the
            ``HUGGINGFACE_API_KEY`` environment variable is used, so the
            token does not have to be written into the notebook.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body exactly as the API sent it (this includes error
        payloads returned with a non-2xx status), or ``None`` when the request
        itself fails or the body is not valid JSON.
    """
    if api_key is None:
        api_key = os.environ.get("HUGGINGFACE_API_KEY", "")
    API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

    headers = {"Authorization": f"Bearer {api_key}"}

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256
        }
    }

    # A timeout keeps a stalled connection from hanging the kernel forever;
    # network-level failures are reported the same way as decode failures.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        print("Error calling Hugging Face API:", e)
        return None

    # Surface HTTP failures instead of returning an error payload silently.
    # The body is still decoded and returned below, as before.
    if not response.ok:
        print(f"Hugging Face API returned HTTP {response.status_code}: {response.text[:500]}")

    try:
        return response.json()
    except ValueError as e:  # JSON decode errors are ValueError subclasses
        print("Error decoding JSON:", e)
        return None

In [None]:
def build_llm_prompt(top_chunks, user_question, subject_name="Victor"):
    """Assemble the text prompt sent to the LLM from retrieved resume chunks.

    Args:
        top_chunks: Iterable of chunk strings retrieved for the question.
            They are joined with a blank line between them.
        user_question: The question the model should answer.
        subject_name: Name of the person whose resume the chunks come from.
            Defaults to "Victor", which reproduces the original prompt text.

    Returns:
        The prompt string: an instruction line, the chunks between
        BEGIN/END RESUME delimiters, the question, and a trailing "Answer:"
        line for the model to continue from.
    """
    # Build the chunk text first; a blank line separates consecutive chunks.
    chunks_text = "\n\n".join(top_chunks)

    # Build the full prompt with clear delimiters and Answer token
    prompt = f"""
You are an AI assistant. Here is information from {subject_name}'s resume:

=== BEGIN RESUME ===
{chunks_text}
=== END RESUME ===

Now answer the following question concisely:

Question: {user_question}

Answer:
"""
    return prompt

In [None]:
query = "What programming languages does X's know?"

# Query the same namespace the records were upserted into, asking for the
# five best hits for the question text.
results = dense_index.search(
    namespace=namespace,
    query={"top_k": 5, "inputs": {"text": query}},
)

# Show the raw search response.
print(results)

In [None]:
# Extract the stored chunk text from (at most) the first five search hits.
top_chunks = [
    hit['fields']['chunk_text']
    for hit in results['result']['hits'][:5]
]

print(top_chunks)

In [None]:
user_question = "What programming languages does X's know?"

# Build prompt
prompt = build_llm_prompt(top_chunks, user_question)

# Send to Hugging Face
response = query_huggingface(prompt)

# query_huggingface returns None on failure, and the API can answer with an
# error dict instead of a list of generations. Fail with a readable message
# rather than an opaque TypeError/KeyError on the indexing below.
if not (
    isinstance(response, list)
    and response
    and isinstance(response[0], dict)
    and "generated_text" in response[0]
):
    raise RuntimeError(f"Unexpected response from Hugging Face: {response!r}")

# Extract clean answer
generated_text = response[0]['generated_text']
if generated_text.startswith(prompt):
    # The reply echoes the prompt: drop exactly that prefix, so a completion
    # that itself contains "Answer:" is not truncated.
    answer = generated_text[len(prompt):].strip()
elif "Answer:" in generated_text:
    answer = generated_text.split("Answer:")[-1].strip()
else:
    answer = generated_text.strip()

# Final one-line print
print(f"LLM Answer: {answer}")