In [2]:
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import openai
import os


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load cleaned data and fine-tuned model
df = pd.read_csv("cleaned_quotes.csv")
model = SentenceTransformer("./model/fine_tuned_quote_model")


In [4]:
# Create embeddings for all quotes
corpus = df["combined"].tolist()
corpus_embeddings = model.encode(corpus, convert_to_numpy=True, show_progress_bar=True)


Batches: 100%|██████████| 79/79 [00:28<00:00,  2.73it/s]


In [5]:
embedding_dim = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(corpus_embeddings)
print(f"FAISS index built with {index.ntotal} entries")


FAISS index built with 2508 entries


In [6]:
# Function to perform retrieval
def retrieve_quotes(query, top_k=5):
    query_embedding = model.encode([query], convert_to_numpy=True)
    scores, indices = index.search(query_embedding, top_k)
    results = []
    for idx, score in zip(indices[0], scores[0]):
        results.append({
            "quote": df.iloc[idx]["quote"],
            "author": df.iloc[idx]["author"],
            "tags": df.iloc[idx]["tags"],
            "score": float(score),
        })
    return results


In [15]:
from openai import OpenAI
import os

def generate_answer(query, context_quotes):
    # Initialize client using environment variable
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Create a prompt using retrieved quotes
    context_text = "\n".join([f"- {q['quote']} ({q['author']})" for q in context_quotes])
    
    prompt = f"""
You are a helpful assistant. A user asked the following query:
"{query}"

Here are some relevant quotes:
{context_text}

Based on these quotes, provide a helpful and relevant response or summary:
"""

    # Call the LLM
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content.strip()


In [16]:
query = "quotes about resilience by women authors"
retrieved = retrieve_quotes(query)
print("\nTop Retrieved Quotes:")
for r in retrieved:
    print(f"- {r['quote']} ({r['author']})")



Top Retrieved Quotes:
- “i hate men who are afraid of women's strength.” (anaã¯s nin,)
- “i hate to hear you talk about all women as if they were fine ladies instead of rational creatures. none of us want to be in calm waters all our lives.” (jane austen,)
- “well-behaved women seldom make history.” (laurel thatcher ulrich,)
- “a woman is like a tea bag; you never know how strong it is until it's in hot water.” (eleanor roosevelt)
- “better to be strong than pretty and useless.” (lilith saintcrow,)


In [17]:
# Generate an answer
answer = generate_answer(query, retrieved)
print("\nGenerated Answer:")
print(answer)



Generated Answer:
Resilience is a powerful theme often echoed by women authors throughout history. Here are some impactful quotes that capture the essence of resilience as experienced and expressed by women:

1. **Anaïs Nin** highlights the strength of women, stating, “I hate men who are afraid of women's strength.” This quote challenges societal norms and celebrates the fortitude inherent in women.

2. **Jane Austen** reminds us that women are not merely to be viewed as delicate beings. She asserts, “I hate to hear you talk about all women as if they were fine ladies instead of rational creatures. None of us want to be in calm waters all our lives.” Here, she acknowledges the desire for challenge and growth.

3. **Laurel Thatcher Ulrich** provocatively notes, “Well-behaved women seldom make history,” indicating that resilience often requires breaking the mold and pushing against societal expectations.

4. **Eleanor Roosevelt** uses the metaphor of a tea bag to express strength, sayin

In [18]:
# Step 4: RAG Evaluation with RAGAS
examples = [
    {
        "question": query,
        "answer": answer,
        "contexts": [r["quote"] for r in retrieved],
        "ground_truth": "Quotes about resilience from women authors emphasizing strength through adversity."
    }
]


In [20]:
from datasets import Dataset

dataset = Dataset.from_list(examples)


In [25]:
# Evaluate with RAGAS
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas import evaluate

results = evaluate(
    dataset,
    metrics=[faithfulness, answer_relevancy, context_precision, context_recall]
)



Evaluating: 100%|██████████| 4/4 [00:23<00:00,  5.79s/it]


In [26]:
print("\nRAG Evaluation Results:")
print(results)



RAG Evaluation Results:
{'faithfulness': 1.0000, 'answer_relevancy': 0.9650, 'context_precision': 0.2500, 'context_recall': 1.0000}


In [27]:
pip install streamlit 


Collecting streamlit
  Using cached streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Using cached cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9,>=7.0 (from streamlit)
  Using cached click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting protobuf<7,>=3.20 (from streamlit)
  Using cached protobuf-6.31.1-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Collecting toml<2,>=0.10.1 (from streamlit)
  Using cached toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Using cached watchdog-6.0.0-py3-none-win_amd64.whl.metadata (44 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Using cached GitPython-3.1.44-py3-none-any.whl.metadata (13 kB)
Collec