In [1]:
!pip install --force-reinstall numpy==1.24.2

Collecting numpy==1.24.2
  Downloading numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m89.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.24.2 which is incompatible.
jax 0.5.2 requires numpy>=1.25, but you have numpy 1.24.2 which is incompatible.
albucore 0.0.24 requires numpy>=1.24.4, but you have numpy 1.24.2 which is incompatible.
blosc2 3.3.2 requires numpy>=1.26

In [1]:
!pip install -qU langchain-community==0.3.14 \
                 langchain-openai==0.2.14 \
                 unstructured==0.16.12 \
                 langgraph==0.2.61 \
                 langchain-qdrant==0.2.0 \
                 ragas==0.2.10 \
                 pymupdf \
                 cohere \
                 langchain_cohere \
                 langchain_huggingface \
                 langchain_core>=0.3.34 \
                 langchain>=0.3.18 \
                 langchain-text-splitters>=0.3.6 \
                 datasets>=3.2.0 \
                 sentence_transformers \
                 pyarrow

In [2]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [3]:
import getpass
import os
from google.colab import userdata

# OPENAI KEY
# Try to get API key from Colab secrets
api_key = userdata.get("OPENAI_API_KEY")
# If not found, prompt the user
if not api_key:
    api_key = getpass.getpass("Enter your OpenAI API key: ")
# Set it as an environment variable
os.environ["OPENAI_API_KEY"] = api_key

# QDRANT KEY
qdrnt_api_key = userdata.get("QDRNT_API_KEY")
if not qdrnt_api_key:
    qdrnt_api_key = getpass.getpass("Enter your QDRNT API key: ")
os.environ["QDRNT_API_KEY"] = qdrnt_api_key

# REDDIT KEYS
reddit_clt_id = userdata.get("reddit_clt_id")
if not reddit_clt_id:
    reddit_clt_id = getpass.getpass("Enter your reddit client id: ")
    os.environ["reddit_clt_id"] = reddit_clt_id

reddit_clt_sec = userdata.get("reddit_clt_secret")
if not reddit_clt_sec:
    reddit_clt_sec = getpass.getpass("Enter your reddit client secret: ")
    os.environ["reddit_clt_sec"] = reddit_clt_sec

# TAVILY API KEY
tavily_api_key = userdata.get("TAVILY_API_KEY")
if not tavily_api_key:
    tavily_api_key = getpass.getpass("Enter your TAVILY API key: ")
os.environ["TAVILY_API_KEY"] = tavily_api_key

#LANGCHAIN
langchain_api_key = userdata.get("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
if not langchain_api_key:
    langchain_api_key = getpass.getpass("Enter your LANGCHAIN API key: ")
os.environ["LANGCHAIN_API_KEY"] = langchain_api_key

#COHERE
cohere_api_key = userdata.get("COHERE_API_KEY")
if not cohere_api_key:
    cohere_api_key = getpass.getpass("Enter your COHERE API key: ")
os.environ["COHERE_API_KEY"] = cohere_api_key

In [4]:
from uuid import uuid4

os.environ["LANGCHAIN_PROJECT"] = f"AIM - SDG - {uuid4().hex[0:8]}"

In [6]:
# # 1. Mount your Google Drive
# # from google.colab import drive
# # drive.mount('/content/drive')

# # 2. Ensure the target folder exists
# import os
# drive_folder = '/content/drive/MyDrive/MedicarePilot/pdf_files'
# os.makedirs(drive_folder, exist_ok=True)

# # 3. Download the PDF and save it
# import requests

# url = "https://www.medicare.gov/publications/10050-medicare-and-you.pdf"
# r = requests.get(url, stream=True)
# r.raise_for_status()

# output_path = os.path.join(drive_folder, "10050-medicare-and-you.pdf")
# with open(output_path, "wb") as f:
#     for chunk in r.iter_content(chunk_size=8192):
#         f.write(chunk)

# print(f"✅ PDF saved to: {output_path}")


In [5]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-nano"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

In [6]:
import os
from pathlib import Path
from tqdm.notebook import tqdm

from langchain_community.document_loaders import PyMuPDFLoader
from langchain.schema import Document

PDF_ROOT = Path("/content/drive/MyDrive/MedicarePilot/pdf_files")

pdf_paths = list(PDF_ROOT.rglob("*.pdf"))

# Load PDFs into Documents
pdf_docs = []
errors = []

for pdf_path in tqdm(pdf_paths, desc="Loading PDFs"):
    try:
        loader = PyMuPDFLoader(str(pdf_path))
        # load() returns a list of Document, one per page by default
        page_docs = loader.load()
        # enrich metadata with filename
        for doc in page_docs:
            # ensure metadata has source and page info
            meta = dict(doc.metadata)
            meta["source"] = str(pdf_path)
            # if loader provides 'page' or 'page_number', keep it; otherwise derive
            if "page" not in meta:
                # infer page index from doc.content order
                # PyMuPDFLoader sets metadata["page"] for each Document
                pass
            doc.metadata = meta  # type: ignore
        pdf_docs.extend(page_docs)
        print(f"  ✅ Loaded {len(page_docs)} pages from {pdf_path.name}")
    except Exception as e:
        errors.append((pdf_path.name, str(e)))
        print(f"  ⚠️  Failed to load {pdf_path.name}: {e}")

Loading PDFs:   0%|          | 0/1 [00:00<?, ?it/s]

  ✅ Loaded 128 pages from 10050-medicare-and-you.pdf




In [7]:
from ragas.testset import TestsetGenerator # Import TestsetGenerator from ragas.testset
import random

# pick 10 random documents (or whatever number you like)
sampled_docs = random.sample(pdf_docs, k=25)

generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)

# now generate exactly 15 Q-A pairs from those
dataset = generator.generate_with_langchain_docs(
    sampled_docs,
    testset_size=15
)

Applying HeadlinesExtractor:   0%|          | 0/18 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/25 [00:00<?, ?it/s]

ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node
ERROR:ragas.testset.transforms.engine:unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/36 [00:00<?, ?it/s]



Applying CustomNodeFilter: 0it [00:00, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/36 [00:00<?, ?it/s]



Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/2 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/8 [00:00<?, ?it/s]

In [20]:
df = dataset.to_pandas()

# Example path to save the file
output_path = '/content/drive/MyDrive/SDG.csv'

# Assuming your DataFrame is called `df`
df.to_csv(output_path, index=False)
print(f"✅ DataFrame saved to: {output_path}")

✅ DataFrame saved to: /content/drive/MyDrive/SDG.csv


In [21]:
dataset.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Wha is the medicare plan?,[6\nIndex of topics\n Note: The bold page numb...,The context does not provide a specific defini...,single_hop_specifc_query_synthesizer
1,Whre can I get mor info abut Medigap?,[78\nSection 5: Medicare Supplement Insurance ...,You can call your State Insurance Department o...,single_hop_specifc_query_synthesizer
2,How do I get new Medicare card if lost or dama...,[108 Section 9: Find helpful contacts and more...,If you need a new copy of your Medicare card b...,single_hop_specifc_query_synthesizer
3,What does Medicare cover for outpatient hospit...,[48\nSection 2: Find out what Medicare covers\...,Medicare covers many diagnostic and treatment ...,single_hop_specifc_query_synthesizer
4,How does my other insurance work with Medicare...,[21\nSection 1: Signing up for Medicare\n21\nH...,When you have other insurance like group healt...,single_hop_specifc_query_synthesizer
5,How can I find helpful contacts and informatio...,[111\nSection 9: Find helpful contacts and mor...,Medicare allows your health care provider’s AC...,single_hop_specifc_query_synthesizer
6,Original Medicare it good?,[57\n57\n Note: Go to pages 119–122 for defini...,Original Medicare is one of your Medicare heal...,single_hop_specifc_query_synthesizer
7,How do I get new Medicare card if lost or dama...,[108 Section 9: Find helpful contacts and more...,If you need a new copy of your Medicare card b...,single_hop_specifc_query_synthesizer


# RAG

In [22]:
import tiktoken

from langchain.text_splitter import RecursiveCharacterTextSplitter

def tiktoken_len(text):
    tokens = tiktoken.encoding_for_model("gpt-4o-mini").encode(
        text,
    )
    return len(tokens)

In [23]:
text_splitter1 = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    length_function = tiktoken_len,
)

text_splitter2 = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100,
    length_function = tiktoken_len,
)

In [24]:
split_chunks1 = text_splitter1.split_documents(pdf_docs)
split_chunks2 = text_splitter2.split_documents(pdf_docs)

In [25]:
len(split_chunks1)

222

In [26]:
len(split_chunks2)

128

In [27]:
from langchain_openai import OpenAIEmbeddings

embeddings1 = OpenAIEmbeddings(model="text-embedding-3-small")

embeddings2 = OpenAIEmbeddings(model="text-embedding-3-large")

In [28]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client1 = QdrantClient(":memory:")

client1.create_collection(
    collection_name="medicare_signup1",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

vector_store1 = QdrantVectorStore(
    client=client1,
    collection_name="medicare_signup1",
    embedding=embeddings1,
)

### Larger model

client2 = QdrantClient(":memory:")

client2.create_collection(
    collection_name="medicare_signup2",
    vectors_config=VectorParams(size=3072, distance=Distance.COSINE),
)



vector_store2 = QdrantVectorStore(
    client=client2,
    collection_name="medicare_signup2",
    embedding=embeddings2,
)

In [None]:
# _ = vector_store.add_documents(documents=split_chunks1)

In [29]:
_ = vector_store1.add_documents(documents=split_chunks2)

In [30]:
_ = vector_store2.add_documents(documents=split_chunks2)

In [31]:
retriever1 = vector_store1.as_retriever(search_kwargs={"k": 5})

In [32]:
retriever2 = vector_store2.as_retriever(search_kwargs={"k": 5})

In [33]:
def retrieve1(state):
  retrieved_docs = retriever1.invoke(state["question"])
  return {"context" : retrieved_docs}

In [34]:
def retrieve2(state):
  retrieved_docs = retriever2.invoke(state["question"])
  return {"context" : retrieved_docs}

In [35]:
from langchain.prompts import ChatPromptTemplate

RAG_PROMPT = """\
You are a seasoned health insurance advisor with over 30 years in the industry.
You have tremendous knowledge about the industry and the different players.
You keep track of the changes in regulations and you are here to advise the user with their questions or concerns.
If there are any industry jargons in your responses make them easy to understand and provide details of what they are.
Wherever available provide references.
If you are unable to get the answer from the context provided, just say that you don't know. Don't try to make up an answer.

### Question
{question}

### Context
{context}
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [36]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [37]:
def generate(state):
  docs_content = "\n\n".join(doc.page_content for doc in state["context"])
  messages = rag_prompt.format_messages(question=state["question"], context=docs_content)
  response = llm.invoke(messages)
  return {"response" : response.content}

In [38]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.documents import Document

class State(TypedDict):
  question: str
  context: List[Document]
  response: str

In [39]:
graph_builder1 = StateGraph(State).add_sequence([retrieve1, generate])
graph_builder1.add_edge(START, "retrieve1")
graph1 = graph_builder1.compile()

graph_builder2 = StateGraph(State).add_sequence([retrieve2, generate])
graph_builder2.add_edge(START, "retrieve2")
graph2 = graph_builder2.compile()

In [40]:
response1 = graph1.invoke({"question" : "How are LLM agents useful?"})
print(response1['response'])

I'm sorry, but I don't know how LLM agents are useful in the context of health insurance or related topics. If you have any questions specific to health insurance or Medicare, feel free to ask!


In [30]:
response1 = graph1.invoke({"question" : "How much would a stay in a long term care?"})
print(response1['response'])

The cost of a long-term care stay can vary widely depending on several factors, including the type of facility, geographic location, and the level of care required. Here are some general estimates based on common types of long-term care:

1. **Assisted Living Facilities**: The average monthly cost for assisted living in the United States can range from $3,000 to $5,000, depending on the location and services offered.

2. **Nursing Homes**: For nursing homes, the average daily cost can range from $200 to $400 or more, translating to approximately $6,000 to $12,000 per month. Costs may be higher for private rooms compared to semi-private rooms.

3. **Home Care Services**: If care is provided at home, costs typically range from $20 to $50 per hour for home health aides, leading to a monthly cost that can vary widely based on the number of hours of care needed.

It's important to note that Medicare does not cover non-medical long-term care services, such as assistance with daily activities

In [31]:
response2 = graph2.invoke({"question" : "How much would a stay in a long term care?"})
print(response2['response'])

The cost of a long-term care stay can vary significantly based on various factors, including the type of facility, the level of care required, and the geographical location. Unfortunately, the provided context does not specify the costs associated with long-term care stays directly.

However, I can provide some general information:

1. **Types of Long-Term Care Facilities**: 
   - **Nursing Homes**: These facilities provide 24-hour medical care. Costs can range from $7,000 to over $10,000 per month, depending on location and services offered.
   - **Assisted Living Facilities**: These provide a combination of housing and personal care services. Monthly costs can range from $3,000 to $7,000.
   - **In-Home Care**: Hiring a caregiver to assist at home can cost between $20 to $50 per hour, depending on the level of care needed.

2. **Medicare Coverage**: 
   - Medicare typically does not cover long-term care. While it may cover short-term stays in skilled nursing facilities (usually after

In [None]:
response1 = graph1.invoke({"question" : "Does Medicare pay for long term care stay?"})
print(response1['response'])

In [41]:
for test_row in dataset:
  response1 = graph1.invoke({"question" : test_row.eval_sample.user_input})
  test_row.eval_sample.response = response1["response"]
  test_row.eval_sample.retrieved_contexts = [context.page_content for context in response1["context"]]

In [42]:
dataset.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,synthesizer_name
0,Wha is the medicare plan?,[121\n121\nSection 10: Definitions\n121\nMedic...,[6\nIndex of topics\n Note: The bold page numb...,Medicare is a federal health insurance program...,The context does not provide a specific defini...,single_hop_specifc_query_synthesizer
1,Whre can I get mor info abut Medigap?,[78\nSection 5: Medicare Supplement Insurance ...,[78\nSection 5: Medicare Supplement Insurance ...,You can find more information about Medigap (M...,You can call your State Insurance Department o...,single_hop_specifc_query_synthesizer
2,How do I get new Medicare card if lost or dama...,[108 Section 9: Find helpful contacts and more...,[108 Section 9: Find helpful contacts and more...,"If you've lost or damaged your Medicare card, ...",If you need a new copy of your Medicare card b...,single_hop_specifc_query_synthesizer
3,What does Medicare cover for outpatient hospit...,[48\nSection 2: Find out what Medicare covers\...,[48\nSection 2: Find out what Medicare covers\...,Medicare covers a variety of outpatient hospit...,Medicare covers many diagnostic and treatment ...,single_hop_specifc_query_synthesizer
4,How does my other insurance work with Medicare...,[21\nSection 1: Signing up for Medicare\n21\nH...,[21\nSection 1: Signing up for Medicare\n21\nH...,When you have other insurance in addition to M...,When you have other insurance like group healt...,single_hop_specifc_query_synthesizer
5,How can I find helpful contacts and informatio...,[111\nSection 9: Find helpful contacts and mor...,[111\nSection 9: Find helpful contacts and mor...,To find helpful contacts and information about...,Medicare allows your health care provider’s AC...,single_hop_specifc_query_synthesizer
6,Original Medicare it good?,[57\n57\n Note: Go to pages 119–122 for defini...,[57\n57\n Note: Go to pages 119–122 for defini...,Original Medicare can be a good option for man...,Original Medicare is one of your Medicare heal...,single_hop_specifc_query_synthesizer
7,How do I get new Medicare card if lost or dama...,[108 Section 9: Find helpful contacts and more...,[108 Section 9: Find helpful contacts and more...,"If you've lost or damaged your Medicare card, ...",If you need a new copy of your Medicare card b...,single_hop_specifc_query_synthesizer


In [43]:
from ragas import EvaluationDataset

evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

In [44]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper

evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))

In [45]:
#Chunk size - 500 tokens
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig

custom_run_config = RunConfig(timeout=360)

result1 = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness(), ResponseRelevancy(), ContextEntityRecall(), NoiseSensitivity()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result1

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[22]: LLMDidNotFinishException(The LLM generation was not completed. Please increase try increasing the max_tokens and try again.)
ERROR:ragas.executor:Exception raised in Job[41]: TimeoutError()


{'context_recall': 1.0000, 'faithfulness': 0.8177, 'factual_correctness': 0.5550, 'answer_relevancy': 0.8308, 'context_entity_recall': 0.4384, 'noise_sensitivity_relevant': 0.3790}

In [46]:
#Chunk size - 1000 tokens
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig

custom_run_config = RunConfig(timeout=360)

result2 = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness(), ResponseRelevancy(), ContextEntityRecall(), NoiseSensitivity()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result2

Evaluating:   0%|          | 0/48 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[22]: LLMDidNotFinishException(The LLM generation was not completed. Please increase try increasing the max_tokens and try again.)


{'context_recall': 1.0000, 'faithfulness': 0.8279, 'factual_correctness': 0.5588, 'answer_relevancy': 0.8282, 'context_entity_recall': 0.4241, 'noise_sensitivity_relevant': 0.3660}

In [41]:
print(result1)

{'context_recall': 0.4000, 'faithfulness': 0.8529, 'factual_correctness': 0.2840, 'answer_relevancy': 0.9474, 'context_entity_recall': 0.2783, 'noise_sensitivity_relevant': 0.4429}


In [42]:
print(result2)

{'context_recall': 0.4000, 'faithfulness': 0.8315, 'factual_correctness': 0.2760, 'answer_relevancy': 0.9474, 'context_entity_recall': 0.3059, 'noise_sensitivity_relevant': 0.3962}


# Using Cohere

In [43]:
retriever = vector_store2.as_retriever(search_kwargs={"k": 20})

In [44]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

def retrieve_adjusted(state):
  compressor = CohereRerank(model="rerank-v3.5")
  compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever, search_kwargs={"k": 5}
  )
  retrieved_docs = compression_retriever.invoke(state["question"])
  return {"context" : retrieved_docs}

In [45]:
class State(TypedDict):
  question: str
  context: List[Document]
  response: str

graph_builder = StateGraph(State).add_sequence([retrieve_adjusted, generate])
graph_builder.add_edge(START, "retrieve_adjusted")
graph = graph_builder.compile()

In [46]:
response = graph.invoke({"question" : "Does Medicare pay for long term care stay?"})
response["response"]

"Medicare does not pay for long-term care stays that are considered non-medical. This includes personal care assistance, which involves help with daily activities such as dressing, bathing, and using the bathroom, as well as services like home-delivered meals or adult day health care. \n\nWhile Medicare may cover skilled nursing facility care, this is only under specific conditions: you must have a medically necessary inpatient hospital stay of at least three days, and the care you receive at the skilled nursing facility must be for a condition related to that hospital stay. The care must also be skilled care, which means it requires the services of licensed professionals, such as nurses or therapists.\n\nFor long-term care needs that are not covered by Medicare, you might want to consider Medicaid or private long-term care insurance options. Medicaid can cover some long-term care services, but eligibility varies by state, so it's essential to check your local Medicaid office for speci

In [47]:
import time

for test_row in dataset:
  response = graph.invoke({"question" : test_row.eval_sample.user_input})
  test_row.eval_sample.response = response["response"]
  test_row.eval_sample.retrieved_contexts = [context.page_content for context in response["context"]]
  time.sleep(2) # To try to avoid rate limiting.

In [48]:
result3 = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness(), ResponseRelevancy(), ContextEntityRecall(), NoiseSensitivity()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result3

Evaluating:   0%|          | 0/30 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[16]: LLMDidNotFinishException(The LLM generation was not completed. Please increase try increasing the max_tokens and try again.)


{'context_recall': 0.4000, 'faithfulness': 0.8561, 'factual_correctness': 0.2620, 'answer_relevancy': 0.9474, 'context_entity_recall': 0.2831, 'noise_sensitivity_relevant': 0.3742}

In [51]:
print(result3)

{'context_recall': 0.4000, 'faithfulness': 0.8561, 'factual_correctness': 0.2620, 'answer_relevancy': 0.9474, 'context_entity_recall': 0.2831, 'noise_sensitivity_relevant': 0.3742}


In [49]:
print(result1)

{'context_recall': 0.4000, 'faithfulness': 0.8529, 'factual_correctness': 0.2840, 'answer_relevancy': 0.9474, 'context_entity_recall': 0.2783, 'noise_sensitivity_relevant': 0.4429}


In [50]:
print(result2)

{'context_recall': 0.4000, 'faithfulness': 0.8315, 'factual_correctness': 0.2760, 'answer_relevancy': 0.9474, 'context_entity_recall': 0.3059, 'noise_sensitivity_relevant': 0.3962}


Cohere is not any better than Large embedding but is slightly better than the openai small embedding

# Fine tuned embedding model

In [47]:
import nest_asyncio

nest_asyncio.apply()

In [48]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings4 = HuggingFaceEmbeddings(model_name="vivnatan/snowflake-arctic-embed-l-medicare")

RuntimeError: Failed to import transformers.modeling_utils because of the following error (look up to see its traceback):
module 'numpy' has no attribute 'dtypes'

In [None]:
### Larger model

client4 = QdrantClient(":memory:")

client4.create_collection(
    collection_name="medicare_signup4",
    vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
)



vector_store4 = QdrantVectorStore(
    client=client4,
    collection_name="medicare_signup4",
    embedding=embeddings4,
)

In [None]:
_ = vector_store4.add_documents(documents=split_chunks2)

In [None]:
retriever4 = vector_store4.as_retriever(search_kwargs={"k": 5})

In [None]:
def retrieve4(state):
  retrieved_docs = retriever4.invoke(state["question"])
  return {"context" : retrieved_docs}

In [None]:
graph_builder4 = StateGraph(State).add_sequence([retrieve4, generate])
graph_builder4.add_edge(START, "retrieve4")
graph2 = graph_builder4.compile()

In [None]:
#Chunk size - 1000 tokens
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig

custom_run_config = RunConfig(timeout=360)

result4 = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness(), ResponseRelevancy(), ContextEntityRecall(), NoiseSensitivity()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result4

In [53]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import BSHTMLLoader

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 25,
    length_function = tiktoken_len
)

In [57]:
training_documents = split_chunks1
# text_splitter.split_documents(text_loader.load())

In [58]:
len(training_documents)

222

In [59]:
import uuid

id_set = set()

for document in training_documents:
  id = str(uuid.uuid4())
  while id in id_set:
    id = uuid.uuid4()
  id_set.add(id)
  document.metadata["id"] = id

In [63]:
training_split_documents = training_documents[:len(training_documents) - 40]
val_split_documents = training_documents[len(training_documents) - 40:len(training_documents)-20]
test_split_documents = training_documents[len(training_documents)-20:]

In [64]:
len(training_split_documents), len(val_split_documents), len(test_split_documents)

(182, 20, 20)

In [65]:
from langchain_openai import ChatOpenAI

qa_chat_model = ChatOpenAI(
    model="gpt-4.1-mini",
    temperature=0
)

In [66]:
from langchain_core.prompts import ChatPromptTemplate

qa_prompt = """\
Given the following context, you must generate questions based on only the provided context.

You are to generate {n_questions} questions which should be provided in the following format:

1. QUESTION #1
2. QUESTION #2
...

Context:
{context}
"""

qa_prompt_template = ChatPromptTemplate.from_template(qa_prompt)

In [67]:
question_generation_chain = qa_prompt_template | qa_chat_model

In [68]:
import tqdm
import asyncio

"""
Sample Usage of TQDM:

for i in tqdm.tqdm(range(10)):
  time.sleep(1)
"""

async def process_document(document, n_questions):
    questions_generated = await question_generation_chain.ainvoke({"context": document.page_content, "n_questions": n_questions})

    doc_questions = {}
    doc_relevant_docs = {}

    for question in questions_generated.content.split("\n"):
        question_id = str(uuid.uuid4())
        doc_questions[question_id] = "".join(question.split(".")[1:]).strip()
        doc_relevant_docs[question_id] = [document.metadata["id"]]

    return doc_questions, doc_relevant_docs

async def create_questions(documents, n_questions):
    tasks = [process_document(doc, n_questions) for doc in documents]

    questions = {}
    relevant_docs = {}

    for task in tqdm.tqdm(asyncio.as_completed(tasks), total=len(documents), desc="Processing documents"):
        doc_questions, doc_relevant_docs = await task
        questions.update(doc_questions)
        relevant_docs.update(doc_relevant_docs)

    return questions, relevant_docs

In [71]:
training_questions, training_relevant_contexts = await create_questions(training_split_documents, 5)

Processing documents: 100%|██████████| 182/182 [00:12<00:00, 14.42it/s]


In [70]:
training_questions

{'6db26c7a-a30f-4caa-b184-1c770ff8cf90': 'How can you safely store and discard unused prescription opioids?',
 '2e7e476f-8824-4baf-abbe-896374e0fdb2': 'Where can you find more information on safe and effective pain management and opioid use?',
 'e9a8ef9e-311e-4d57-bc03-12bed772df5e': 'Do Medicare drug plans offer automatic prescription refills by mail?',
 'f7eb3b27-4e7d-4ddf-b4e9-da12f4841c96': 'What pages cover information about Advance Beneficiary Notice of Non-coverage (ABN)?',
 '51147819-40b9-47ef-9bf6-c8db1de72d2e': 'On which pages can you find details about Behavioral health integration services?',
 'a0c42bdf-a94f-4160-9c35-7de75bb19ad2': 'Where in the document is the topic of Appeals discussed?',
 'a14e61ea-dc4d-4c67-a2de-c2e48908dcd2': 'What are the costs associated with the immunosuppressive drug benefit under Medicare in 2025?',
 '98116705-097d-4d1f-9d18-cb1295414de8': 'Under what conditions does Medicare cover health care services while traveling outside the US?',
 '4bf7d143

In [72]:
val_questions, val_relevant_contexts = await create_questions(val_split_documents, 10)

Processing documents: 100%|██████████| 20/20 [00:05<00:00,  3.78it/s]


In [73]:
test_questions, test_relevant_contexts = await create_questions(test_split_documents, 10)

Processing documents: 100%|██████████| 20/20 [00:05<00:00,  3.66it/s]


In [74]:
import json

training_corpus = {train_item.metadata["id"] : train_item.page_content for train_item in training_split_documents}

train_dataset = {
    "questions" : training_questions,
    "relevant_contexts" : training_relevant_contexts,
    "corpus" : training_corpus
}

with open("training_dataset.jsonl", "w") as f:
  json.dump(train_dataset, f)

In [75]:
val_corpus = {val_item.metadata["id"] : val_item.page_content for val_item in val_split_documents}

val_dataset = {
    "questions" : val_questions,
    "relevant_contexts" : val_relevant_contexts,
    "corpus" : val_corpus
}

with open("val_dataset.jsonl", "w") as f:
  json.dump(val_dataset, f)

In [76]:
train_corpus = {test_item.metadata["id"] : test_item.page_content for test_item in test_split_documents}

test_dataset = {
    "questions" : test_questions,
    "relevant_contexts" : test_relevant_contexts,
    "corpus" : train_corpus
}

with open("test_dataset.jsonl", "w") as f:
  json.dump(test_dataset, f)