In [None]:
import requests
import json
import csv
import time

def fetch_fda_label_efficacy(drug_name, max_chars=1000):
    """Fetch selected sections of one openFDA drug label for a generic name.

    Sends one search to the openFDA drug-label endpoint for labels whose
    ``openfda.generic_name`` matches ``drug_name`` (limit 1) and keeps the
    first entry of each section of interest, truncated to ``max_chars``.

    Args:
        drug_name: Generic drug name to search for.
        max_chars: Maximum number of characters kept per section
            (defaults to 1000, the previously hard-coded limit).

    Returns:
        dict: Always contains ``"drug_name"``. On success it also has one key
        per label section, holding ``"Not available"`` when the label lacks
        (or has an empty) section. On failure it has an ``"error"`` message
        instead. The function never raises, so a batch loop can continue.
    """
    url = "https://api.fda.gov/drug/label.json"
    params = {
        "search": f'openfda.generic_name:"{drug_name}"',
        "limit": 1
    }
    # Label sections copied into the result, in output order.
    section_names = (
        "indications_and_usage",
        "clinical_studies",
        "dosage_and_administration",
        "warnings_and_cautions",
        "contraindications",
        "pregnancy",
        "pediatric_use",
        "geriatric_use",
    )

    try:
        response = requests.get(url, params=params, timeout=10)

        # openFDA reports "no matches" as HTTP 404, so this must be checked
        # before raise_for_status() turns it into a generic HTTP error.
        if response.status_code == 404:
            return {"drug_name": drug_name, "error": "No results found"}

        response.raise_for_status()
        data = response.json()

        # Covers both a missing "results" key and an empty result list.
        matches = data.get("results") or []
        if not matches:
            return {"drug_name": drug_name, "error": "No results found"}

        label = matches[0]
        record = {"drug_name": drug_name}
        for section in section_names:
            # `or` also replaces an empty list, which would otherwise raise
            # IndexError and discard every other section of this label.
            entries = label.get(section) or ["Not available"]
            record[section] = entries[0][:max_chars]
        return record

    except Exception as e:
        # Deliberately best-effort: report the failure in the record itself.
        return {"drug_name": drug_name, "error": str(e)}

# 🔢 Common generic drug names to look up (the list holds a little over 100 entries).
# Each name is passed to fetch_fda_label_efficacy, which matches it against
# the openFDA `openfda.generic_name` field.
drug_list = [
    "acetaminophen", "albuterol", "alprazolam", "amitriptyline", "amlodipine", "amoxicillin", "atorvastatin",
    "azithromycin", "benazepril", "bisoprolol", "bupropion", "buspirone", "carvedilol", "cephalexin", "ciprofloxacin",
    "citalopram", "clindamycin", "clonazepam", "clonidine", "clopidogrel", "cyclobenzaprine", "dexamethasone",
    "diazepam", "diclofenac", "dicyclomine", "digoxin", "diltiazem", "diphenhydramine", "divalproex", "donepezil",
    "doxycycline", "enalapril", "escitalopram", "esomeprazole", "famotidine", "fentanyl", "ferrous sulfate",
    "fluconazole", "fluoxetine", "fluticasone", "furosemide", "gabapentin", "glipizide", "glyburide", "guaifenesin",
    "haloperidol", "hydralazine", "hydrochlorothiazide", "hydrocodone", "hydroxyzine", "ibuprofen", "insulin glargine",
    "ipratropium", "irbesartan", "isosorbide", "ketoconazole", "lamotrigine", "lansoprazole", "lisinopril",
    "loratadine", "lorazepam", "losartan", "lovastatin", "meloxicam", "metformin", "methocarbamol", "methotrexate",
    "methylprednisolone", "metoprolol", "metronidazole", "montelukast", "morphine", "naproxen", "nitrofurantoin",
    "nitroglycerin", "nortriptyline", "omeprazole", "ondansetron", "oxycodone", "pantoprazole", "paroxetine",
    "penicillin", "phenobarbital", "phenytoin", "pioglitazone", "pravastatin", "prednisone", "pregabalin",
    "propranolol", "quetiapine", "ranitidine", "risperidone", "rosuvastatin", "sertraline", "simvastatin",
    "sulfamethoxazole", "sumatriptan", "tamsulosin", "temazepam", "topiramate", "tramadol", "trazodone", "valacyclovir",
    "valsartan", "venlafaxine", "warfarin", "zolpidem"
]

# Query openFDA once per drug, logging progress and pausing between calls.
total_drugs = len(drug_list)
results = []
for position, generic_name in enumerate(drug_list, start=1):
    print(f"[{position}/{total_drugs}] Fetching: {generic_name}")
    results.append(fetch_fda_label_efficacy(generic_name))
    time.sleep(1)  # Respect OpenFDA rate limits

# 💾 Persist the fetched records as pretty-printed JSON (read back later by the loader)
with open("drug_efficacy_data.json", "w") as json_file:
    json_file.write(json.dumps(results, indent=2))

# 💾 Write the same records as CSV, one row per drug
csv_columns = [
    "drug_name",
    "indications_and_usage",
    "clinical_studies",
    "dosage_and_administration",
    "warnings_and_cautions",
    "contraindications",
    "pregnancy",
    "pediatric_use",
    "geriatric_use",
    "error",
]
with open("drug_efficacy_data.csv", "w", newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
    csv_writer.writeheader()
    # Success rows have no "error" and failure rows have no sections;
    # whichever columns a record lacks are written as empty cells.
    csv_writer.writerows(
        {column: record.get(column, "") for column in csv_columns}
        for record in results
    )

print("✅ Done! Data saved to drug_efficacy_data.json and drug_efficacy_data.csv")


In [None]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) so the os.getenv
# lookups in the following cells can find the API keys.
load_dotenv()

In [None]:
# Check that the Groq / LangSmith settings are present in the environment.
# The previous `os.environ[k] = os.getenv(k)` round-trip changed nothing when
# a variable was set, and crashed with an opaque "str expected, not NoneType"
# TypeError when it was not. Fail with the list of missing names instead.
_required_env_vars = (
    "GROQ_API_KEY",
    "LANGSMITH_API_KEY",
    "LANGSMITH_PROJECT",
    "LANGSMITH_TRACING_V2",
)
_missing_env_vars = [name for name in _required_env_vars if name not in os.environ]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing environment variables (set them in .env or the shell): "
        + ", ".join(_missing_env_vars)
    )

In [None]:
##Test Groq Connection
from langchain_groq import ChatGroq
# Groq-hosted chat model used by the connection test in the next cells.
groq_chat_options = {
    "model": "deepseek-r1-distill-llama-70b",
    "temperature": 0,
    "max_tokens": None,
    "reasoning_format": "parsed",
    "timeout": None,
    "max_retries": 2,
}
llm = ChatGroq(**groq_chat_options)
    


In [None]:
from langchain_core.prompts import ChatPromptTemplate
# Two-message translation template; {language1}, {language2} and {input}
# are filled in when the chain is invoked.
system_message = (
    "system",
    "You are a helpful translator that translates from {language1} to {language2}.",
)
human_message = ("human", "{input}")
prompt = ChatPromptTemplate.from_messages([system_message, human_message])

In [None]:
# Pipe the translation prompt into the model and run one sample request;
# the response is shown as the cell output.
translation_inputs = {
    "language1": "English",
    "language2": "Dutch",
    "input": "This is trnslation using llm.",
}
chain = prompt | llm
chain.invoke(translation_inputs)

Create a document loader to read the drug product-label JSON data (`drug_efficacy_data.json`).

In [None]:
from langchain_community.document_loaders import JSONLoader
def _label_metadata(record, metadata):
    """Add the record's drug name to the metadata JSONLoader builds for it.

    Args:
        record: One element selected by the loader's jq schema (a label dict).
        metadata: Metadata the loader has prepared for that record.

    Returns:
        dict: A copy of ``metadata`` with ``"drug_name"`` added when the
        record provides one.
    """
    enriched = dict(metadata)
    drug_name = record.get("drug_name") if isinstance(record, dict) else None
    if drug_name is not None:
        enriched["drug_name"] = drug_name
    return enriched


# Without a metadata_func the documents carry no "drug_name" field, so a
# vector-store metadata filter on drug_name could never match anything.
loader = JSONLoader(
    file_path="./drug_efficacy_data.json",
    jq_schema=".[]",
    text_content=False,
    metadata_func=_label_metadata,
)

In [None]:
# Load the JSON file. Despite the singular name, `document` is a list:
# it is indexed with [0] and passed to split_documents() further down.
document = loader.load()

Verifying Sample Data

In [None]:
# Show the first loaded document as the cell output.
document[0]

In [None]:
# Print the metadata attached to the first loaded document.
print(document[0].metadata)

Next, split the loaded documents into smaller chunks ("pages") using a text splitter.

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Cut the loaded documents into chunks; these are what gets embedded and
# stored in the vector index later on.
splitter_options = {"chunk_size": 500, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
pages = text_splitter.split_documents(document)

In [None]:
# Number of chunks produced by the splitter.
len(pages)

In [None]:
# Assigning os.getenv(...) back into os.environ changes nothing when the key
# is set and raises an opaque TypeError when it is not; check explicitly.
if "NOMIC_API_KEY" not in os.environ:
    raise EnvironmentError(
        "NOMIC_API_KEY is not set; add it to .env or export it before creating the embeddings."
    )

In [None]:
from langchain_nomic import NomicEmbeddings
# Embedding model for the chunks. dimensionality=350 has to stay equal to the
# `dimension` given to pc.create_index further down.
embeddings=NomicEmbeddings(model="nomic-embed-text-v1.5",dimensionality=350)

In [None]:
from pinecone import Pinecone


In [None]:
# Assigning os.getenv(...) back into os.environ changes nothing when the key
# is set and raises an opaque TypeError when it is not; check explicitly.
if "PINECONE_API_KEY" not in os.environ:
    raise EnvironmentError(
        "PINECONE_API_KEY is not set; add it to .env or export it before creating the Pinecone client."
    )

In [None]:
# Pinecone client, authenticated with the key taken from the environment.
pc=Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [None]:
from pinecone import ServerlessSpec

In [None]:
index_name = "medicalqueryasst"

# Start from an empty index on every run: drop any existing one first.
if pc.has_index(index_name):
    pc.delete_index(index_name)

# Create the index unconditionally. This call used to sit inside the `if`
# above, so on a first run (no index yet) nothing was created and the later
# pc.Index(index_name) / upsert cells had no index to talk to.
pc.create_index(
    name=index_name,
    dimension=350,  # must equal NomicEmbeddings(dimensionality=350)
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

In [None]:
# Handle to the index named above; handed to PineconeVectorStore below.
index=pc.Index(index_name)

In [None]:
from uuid import uuid4
# One random UUID string per chunk, used as the vector IDs on upload.
uuids = [str(uuid4()) for _chunk in pages]

In [None]:
from langchain_pinecone import PineconeVectorStore

In [None]:
# Vector store over the Pinecone index, using the Nomic embeddings object.
vector_store=PineconeVectorStore(index=index,embedding=embeddings)

In [None]:
# Add every chunk to the vector store under its generated UUID.
# NOTE(review): the IDs are random, so re-running this cell against the same
# index presumably stores the chunks again under new IDs — confirm.
vector_store.add_documents(documents=pages, ids=uuids)

In [None]:
# Three closest chunks for a sample question.
# Note: this rebinds `results`, which earlier held the fetched label records.
results = vector_store.similarity_search("what is acetaminophen?",k=3)

In [None]:
# Display the search hits from the previous cell.
results

In [None]:
# Same question, restricted by a metadata filter on drug_name.
# NOTE(review): this only returns hits if the stored chunks carry a
# "drug_name" metadata field — verify against the document loader.
results1 = vector_store.similarity_search("what is acetaminophen?",filter={"drug_name": "acetaminophen"})
print(results1)

In [None]:
# Retriever over the vector store that applies a score threshold to the hits.
retriever_settings = {"score_threshold": 0.7}  # hyperparameter
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retriever_settings,
)

In [None]:
# Try the retriever on a bare drug name; the hits are shown as cell output.
retriever.invoke("acetaminophen")

In [None]:
from langchain_groq import ChatGroq
# Groq chat model for the RAG chain (same settings as the connection test).
rag_llm_options = dict(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
)
llm = ChatGroq(**rag_llm_options)

In [None]:
from langchain import hub
# Pull the "rlm/rag-prompt" template from the LangChain hub.
# Note: this rebinds `prompt`, which earlier held the translation template.
prompt = hub.pull("rlm/rag-prompt")

In [None]:
import pprint
# Pretty-print the messages of the pulled prompt to inspect its wording.
pprint.pprint(prompt.messages)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [None]:
def format_docs(docs):
    """Concatenate the text of the given documents, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        str: All ``page_content`` values joined with ``"\\n\\n"``.
    """
    texts = [doc.page_content for doc in docs]
    return "\n\n".join(texts)

In [None]:
# RAG pipeline: retrieved chunks are flattened into the prompt's "context",
# the incoming question is passed through unchanged, and the model's reply
# is parsed to a plain string.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Sample question targeting the dosage_and_administration section.
rag_chain.invoke("tell me about dosage  of acetaminophen")

In [None]:
# Sample general question about a single drug.
rag_chain.invoke("what is fentanyl?")

In [None]:
# Sample question about side effects.
# NOTE(review): fetch_fda_label_efficacy stores no adverse-reactions section,
# so the retrieved context may not cover side effects — confirm the answer
# is grounded in the label text.
rag_chain.invoke("what are the side effects of fentanyl?")

In [None]:
# Sample question targeting the clinical_studies section.
rag_chain.invoke("Are there any clinical studies on fluoxetine?")