In [None]:
# Source text of a basic Medical Document Search script using
# Retrieval-Augmented Generation (RAG): embed the documents, index them with
# FAISS, retrieve the nearest ones for a query and ask an OpenAI chat model to
# answer from that context.
#
# The script is held in a regular (non-raw) triple-quoted string, so every
# backslash escape that must survive into the generated file has to be doubled
# (write \\n here to get \n there). A single \n would become a real line break
# inside the generated string literals and make the output file a SyntaxError.
#
# NOTE(review): the generated script builds `qa_pipeline` but never uses it,
# and calls `openai.ChatCompletion.create`, which is the pre-1.0 openai SDK
# interface -- confirm the installed openai version before relying on it.
rag_script = """
import os
import pandas as pd
import faiss
import numpy as np
import logging
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import openai

# Setup
# Prefer the OPENAI_API_KEY environment variable; fall back to the placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")  # Replace with your API key
logging.basicConfig(level=logging.INFO)

# Initialize embedding model and QA model
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")

def load_documents(file_path):
    logging.info("Loading documents...")
    df = pd.read_csv(file_path)
    return df

def embed_documents(docs):
    logging.info("Embedding documents...")
    return embedding_model.encode(docs, convert_to_tensor=False)

def build_faiss_index(embeddings):
    logging.info("Building FAISS index...")
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index

def retrieve_documents(query, index, doc_embeddings, docs, top_k=3):
    logging.info("Retrieving relevant documents...")
    query_embedding = embedding_model.encode([query])
    _, indices = index.search(np.array(query_embedding), top_k)
    # FAISS pads with -1 when the index holds fewer than top_k vectors;
    # skip those so docs[-1] is not returned as a bogus match.
    return [docs[i] for i in indices[0] if i >= 0]

def generate_answer(context, query):
    logging.info("Generating answer with RAG...")
    prompt = f"Use the context below to answer the question.\\n\\nContext: {context}\\n\\nQuestion: {query}"
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful medical assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message['content']

def run_rag_pipeline(query, docs_df):
    docs = docs_df['document'].tolist()
    doc_embeddings = embed_documents(docs)
    faiss_index = build_faiss_index(np.array(doc_embeddings))
    top_docs = retrieve_documents(query, faiss_index, doc_embeddings, docs)
    context = "\\n\\n".join(top_docs)
    answer = generate_answer(context, query)
    return answer

if __name__ == "__main__":
    docs_path = "medical_documents.csv"  # Expecting a 'document' column
    docs_df = load_documents(docs_path)
    user_query = input("Enter your medical query: ")
    result = run_rag_pipeline(user_query, docs_df)
    print("\\nAnswer:")
    print(result)
"""

# Save the generated script text to a file.
file_path = "/mnt/data/medical_rag_search.py"
# Explicit UTF-8 so the written file does not depend on the platform's
# default text encoding.
with open(file_path, "w", encoding="utf-8") as f:
    f.write(rag_script)

# Bare expression: presumably left so the notebook cell displays the output path.
file_path
