In [1]:
import os

# Never commit credentials in a notebook: the key is read from the environment.
# NOTE(review): a literal key was previously stored in this cell; treat it as
# leaked and revoke/rotate it in the Google Cloud console.
API_KEY = os.environ.get("GOOGLE_API_KEY", "")
if not API_KEY:
    # Warn early so a missing key is not first noticed as an opaque auth
    # failure when the models are called later in the notebook.
    print("[WARN] GOOGLE_API_KEY is not set; calls to the Gemini API will fail.")

In [2]:
from pypdf import PdfReader
def read_pdf(file):
    """Extract the text of every page of a PDF and join the pages with newlines.

    Args:
        file: Path or file-like object handed to ``PdfReader``.

    Returns:
        The concatenated page text. Pages for which ``extract_text()`` yields
        nothing contribute an empty string. If reading fails for any reason,
        the error is printed and an empty string is returned, so the return
        value is always a ``str``.
    """
    try:
        reader = PdfReader(file)
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        print(f"[ERROR] while reading PDF: {str(e)}")
        # Best-effort: keep the notebook running, but return a str rather than
        # an implicit None so callers can rely on a single return type.
        return ""

In [4]:
import tiktoken
def chunk_text(text, max_tokens=30, tokenizer=None):
    """Split ``text`` into chunks of whole words within a token budget.

    Words are obtained with ``str.split()`` and each word is tokenized on its
    own; a chunk's size is the sum of its words' token counts. A single word
    whose token count exceeds ``max_tokens`` is emitted as its own chunk
    rather than being split.

    Args:
        text: The text to split.
        max_tokens: Maximum summed per-word token count per chunk (default 30).
        tokenizer: Optional object with an ``encode(str)`` method returning a
            sequence of tokens. Defaults to tiktoken's ``cl100k_base`` encoding.

    Returns:
        A list of non-empty chunk strings, words re-joined with single spaces.
        Empty or whitespace-only input yields ``[]``.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")
    if tokenizer is None:
        tokenizer = tiktoken.get_encoding("cl100k_base")

    chunks, chunk, tokens = [], [], 0
    for word in text.split():
        token_count = len(tokenizer.encode(word))
        # Flush only when something has been accumulated; otherwise an
        # over-budget first word would produce an empty "" chunk.
        if chunk and tokens + token_count > max_tokens:
            chunks.append(" ".join(chunk))
            chunk, tokens = [], 0
        chunk.append(word)
        tokens += token_count
    if chunk:
        chunks.append(" ".join(chunk))
    return chunks


In [33]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

# Chat model used to answer questions. The keyword must be spelled
# `temperature`; the previous misspelling meant 0.2 was never applied.
chat_model = ChatGoogleGenerativeAI(api_key=API_KEY, model='gemini-1.5-flash', temperature=0.2)

# Embedding model used for both the document chunks and the user queries.
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=API_KEY, model="models/embedding-001")

In [7]:
def get_embedding(text):
    """Return the vector produced by ``embedding_model.embed_query`` for ``text``."""
    return embedding_model.embed_query(text)

In [8]:
import faiss
import numpy as np
def build_index(chunks):
    """Embed every chunk and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of text chunks. Each one is embedded with
            ``get_embedding``.

    Returns:
        A ``(index, chunks)`` tuple: the populated ``faiss.IndexFlatL2`` and
        the same ``chunks`` object that was passed in. Vector ``i`` in the
        index corresponds to ``chunks[i]``.

    Raises:
        ValueError: If ``chunks`` is empty, since the vector dimension cannot
            be determined without at least one embedding.
    """
    if len(chunks) == 0:
        raise ValueError("cannot build an index from an empty list of chunks")

    # FAISS operates on float32 matrices; build the array with that dtype
    # explicitly instead of relying on an implicit float64 conversion.
    embeddings = np.asarray([get_embedding(chunk) for chunk in chunks], dtype="float32")
    dims = embeddings.shape[1]
    index = faiss.IndexFlatL2(dims)
    index.add(embeddings)
    return index, chunks
    

In [45]:
from langchain_core.prompts import PromptTemplate
def handle_query(query, index, chunks, k=4):
    """Answer ``query`` with the chat model, using the nearest chunks as context.

    The query is embedded with ``get_embedding``, the ``k`` nearest vectors are
    looked up in ``index``, and the matching entries of ``chunks`` are joined
    with blank lines to form the prompt's context.

    Args:
        query: The user's question.
        index: FAISS index whose vector ``i`` corresponds to ``chunks[i]``.
        chunks: The text chunks that were indexed.
        k: Maximum number of chunks to retrieve (default 4).

    Returns:
        The model's response as a string (parsed by ``StrOutputParser``).
    """
    query_emb = np.asarray(get_embedding(query), dtype="float32").reshape(1, -1)

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with -1, and chunks[-1] would silently inject the last chunk.
    top_k = min(k, index.ntotal)
    if top_k > 0:
        _, indices = index.search(query_emb, k=top_k)
        relevant_chunks = [chunks[i] for i in indices[0] if i >= 0]
    else:
        relevant_chunks = []
    context = "\n\n".join(relevant_chunks)

    prompt = PromptTemplate(
        template="""
        You are an helpful assistant to assist healthcare professionals.
        
        Context:
        {context}
        
        Question:
        {query}
        
        Answer:""",
        input_variables=['context', 'query']
    )

    parser = StrOutputParser()
    chain = prompt | chat_model | parser
    response = chain.invoke({"query": query, 'context': context})
    return response

In [41]:
# Extract the text of the sample PDF located next to the notebook.
text = read_pdf('./cardiology.pdf')
# Bare expression: displays the whole extracted string as the cell output.
text

'Cardiology is a branch of medicine that deals with the disorders of the heart and the blood vessels.\nIt involves medical diagnosis and treatment of congenital heart defects, coronary artery disease,\nand heart failure.\nCardiologists are doctors who specialize in this field and are trained to manage complex cardiac\nconditions.\nElectrocardiograms (ECGs) are commonly used to measure the electrical activity of the heart.\nEchocardiography uses ultrasound waves to create images of the heart for diagnostic purposes.\nStress tests help to determine how well the heart functions under physical activity.\nCardiac catheterization is a procedure where a thin tube is inserted into the heart for diagnosis or\ntreatment.\nCommon risk factors for heart disease include high blood pressure, smoking, diabetes, and high\ncholesterol.\nPreventive cardiology focuses on lifestyle changes to reduce heart disease risks.\nHypertension is one of the most important modifiable risk factors for cardiovascular 

In [42]:
# Split the extracted text into token-bounded chunks for embedding.
chunks = chunk_text(text)
# Bare expression: displays the full list of chunks as the cell output.
chunks

['Cardiology is a branch of medicine that deals with the disorders of the heart and the blood vessels. It involves medical',
 'diagnosis and treatment of congenital heart defects, coronary artery disease, and heart failure. Cardiologists are',
 'doctors who specialize in this field and are trained to manage complex cardiac conditions. Electrocardiograms (ECGs) are commonly used',
 'to measure the electrical activity of the heart. Echocardiography uses ultrasound waves to create images of the heart for diagnostic',
 'purposes. Stress tests help to determine how well the heart functions under physical activity. Cardiac catheterization is a procedure where a',
 'thin tube is inserted into the heart for diagnosis or treatment. Common risk factors for heart disease include high blood pressure,',
 'smoking, diabetes, and high cholesterol. Preventive cardiology focuses on lifestyle changes to reduce heart disease',
 'risks. Hypertension is one of the most important modifiable risk factors for

In [43]:
# Embed every chunk and build the FAISS index. build_index returns the same
# `chunks` object it was given, so rebinding `chunks` here does not change it.
index, chunks = build_index(chunks)

In [47]:
# Example question for the pipeline.
# NOTE(review): "diabeties" is misspelled; left untouched because it is runtime input.
query = "tell me 2 points to manage diabeties"

# Retrieve the nearest chunks for the query, ask the chat model, and print its reply.
answer = handle_query(query, index, chunks)
print(answer)

To manage diabetes effectively, focus on these two key points:

1. **Maintain healthy blood sugar levels:** This involves careful monitoring of blood glucose levels, following a prescribed diet plan (often involving portion control and limiting sugary foods and refined carbohydrates), and adhering to any medication regimen (insulin or oral medications) as directed by your doctor.

2. **Adopt a healthy lifestyle:** This encompasses regular physical activity (aim for at least 150 minutes of moderate-intensity exercise per week), maintaining a healthy weight, and managing stress levels.  These lifestyle changes significantly contribute to better blood sugar control and overall cardiovascular health.
