In [2]:
import os
from dotenv import load_dotenv

load_dotenv()


api_key=os.getenv("GEMINI_API_KEY")


### 1. initialize Model

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

llm = ChatGoogleGenerativeAI(
    api_key = api_key,
    model="gemini-2.0-flash"
)

In [4]:
response = llm.invoke("tell me a joke")
print(response.content)

Why don't scientists trust atoms?

Because they make up everything!


### Load the doc

In [None]:
from langchain_community.document_loaders import PyPDFLoader


loader = PyPDFLoader(r"D:\JMM Internship\M7 - Generative ai\Task 2\Flask RAG App\DATA\Zabih_Resume-6_1_1.pdf")

documents = loader.load()

In [226]:
documents

[Document(metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2024-11-12T18:53:55+05:00', 'author': 'Zabih', 'moddate': '2024-11-12T18:53:55+05:00', 'source': 'Zabih_Resume.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Zabihullah \n03190904793          Zabihullah18381@gmail.com          GitHub          LinkedIn          Kaggle          Portfolio \n \n \nEducation \n \nBachelor of Science in Software Engineering \nAbasyn University Peshawar: CGPA 3.3 \nYear of Graduation: 2024 \nSummery \n \nAI and machine learning engineer with nearly one year of hands-on experience in developing intelligent applications. Successfully \ndeveloped AI chatbots, predictive models, and web applications using advanced technologies like Langchain and Fastapi. Skilled in Python, \ndata analysis, and deploying AI-driven solutions to enhance software capabilities. \n \nWork Experience \nDataWars.io                                              

### Making chunks

In [227]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

In [228]:
len(docs)

9

### Initialize embedding model

In [229]:

embeddings = GoogleGenerativeAIEmbeddings(google_api_key=api_key, model="models/embedding-001")

### setting `Weaviate` vector db

In [230]:
from dotenv import load_dotenv
load_dotenv()
weaviate_api_key = os.getenv("WEAVIATE_API_KEY")
weaviate_url = os.getenv("WEAVIATE_URL")

In [231]:
import weaviate
from weaviate.classes.init import Auth

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
)


In [232]:
from langchain_weaviate.vectorstores import WeaviateVectorStore


vector_db = WeaviateVectorStore.from_documents(docs, embeddings, client=client)

In [233]:
# print(vector_db.similarity_search("who is Zabihullah", k=3)[0].page_content)

### Prompt template

In [234]:
from langchain_core.prompts import ChatPromptTemplate

template= """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Question: {question}
Context: {context}
Answer:
"""

prompt = ChatPromptTemplate.from_template(template)

In [235]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [236]:
output_parser=StrOutputParser()

In [237]:
retriever = vector_db.as_retriever(search_kwargs={"k": 5})


In [238]:

response = retriever.invoke("who is zabih")
for i in response:
    print("_"*80)
    print(i)

________________________________________________________________________________
page_content='Zabihullah 
03190904793          Zabihullah18381@gmail.com          GitHub          LinkedIn          Kaggle          Portfolio 
 
 
Education 
 
Bachelor of Science in Software Engineering 
Abasyn University Peshawar: CGPA 3.3 
Year of Graduation: 2024 
Summery 
 
AI and machine learning engineer with nearly one year of hands-on experience in developing intelligent applications. Successfully' metadata={'page_label': '1', 'creationdate': datetime.datetime(2024, 11, 12, 18, 53, 55, tzinfo=datetime.timezone(datetime.timedelta(seconds=18000))), 'source': 'Zabih_Resume.pdf', 'moddate': datetime.datetime(2024, 11, 12, 18, 53, 55, tzinfo=datetime.timezone(datetime.timedelta(seconds=18000))), 'creator': 'Microsoft® Word 2016', 'total_pages': 1.0, 'producer': 'Microsoft® Word 2016', 'page': 0.0, 'author': 'Zabih'}
________________________________________________________________________________
page_c

In [239]:
rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)
   

In [240]:
print(rag_chain.invoke("Who is Zabih?"))

Zabihullah is an AI and machine learning engineer with nearly one year of hands-on experience in developing intelligent applications. He is a graduate of Abasyn University Peshawar with a Bachelor of Science in Software Engineering and a CGPA of 3.3. He has experience as an AI Researcher at DataWars.io and as an Artificial Intelligence Intern at Kairiz Cyber Security.
