In [24]:
import os
from langchain_community.document_loaders import PyPDFLoader

def load_resume(file_path):
    """Load a PDF resume from disk using ``PyPDFLoader``.

    Args:
        file_path: Path to the PDF file to load.

    Returns:
        The value returned by ``PyPDFLoader(file_path).load()``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        # The path is present: delegate parsing to the PDF loader.
        pdf_loader = PyPDFLoader(file_path)
        return pdf_loader.load()

    raise FileNotFoundError(f"The file {file_path} does not exist.")

In [25]:
import pandas as pd

def load_jobs(path="data/jobs.csv"):
    """Read the job listings CSV into a pandas DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``"data/jobs.csv"``.

    Returns:
        The DataFrame produced by ``pd.read_csv(path)``.
    """
    jobs_frame = pd.read_csv(path)
    return jobs_frame

In [26]:
def get_resume_text(docs):
    """Concatenate the text of every loaded document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a single newline between them
        (an empty string when ``docs`` is empty).
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n".join(page_texts)

In [27]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
def chunk_resume(docs, chunk_size=500, chunk_overlap=50):
    """Split the resume's full text into overlapping chunks.

    Args:
        docs: Loaded resume documents; passed to ``get_resume_text`` to obtain
            one combined string.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to the same splitter.

    Returns:
        The result of ``split_text`` on the combined resume text.
    """
    # Build the combined text first, then the splitter, mirroring the
    # evaluation order callers have always seen.
    combined_text = get_resume_text(docs)
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_text(combined_text)

In [28]:
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')

def rank_jobs_for_resume(jobs_df, resume_text):
    """Score every job against the resume and return the jobs sorted best-first.

    Uses the notebook-level ``model`` (a ``SentenceTransformer``) to embed the
    resume text and each job description, and ``util.cos_sim`` to compare them.

    Args:
        jobs_df: DataFrame of jobs. Must contain a ``job_description`` column
            unless it has no rows.
        resume_text: Full resume text to compare against each description.

    Returns:
        A new DataFrame (``jobs_df`` is not modified) with an added
        ``similarity_score`` column, sorted by that column in descending order
        and re-indexed from 0.
    """
    if len(jobs_df) == 0:
        # No rows means nothing to embed or compare; return the same columns
        # plus an empty score column instead of handing an empty batch to the
        # encoder and cos_sim.
        return jobs_df.assign(
            similarity_score=pd.Series(dtype="float32")
        ).reset_index(drop=True)

    # Missing descriptions (NaN) are not strings; encode them as empty text so
    # one incomplete row does not abort the whole ranking.
    descriptions = jobs_df["job_description"].fillna("").astype(str).tolist()

    resume_emb = model.encode(resume_text, convert_to_tensor=True)
    job_embs = model.encode(descriptions, convert_to_tensor=True)

    # cos_sim returns one row per left-hand input; the single resume is row 0.
    all_scores = util.cos_sim(resume_emb, job_embs)[0].cpu().numpy()

    return (
        jobs_df.assign(similarity_score=all_scores)
        .sort_values(by="similarity_score", ascending=False)
        .reset_index(drop=True)
    )

In [29]:
def retrieve_chunks(job_description, chunks, chunk_embeddings, threshold=0.2):
    """Select the resume chunks that are similar enough to a job description.

    Uses the notebook-level ``model`` to embed the job description and
    ``util.cos_sim`` to compare it with the precomputed chunk embeddings.

    Args:
        job_description: Text of the job to match against.
        chunks: Resume text chunks, in the same order as ``chunk_embeddings``.
        chunk_embeddings: Embeddings for ``chunks`` (one per chunk).
        threshold: Minimum cosine similarity for a chunk to be kept.

    Returns:
        A list of the chunks whose similarity is ``>= threshold``, in their
        original order. Empty when ``chunks`` is empty.
    """
    chunks = list(chunks)
    if not chunks:
        # Nothing to score (e.g. a resume with no extractable text); skip the
        # encoder and the similarity computation entirely.
        return []

    job_emb = model.encode(job_description, convert_to_tensor=True)
    # Row 0 holds the scores of the single job description against every chunk.
    scores = util.cos_sim(job_emb, chunk_embeddings)[0].cpu().numpy()

    return [chunk for score, chunk in zip(scores, chunks) if score >= threshold]

In [30]:
def generate_email_rag(chunks, job, llm_client=None, llm_model="llama-3.3-70b-versatile"):
    """Ask a chat-completion model to draft a cold email from resume chunks.

    Args:
        chunks: Iterable of resume text snippets; joined with blank lines to
            form the context section of the prompt.
        job: Mapping-like object supporting ``job['job_title']`` and
            ``job['job_description']`` (e.g. a DataFrame row or a dict).
        llm_client: Object exposing ``chat.completions.create(model=...,
            messages=...)``. When ``None`` (the default), the notebook-level
            ``client`` is used, exactly as before.
        llm_model: Model name forwarded to the client.

    Returns:
        ``response.choices[0].message.content`` from the completion response.

    Raises:
        NameError: If ``llm_client`` is not given and no global ``client``
            has been defined in the notebook.
    """
    context = "\n\n".join(chunks)

    # The sentence-count bullet uses a plain ASCII hyphen: the previous text
    # held a mis-encoded en dash that reached the model as garbage characters.
    prompt = f"""
Write a short cold email (<120 words) applying for this role: {job['job_title']}.

Use ONLY the following resume context (do not add anything not present here):
{context}

Job description:
{job['job_description']}

Your email must:
- highlight the skills or projects that appear in the context
- be confident and concise
- be 5-6 sentences
- end with a clear call to action
"""

    if llm_client is None:
        # NOTE(review): `client` is not defined in the visible cells; it must
        # be created earlier in the notebook before this fallback is used.
        llm_client = client

    response = llm_client.chat.completions.create(
        model=llm_model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content


In [31]:
import yagmail

def send_email(to, subject, body, attachment=None):
    """Send an email through yagmail using credentials from the environment.

    The sender address and password are read from the ``EMAIL`` and
    ``APP_PASSWORD`` environment variables at call time.

    Args:
        to: Recipient, forwarded as yagmail's ``to``.
        subject: Subject line.
        body: Message body, forwarded as yagmail's ``contents``.
        attachment: Optional attachment, forwarded as yagmail's
            ``attachments``. Defaults to ``None``.

    Returns:
        None.
    """
    sender_address = os.getenv("EMAIL")
    app_password = os.getenv("APP_PASSWORD")

    # Use the client as a context manager so the SMTP connection is closed
    # when sending finishes or fails, instead of being left open.
    with yagmail.SMTP(sender_address, app_password) as yag:
        yag.send(to=to, subject=subject, contents=body, attachments=attachment)

In [32]:
# Load the resume PDF, then derive both its full text and its text chunks.
resume_docs = load_resume("../data/resume.pdf")
resume_text = get_resume_text(resume_docs)
resume_chunks = chunk_resume(resume_docs)   
# Embed every chunk once up front; a later cell passes these embeddings to
# retrieve_chunks together with resume_chunks.
resume_chunk_embeddings = model.encode(resume_chunks, convert_to_tensor=True)

# Load the job listings from the CSV next to the resume.
jobs = load_jobs("../data/jobs.csv")

# Rank all jobs by similarity to the full resume text (best match first).
jobs_ranked = rank_jobs_for_resume(jobs, resume_text)

# Show the five highest-scoring jobs with their scores.
print(jobs_ranked[["job_title", "similarity_score"]].head(5))


                 job_title  similarity_score
0  Machine Learning Intern          0.430860
1      Data Analyst Intern          0.391007
2               NLP Intern          0.369782
3       AI Research Intern          0.355409


In [33]:
# Take the first row of the ranked table (sorted by descending similarity
# score) as the job to apply for.
top_job = jobs_ranked.iloc[0]
job_description = top_job["job_description"]
# Keep only the resume chunks that meet retrieve_chunks' default similarity
# threshold against this job's description.
relevant_chunks = retrieve_chunks(job_description, resume_chunks, resume_chunk_embeddings)

In [34]:
# Draft the cold email for the top-ranked job from the retrieved resume chunks.
email_content = generate_email_rag(relevant_chunks, top_job)

In [35]:
from dotenv import load_dotenv

# Load variables from a .env file into the environment before reading them.
# NOTE(review): presumably this is also what provides EMAIL / APP_PASSWORD
# for send_email - confirm against the project's .env.
load_dotenv()
TEST_EMAIL = os.getenv("TEST_EMAIL")
attachment_path = "../data/resume_x.pdf"

# Ask for explicit confirmation; surrounding whitespace and letter case are
# ignored so that answers like "Y" or "y " are accepted.
send = input("Send email? (y/n): ")
if send.strip().lower() == 'y':
    # Fail early with a clear message rather than attempting to send an
    # email whose attachment is missing.
    if not os.path.isfile(attachment_path):
        raise FileNotFoundError(f"The file {attachment_path} does not exist.")

    send_email(
        # Currently routed to the test inbox; the job's own contact address
        # is kept below for reference.
        #to=job['contact_email'],
        to=TEST_EMAIL,
        # Use top_job (the job email_content was written for) so the subject
        # and the body always refer to the same posting.
        subject=f"Application for {top_job['job_title']}",
        body=email_content,
        attachment=attachment_path
    )
    print("Email sent!")

Email sent!
