### Import Libraries

In [2]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import torch
from pathlib import Path

# Folder that later cells read parsed resumes from and write recommendations to.
OUTPUT_DIR = "output"

# Create the folder up front; an already-existing folder is not an error.
Path(OUTPUT_DIR).mkdir(parents=False, exist_ok=True)





### Load Parsed Resumes + Job Descriptions

In [4]:
# Parsed resumes are read from OUTPUT_DIR; the cleaned job descriptions are
# read from the separate "processed" folder.
resumes_df = pd.read_csv(f"{OUTPUT_DIR}/parsed_resumes.csv")
jobs_df = pd.read_csv(r"processed/job_descriptions_clean.csv")

# Report (rows, columns) of each frame as a quick sanity check.
for label, frame in (("Resumes:", resumes_df), ("Jobs:", jobs_df)):
    print(label, frame.shape)

# Cell result: the first rows of both frames, shown together as a tuple.
resumes_df.head(), jobs_df.head()


Resumes: (2484, 8)
Jobs: (22000, 15)


(       filename                            name email           phone  \
 0  10554236.pdf                            Wing   NaN             NaN   
 1  10674770.pdf                           Adobe   NaN             NaN   
 2  11163645.pdf                    Gail L. Lugo   NaN  2 864-472-7092   
 3  11759079.pdf  John R. Jones Accounting Award   NaN             NaN   
 4  12065211.pdf                      reconcileÂ   NaN     2001 - 2002   
 
              skills education  experience_years  \
 0  ['excel', 'aws']        []                 0   
 1         ['excel']        []                 0   
 2         ['excel']        []                 0   
 3         ['excel']        []                 0   
 4  ['excel', 'sql']        []                 0   
 
                                                 text  
 0  ACCOUNTANT Summary Financial Accountant specia...  
 1  STAFF ACCOUNTANT Summary Highly analytical and...  
 2  ACCOUNTANT Professional Summary To obtain a po...  
 3  SENIOR ACCOU

### Detect Job Description Column Automatically

In [5]:
# Candidate column names, in priority order: the first one present in
# jobs_df is taken as the job-description text column.
possible_desc_cols = [
    "clean_description",
    "description",
    "job_description",
    "text",
    "body",
    "jd",
    "content",
]

# First matching candidate, or None when none of them is a column of jobs_df.
job_text_col = next(
    (name for name in possible_desc_cols if name in jobs_df.columns),
    None,
)

print("Detected job description column:", job_text_col)

# Stop here rather than fail later with a confusing KeyError.
if job_text_col is None:
    raise ValueError("No job description column found.")


Detected job description column: clean_description


### Prepare Text for Embeddings

In [6]:
# Build the text that will be fed to the embedding model.
# Missing values are replaced with an empty string BEFORE casting to str:
# a bare .astype(str) would turn NaN into the literal text "nan", which the
# model would then embed as if it were real resume / job content.
resumes_df['embedding_text'] = resumes_df['text'].fillna("").astype(str)
jobs_df['embedding_text'] = jobs_df[job_text_col].fillna("").astype(str)

# Quick visual check of what will be embedded for the first few resumes.
resumes_df[['filename','embedding_text']].head()


Unnamed: 0,filename,embedding_text
0,10554236.pdf,ACCOUNTANT Summary Financial Accountant specia...
1,10674770.pdf,STAFF ACCOUNTANT Summary Highly analytical and...
2,11163645.pdf,ACCOUNTANT Professional Summary To obtain a po...
3,11759079.pdf,SENIOR ACCOUNTANT Experience Company Name June...
4,12065211.pdf,SENIOR ACCOUNTANT Professional Summary Senior ...


### Load SentenceTransformer Embedding Model

In [7]:
# Name of the sentence-embedding checkpoint used for both resumes and jobs.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Generate Resume & Job Embeddings

In [8]:
# Encode every resume's embedding text in one call; the result is requested
# as a tensor so it can be passed straight to the similarity computation.
resume_texts = resumes_df['embedding_text'].tolist()
resume_embeddings = model.encode(
    resume_texts,
    convert_to_tensor=True,
    show_progress_bar=True,
)


Batches:   0%|          | 0/78 [00:00<?, ?it/s]

In [9]:
# Encode every job's embedding text with the same model and settings used
# for the resumes, so both sets of vectors are directly comparable.
job_texts = jobs_df['embedding_text'].tolist()
job_embeddings = model.encode(
    job_texts,
    convert_to_tensor=True,
    show_progress_bar=True,
)


Batches:   0%|          | 0/688 [00:00<?, ?it/s]

### Compute Similarity Matrix (Resumes → Jobs)

In [10]:
# Pairwise cosine similarity: one row per resume, one column per job.
similarity_matrix = util.cos_sim(a=resume_embeddings, b=job_embeddings)

# Show the matrix dimensions as the cell result.
similarity_matrix.shape

torch.Size([2484, 22000])

### Generate Top-N Job Recommendations

In [11]:
# Number of job recommendations to keep per resume.
TOP_N = 5

# Candidate job-title columns, in priority order ("positionName" first, as
# before). If none is present the title falls back to an empty string.
possible_title_cols = [
    "positionName", "position_name", "job_title", "jobtitle",
    "title", "position"
]
job_title_col = next(
    (col for col in possible_title_cols if col in jobs_df.columns),
    None,
)
print("Detected job title column:", job_title_col)

# Rows of the similarity matrix are addressed by position below, so the
# matrix must line up exactly with the current resume and job frames.
expected_shape = (len(resumes_df), len(jobs_df))
if tuple(similarity_matrix.shape) != expected_shape:
    raise ValueError(
        f"similarity_matrix has shape {tuple(similarity_matrix.shape)}, "
        f"expected {expected_shape}; re-run the embedding cells."
    )

# Never ask topk for more entries than there are jobs (it would raise).
top_k = min(TOP_N, len(jobs_df))

# One batched top-k over all resumes; results come back sorted from the
# highest to the lowest similarity within each row.
top_scores, top_job_indices = torch.topk(similarity_matrix, k=top_k, dim=1)
top_scores = top_scores.tolist()
top_job_indices = top_job_indices.tolist()

# Pull the job columns out once instead of building a row Series per lookup.
if job_title_col is not None:
    job_titles = jobs_df[job_title_col].tolist()
else:
    job_titles = [""] * len(jobs_df)
job_descriptions = jobs_df[job_text_col].tolist()

recommendations = []

# enumerate() gives the positional row number, which is what the similarity
# matrix is indexed by; the DataFrame's index labels are deliberately unused.
for resume_pos, (filename, candidate_name) in enumerate(
    zip(resumes_df["filename"], resumes_df["name"])
):
    ranked = zip(top_job_indices[resume_pos], top_scores[resume_pos])
    for rank, (job_idx, score) in enumerate(ranked, start=1):
        recommendations.append({
            "resume": filename,
            "candidate_name": candidate_name,
            "rank": rank,
            "job_index": job_idx,
            "job_title": job_titles[job_idx],
            "job_description": job_descriptions[job_idx],
            "similarity_score": score
        })

# Explicit columns keep the schema stable even when there are no rows.
recommend_df = pd.DataFrame(
    recommendations,
    columns=[
        "resume", "candidate_name", "rank", "job_index",
        "job_title", "job_description", "similarity_score"
    ],
)
recommend_df.to_csv(f"{OUTPUT_DIR}/resume_job_recommendations.csv", index=False)

# Last expression of the cell so the preview is actually displayed.
recommend_df.head(10)


### Save Recommendations to Output Folder

In [12]:
# Destination CSV for the full recommendation table (DataFrame index omitted).
output_path = f"{OUTPUT_DIR}/job_recommendations.csv"
recommend_df.to_csv(path_or_buf=output_path, index=False)

print("Saved job recommendations to:", output_path)


Saved job recommendations to: output/job_recommendations.csv


### Preview Recommendations for a Single Resume

In [13]:
# Use the first resume in the table as the example to inspect.
sample_resume = resumes_df["filename"].iloc[0]

# Its recommendations, best match first (at most ten rows shown).
is_sample = recommend_df['resume'] == sample_resume
(
    recommend_df[is_sample]
    .sort_values(by="similarity_score", ascending=False)
    .head(10)
)



Unnamed: 0,resume,candidate_name,rank,job_index,job_title,job_description,similarity_score
0,10554236.pdf,Wing,1,14430,,Job Summary: This is a non-profit organization...,0.628113
1,10554236.pdf,Wing,2,16327,,RESPONSIBILITIES:A Kforce client is seeking an...,0.613144
2,10554236.pdf,Wing,3,7453,,General Purpose:Oversees the portfolio-level r...,0.608408
3,10554236.pdf,Wing,4,16253,,"Staff Accountant - Technology, Up to $65kStaff...",0.606709
4,10554236.pdf,Wing,5,14213,,RESPONSIBILITIES:Kforce has a client in Biller...,0.605732
