In [1]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Location of the resume dataset (CSV) on the local machine.
file_path = r'C:/Users/hp5cd/Downloads/UpdatedResumeDataSet.csv'

# Read the whole dataset into memory.
df = pd.read_csv(file_path)

# Quick sanity check: confirm the load and show the first rows and the
# column index.
print("✅ Dataset loaded successfully.")
preview = df.head()
print(preview)
print("Columns:", df.columns)

# spaCy pipeline used by the text preprocessing step below.
nlp = spacy.load('en_core_web_sm')

def preprocess(text):
    """Reduce *text* to a space-separated string of lemmas.

    A missing value (as detected by ``pd.isnull``) yields an empty string.
    Otherwise the text is lower-cased and passed through the module-level
    spaCy pipeline ``nlp``; tokens that are stop words or are not purely
    alphabetic are dropped, and the lemmas of the remaining tokens are
    joined with single spaces.
    """
    if pd.isnull(text):
        return ""

    lemmas = []
    for tok in nlp(text.lower()):
        # Skip stop words and anything that is not purely alphabetic.
        if tok.is_stop or not tok.is_alpha:
            continue
        lemmas.append(tok.lemma_)
    return " ".join(lemmas)

# --- 1. Clean every resume with the shared preprocessor ---------------------
print("⚙️ Preprocessing resumes...")
df['Cleaned_Resume'] = df['Resume'].apply(preprocess)

# --- 2. Describe the target role and clean it the same way ------------------
job_desc = """
We are looking for a data scientist with experience in machine learning, Python programming,
NLP techniques, data preprocessing, model building, and deployment.
"""
job_desc_cleaned = preprocess(job_desc)

# --- 3. Vectorise resumes and the job description together ------------------
# The job description is appended as the final document so that it is
# fitted with the same vocabulary and IDF weights as the resumes.
corpus = [*df['Cleaned_Resume'].tolist(), job_desc_cleaned]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

# Every row except the last is a resume; the last row is the job description.
n_resumes = X.shape[0] - 1
resume_vectors = X[:n_resumes]
job_vector = X[n_resumes:]

# --- 4. Score each resume by cosine similarity to the job description -------
pairwise = cosine_similarity(resume_vectors, job_vector)  # one column per job
similarities = pairwise.flatten()
df['Score'] = similarities

# --- 5. Rank (highest score first), show the top rows, and save -------------
df_sorted = df.sort_values(by='Score', ascending=False)

print("\n🏆 Top Ranked Resumes:")
top_rows = df_sorted[['Score', 'Category']].head()
print(top_rows)

output_file = 'ranked_resumes.csv'
export_columns = ['Score', 'Category', 'Resume']
df_sorted[export_columns].to_csv(output_file, index=False)
print(f"\n✅ Ranked results saved to {output_file}")

✅ Dataset loaded successfully.
       Category                                             Resume
0  Data Science  Skills * Programming Languages: Python (pandas...
1  Data Science  Education Details \r\nMay 2013 to May 2017 B.E...
2  Data Science  Areas of Interest Deep Learning, Control Syste...
3  Data Science  Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...
4  Data Science  Education Details \r\n MCA   YMCAUST,  Faridab...
Columns: Index(['Category', 'Resume'], dtype='object')
⚙️ Preprocessing resumes...

🏆 Top Ranked Resumes:
       Score      Category
17  0.293837  Data Science
37  0.293837  Data Science
7   0.293837  Data Science
27  0.293837  Data Science
38  0.276745  Data Science

✅ Ranked results saved to ranked_resumes.csv
