In [1]:
!pip install nltk scikit-learn pandas numpy



Defaulting to user installation because normal site-packages is not writeable


In [2]:
import nltk
import re
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('stopwords')
from nltk.corpus import stopwords


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\VARSHINI\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


In [6]:
with open("resumes.txt", "r") as file:
    resume_text = file.read()

with open("job_description.txt", "r") as file:
    job_text = file.read()


In [7]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z ]', '', text)
    words = text.split()
    words = [word for word in words if word not in stopwords.words('english')]
    return " ".join(words)


In [8]:
# Cleaning resume and job description text
resume_clean = clean_text(resume_text)
job_clean = clean_text(job_text)


In [9]:
# Converting resume and job description into TF-IDF vectors
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform([resume_clean, job_clean])


In [10]:
# Calculating cosine similarity between resume and job description
similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

match_percentage = round(similarity_score[0][0] * 100, 2)
print("Resume Match Percentage:", match_percentage, "%")


Resume Match Percentage: 24.62 %


In [11]:
skills_list = [
    "python", "machine learning", "deep learning", "data analysis",
    "sql", "nlp", "data visualization", "pandas", "numpy"
]


In [12]:
# Extracting skills from resume and job description
resume_skills = [skill for skill in skills_list if skill in resume_clean]
job_skills = [skill for skill in skills_list if skill in job_clean]
# Identifying missing skills
missing_skills = list(set(job_skills) - set(resume_skills))


In [13]:
print("Skills in Resume:", resume_skills)
print("Skills Required for Job:", job_skills)
print("Missing Skills:", missing_skills)


Skills in Resume: ['python', 'machine learning', 'data analysis', 'sql', 'pandas']
Skills Required for Job: ['python', 'machine learning', 'deep learning', 'sql', 'nlp', 'data visualization']
Missing Skills: ['deep learning', 'nlp', 'data visualization']


In [14]:
print("\n----- FINAL RESULT -----")
print(f"Resume Match Percentage: {match_percentage}%")

if missing_skills:
    print("Skills to Improve:", ", ".join(missing_skills))
else:
    print("Your resume perfectly matches the job description!")



----- FINAL RESULT -----
Resume Match Percentage: 24.62%
Skills to Improve: deep learning, nlp, data visualization
