# **Resume Screening App**

        Zain Nofal 2021723
        Hamza Asif 2021196

In [1]:
import os
import pdfplumber
import re


In [3]:
dataset_path = "/Users/zainnofal/Desktop/Resume Screening/data/data"


### Loading and Preprocessing the Data

In [2]:
import os
import pdfplumber
import re

def extract_text_from_pdfs(folder_path):
    resumes = {}
    for folder in os.listdir(folder_path):
        folder_dir = os.path.join(folder_path, folder)
        
        # Skip if it's a system file or non-directory
        if not os.path.isdir(folder_dir):
            continue
        
        resumes[folder] = []
        for file in os.listdir(folder_dir):
            if file.endswith('.pdf'):
                file_path = os.path.join(folder_dir, file)
                with pdfplumber.open(file_path) as pdf:
                    text = " ".join(page.extract_text() for page in pdf.pages)
                    cleaned_text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
                    resumes[folder].append(cleaned_text)
    return resumes

# Example usage:
dataset_path = "/Users/zainnofal/Desktop/Resume Screening/data/data"
resume_data = extract_text_from_pdfs(dataset_path)


### Feature Extraction

In [18]:
import spacy
import re

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

# Custom list of skills (you can extend this list as needed)
skills_list = ["accounting", "finance", "auditing", "management", "data analysis", "excel", "python", "teamwork", "communication"]

# List of common education terms
education_keywords = ["bachelor", "master", "phd", "degree", "university", "college", "diploma", "MBA", "MS", "BA"]

def extract_features(resume_text):
    doc = nlp(resume_text)
    features = {
        "education": [],
        "skills": [],
        "experience": [],
    }

    # Extract education-related entities based on specific keywords
    for ent in doc.ents:
        if any(keyword.lower() in ent.text.lower() for keyword in education_keywords):
            features["education"].append(ent.text)

    # Extract skills based on the predefined list
    for token in doc:
        if token.text.lower() in skills_list:
            features["skills"].append(token.text)

    # Extract experience-related information using regex for common phrases
    experience_keywords = ["worked", "experience", "responsible for", "managed", "led", "performed"]
    for sent in doc.sents:
        for keyword in experience_keywords:
            if keyword in sent.text.lower():
                features["experience"].append(sent.text)

    # Remove duplicates in skills and experience
    features["skills"] = list(set(features["skills"]))
    features["experience"] = list(set(features["experience"]))
    
    return features

# Example usage:
features = extract_features(resume_data["APPAREL"][4])
print(features)


{'education': ['Mount Holyoke College', 'Portugal Technical Skills Mastery of'], 'skills': ['communication', 'excel', 'Excel', 'management'], 'experience': ['Strong working knowledge of DataTrak ABC and ClubOs software Ability to work with several operating systems including Microsoft and Mac OSX Fluent in spoken Portuguese; proficient in written Portuguese Knowledge of spoken and written Spanish', 'Listened attentively to account feedback and worked with product development team to introduce competitive product offerings.', 'Company Name City , State Trade Channel Manager 10/2002 to 01/2006 Built and managed 30 new accounts and $15 million in sales.', 'Qualifications Self-motivated Team training and development Strategic and creative thinker Process improvement Outgoing and cheerful attitude Interpersonal, oral, and written communication skills Experience Company Name City , State Membership Sales Representative 01/2015 to Current Achieve monthly individual and team sales and attritio

### Similarity Matching

In [28]:
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

def compute_similarity(resume_text, job_description):
    resume_embedding = model.encode(resume_text, convert_to_tensor=True)
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    return util.pytorch_cos_sim(resume_embedding, job_embedding).item()

# Example usage:
job_description = "Need a Chef Prepare various a la carte menu items and seasonal specialties and diverse background in food service industry"
similarity = compute_similarity(resume_data["CHEF"][3], job_description)
print(f"Similarity: {similarity}")


Similarity: 0.7282781600952148


### Sentiment Analysis

In [29]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    sentiment = analyzer.polarity_scores(text)
    return sentiment["compound"]

In [30]:
# Example usage:
summary_text = "I am an amazing chef that creates amazing dishes."
sentiment_score = analyze_sentiment(summary_text)
print(f"Sentiment Score: {sentiment_score}")


Sentiment Score: 0.8658


### Scoring and Ranking

In [31]:
def calculate_score(similarity, features, sentiment_score):
    skill_score = len(features["skills"])
    experience_score = len(features["experience"])
    return 0.4 * similarity + 0.3 * skill_score + 0.2 * experience_score + 0.1 * sentiment_score

In [32]:
# Example usage:
score = calculate_score(similarity, features, sentiment_score)
print(f"Overall Score: {score}")


Overall Score: 3.9778912640380866
