# Job Classifiction

In [None]:
import pandas as pd
import numpy as np
import re
import string

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.utils.multiclass import unique_labels
from sklearn.svm import LinearSVC
import nltk
from nltk.corpus import stopwords
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB

In [None]:
jobs = pd.read_csv("./Job_Posts.csv") 


nltk.download('stopwords')
STOPWORDS = set(stopwords.words('english'))

def clean_text(text):
    if pd.isna(text):
        return ""
    text = re.sub(r'<.*?>', ' ', text) 
    text = re.sub(r'[^a-zA-Z]', ' ', text) 
    text = text.lower()
    text = " ".join([word for word in text.split() if word not in STOPWORDS])
    return text.strip()

jobs['job_description_clean'] = jobs['job_description'].apply(clean_text)
jobs = jobs.dropna(subset=['job_category1', 'job_description_clean'])


jobs.head()

In [None]:
label_encoder = LabelEncoder()
jobs['job_category_encoded'] = label_encoder.fit_transform(jobs['job_category1'])

X_train, X_test, y_train, y_test = train_test_split(jobs['job_description_clean'], jobs['job_category_encoded'], test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [None]:
model = LinearSVC()
model.fit(X_train_vec, y_train)

In [None]:
y_pred = model.predict(X_test_vec)

labels_used = unique_labels(y_test, y_pred)
print(classification_report(y_test, y_pred, labels=labels_used, target_names=label_encoder.inverse_transform(labels_used)))

In [None]:
def predict_category(text):
    text_clean = clean_text(text)
    vec = vectorizer.transform([text_clean])
    pred = model.predict(vec)
    return label_encoder.inverse_transform(pred)[0]

# Example
print(predict_category("Looking for someone with strong python skills."))

# Job Recommendations

In [None]:
df = pd.read_csv("./Resume.csv")

df.dropna(inplace=True)

def clean_text(text):
    text = text.lower()
    text = re.sub(r'<[^>]+>', '', text)
    text = re.sub(r'\d+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\s+', ' ', text) 
    return text.strip()

df['Cleaned_Resume'] = df['Resume_str'].apply(clean_text)

print(df[['Category', 'Cleaned_Resume']].head())

#df.head()

In [None]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Cleaned_Resume'])

y = df['Category']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = MultinomialNB()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

                        precision    recall  f1-score   support

            ACCOUNTANT       0.77      0.79      0.78        29
              ADVOCATE       0.69      0.30      0.42        30
           AGRICULTURE       0.00      0.00      0.00         8
               APPAREL       0.75      0.15      0.25        20
                  ARTS       0.00      0.00      0.00        18
            AUTOMOBILE       0.00      0.00      0.00         6
              AVIATION       0.56      0.71      0.62        21
               BANKING       0.85      0.48      0.61        23
                   BPO       0.00      0.00      0.00         2
  BUSINESS-DEVELOPMENT       0.40      0.63      0.49        27
                  CHEF       0.81      0.71      0.76        24
          CONSTRUCTION       0.94      0.44      0.60        34
            CONSULTANT       0.50      0.05      0.09        20
              DESIGNER       0.82      0.74      0.78        19
         DIGITAL-MEDIA       1.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# True Job recommendations

In [46]:
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Combine job title + description for matching
jobs['combined_features'] = jobs['job_title'].fillna('') + ' ' + jobs['job_description'].fillna('')

# Clean combined text
jobs['combined_features_clean'] = jobs['combined_features'].apply(clean_text)

# Vectorize job listings
job_vectorizer = TfidfVectorizer(max_features=5000)
job_vectors = job_vectorizer.fit_transform(jobs['combined_features_clean'])

# Step 2: Function to recommend jobs
def recommend_jobs(user_profile_text, top_n=5):
    # Clean and vectorize user profile
    user_clean = clean_text(user_profile_text)
    user_vec = job_vectorizer.transform([user_clean])

    # Compute cosine similarity
    similarity_scores = cosine_similarity(user_vec, job_vectors).flatten()

    # Get top N recommendations
    top_indices = similarity_scores.argsort()[::-1][:top_n]
    recommended_jobs = jobs.iloc[top_indices][['job_title', 'job_description', 'job_category1']]
    recommended_jobs['similarity'] = similarity_scores[top_indices]
    
    return recommended_jobs

# 🧪 Example Usage
user_input = """
Skilled in Python, SQL, and data analysis. Experience in building machine learning models. 
Looking for roles in software development or data science.
"""

recommendations = recommend_jobs(user_input, top_n=5)
import pandas as pd
from caas_jupyter_tools import display_dataframe_to_user
display_dataframe_to_user(name="Top Job Recommendations", dataframe=recommendations)


ModuleNotFoundError: No module named 'caas_jupyter_tools'