In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import joblib

In [2]:
# Load the job dataset from the CSV file in the working directory.
csv_path = "deemanding_job.csv"
df = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,interest,skills,job_role
0,Computer science and technology,"Python, Numpy, Pandas, Scikit-Learn, TensorFlo...",Machine Learning Engineer
1,Computer science and technology,"HTML, CSS, JavaScript, React, Angular, Node.js",Frontend Developer
2,Computer science and technology,"Java, Spring, Hibernate, REST APIs, Microservices",Backend Developer
3,Computer science and technology,"React, Redux, TypeScript, JavaScript, HTML, CSS",Full Stack Developer
4,Computer science and technology,"SQL, Tableau, Excel, Power BI, Data Visualization",Data Analyst


In [4]:
# Normalise the free-text skills column: lower-case it and turn every comma
# (together with any whitespace around it) into a single space.
# A space is substituted instead of deleting the comma so that entries written
# without a space after the delimiter ("Python,Numpy") do not fuse into one
# token ("pythonnumpy"). For ", "-delimited text the result is unchanged.
df["skills"] = (
    df["skills"]
    .str.lower()
    .str.replace(r"\s*,\s*", " ", regex=True)  # explicit regex: the default differs across pandas versions
    .str.strip()  # a trailing delimiter would otherwise leave a dangling space
)

### Feature Engineering

In [5]:
# Integer-encode the job-role strings to form the target vector y.
label_encoder = LabelEncoder()
label_encoder.fit(df["job_role"])
y = label_encoder.transform(df["job_role"])

In [7]:
# Integer-encode the interest strings and store the codes in a new column.
interest_encoder = LabelEncoder()
interest_codes = interest_encoder.fit(df["interest"]).transform(df["interest"])
df["interest_encoded"] = interest_codes

In [8]:
# Preview the first five rows again, now with the interest_encoded column.
df.head(n=5)

Unnamed: 0,interest,skills,job_role,interest_encoded
0,Computer science and technology,python numpy pandas scikit-learn tensorflow keras,Machine Learning Engineer,2
1,Computer science and technology,html css javascript react angular node.js,Frontend Developer,2
2,Computer science and technology,java spring hibernate rest apis microservices,Backend Developer,2
3,Computer science and technology,react redux typescript javascript html css,Full Stack Developer,2
4,Computer science and technology,sql tableau excel power bi data visualization,Data Analyst,2


In [9]:
# Feature matrix: the cleaned skills text plus the integer interest code.
feature_columns = ["skills", "interest_encoded"]
X = df[feature_columns]

In [10]:
# Column-wise preprocessing for the two feature types:
#   * 'skills'           -> bag-of-words token counts
#   * 'interest_encoded' -> one-hot indicator columns
#
# The vectorizer overrides two CountVectorizer defaults that are tuned for
# prose rather than for skill lists:
#   - token_pattern: the default keeps only runs of two or more alphanumeric
#     characters, which drops single-letter skills ("c", "r") and breaks up
#     names such as "c++", "c#", "node.js" and "scikit-learn". Here a token is
#     any run of characters that is neither whitespace nor a comma, so those
#     names survive intact.
#   - stop_words: the built-in 'english' list is not used because it contains
#     ordinary words that are also skill names (for example "go").
skills_vectorizer = CountVectorizer(token_pattern=r"(?u)[^\s,]+")

preprocessor = ColumnTransformer(
    transformers=[
        ('text', skills_vectorizer, 'skills'),
        ('cat', OneHotEncoder(), ['interest_encoded'])
    ]
)

In [11]:
# Chain the preprocessing step and the classifier into a single estimator.
forest = RandomForestClassifier(n_estimators=100, random_state=42)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', forest),
])

In [12]:
# Fit the preprocessing steps and the random forest on all rows of X and y.
pipeline.fit(X=X, y=y)

In [13]:
# Bundle the fitted pipeline, both encoders and the training data, then
# write the bundle to a single joblib file.
artifacts = {
    'pipeline': pipeline,
    'label_encoder': label_encoder,
    'interest_encoder': interest_encoder,
    'X': X,
    'y': y,
}
joblib.dump(artifacts, 'deemanding_job_model.joblib')

print("Model saved successfully.")

Model saved successfully.
