In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def load_data(path='students.csv'):
    """Read the student records CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, default 'students.csv'
        Location of the CSV file. The default keeps the original behaviour of
        reading ``students.csv`` from the current working directory.

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(path)
# Raw student records, loaded once for the rest of the notebook.
df = load_data()

# Synthetic department labels: one label per row of `df`, drawn from
# `major_spec` with the fixed weights below (they sum to 1). The legacy global
# seed makes the draw repeatable when the cells are run in order.
np.random.seed(1)
major_spec = [
    'TMBA', 'SEMBA', 'ME', 'IMMBA', 'PMBA', 'FMBA',
    'MFE', 'IMMS', 'GBP', 'EMBA', 'DMFBA',
]
major_generated = np.random.choice(
    major_spec,
    size=len(df),
    p=[0.08, 0.09, 0.1, 0.07, 0.06, 0.085, 0.115, 0.05, 0.15, 0.095, 0.105],
)
# Preprocess

def next_data(df, departments=None):
    """Clean the raw student frame and attach a department column.

    Steps, in order: drop the ``ssc_b`` and ``hsc_b`` columns, rename the
    remaining columns positionally, round the grade/percentage/salary columns,
    drop ``post_grad_spec``, set the salary of 'Not Placed' rows to 0, add the
    ``department`` column and expand the gender codes to 'Male'/'Female'.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw records. After ``ssc_b`` and ``hsc_b`` are removed it must have
        exactly 12 columns in the order given by ``column_names`` below,
        because the rename is positional.
    departments : array-like, optional
        One department label per row of ``df``. When omitted, the module-level
        ``major_generated`` array is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        A new cleaned frame; the frame passed in is not modified.
    """
    if departments is None:
        # Backward-compatible default: read (never rebind) the notebook-level
        # array, so no ``global`` statement is needed.
        departments = major_generated

    column_names = [
        'gender', '10_grade', '12_grade', 'spec_higher_edu',
        'degree_percent', 'undergrad_major', 'work_exp', 'employ_test',
        'post_grad_spec', 'post_grad_percent', 'status', 'salary',
    ]

    # ``drop`` returns a new frame, so the caller's data stays untouched.
    cleaned = df.drop(['ssc_b', 'hsc_b'], axis=1)
    cleaned.columns = column_names

    for col in ('salary', '10_grade', '12_grade', 'post_grad_percent', 'degree_percent'):
        cleaned[col] = cleaned[col].round()

    cleaned = cleaned.drop('post_grad_spec', axis=1)
    # Rows that were not placed get a salary of 0.
    cleaned.loc[cleaned['status'] == 'Not Placed', 'salary'] = 0
    cleaned['department'] = departments
    cleaned['gender'] = cleaned['gender'].map({'M': 'Male', 'F': 'Female'})
    return cleaned
# Cleaned modelling frame derived from the raw CSV rows.
dataset = next_data(df=df)

# scikit-learn pieces for the modelling cells.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Default-configured objects; `encoder`, `scaler` and `model` are all rebound
# later in the regression section of this notebook.
encoder, scaler, model = OneHotEncoder(), StandardScaler(), RandomForestClassifier()

### First Model -- Classifier

In [4]:
# Columns left out of the placement classifier's inputs.
cols_to_drop = [
    'spec_higher_edu', '10_grade', '12_grade',
    'employ_test', 'post_grad_percent', 'salary',
]
train = dataset.drop(columns=cols_to_drop)

# Features are everything except the label; the label is binarised
# (Placed -> 1, Not Placed -> 0).
X = train.drop(columns='status')
y = train['status'].map({
    'Placed': 1,
    'Not Placed': 0,
})

In [6]:
# Preview the first three rows of the classifier feature frame.
X.head(3)

Unnamed: 0,gender,degree_percent,undergrad_major,work_exp,department
0,Male,58.0,Sci&Tech,No,FMBA
1,Male,77.0,Sci&Tech,Yes,GBP
2,Male,64.0,Comm&Mgmt,No,TMBA


In [7]:
# One-hot encode the categorical feature columns for inspection only: the
# result is displayed, not assigned to any variable.
pd.get_dummies(X)

Unnamed: 0,degree_percent,gender_Female,gender_Male,undergrad_major_Comm&Mgmt,undergrad_major_Others,undergrad_major_Sci&Tech,work_exp_No,work_exp_Yes,department_DMFBA,department_EMBA,department_FMBA,department_GBP,department_IMMBA,department_IMMS,department_ME,department_MFE,department_PMBA,department_SEMBA,department_TMBA
0,58.0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0
1,77.0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0
2,64.0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
3,52.0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0
4,73.0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,78.0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
211,72.0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0
212,73.0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
213,58.0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0


In [87]:
import pickle

In [88]:
# NOTE(review): `dt` is not defined in any cell visible in this notebook —
# presumably the fitted placement classifier from a removed cell; confirm
# before relying on Restart & Run All.
# A context manager guarantees the file is flushed and closed after the dump.
with open('model_classifier.pkl', 'wb') as classifier_file:
    pickle.dump(dt, classifier_file)

In [89]:
# Display the cleaned frame (the rendered table below is truncated to the
# first and last rows).
dataset

Unnamed: 0,gender,10_grade,12_grade,spec_higher_edu,degree_percent,undergrad_major,work_exp,employ_test,post_grad_percent,status,salary,department
0,Male,67.0,91.0,Commerce,58.0,Sci&Tech,No,55.0,59.0,Placed,270000.0,FMBA
1,Male,79.0,78.0,Science,77.0,Sci&Tech,Yes,86.5,66.0,Placed,200000.0,GBP
2,Male,65.0,68.0,Arts,64.0,Comm&Mgmt,No,75.0,58.0,Placed,250000.0,TMBA
3,Male,56.0,52.0,Science,52.0,Sci&Tech,No,66.0,59.0,Not Placed,0.0,IMMBA
4,Male,86.0,74.0,Commerce,73.0,Comm&Mgmt,No,96.8,56.0,Placed,425000.0,SEMBA
...,...,...,...,...,...,...,...,...,...,...,...,...
210,Male,81.0,82.0,Commerce,78.0,Comm&Mgmt,No,91.0,74.0,Placed,400000.0,PMBA
211,Male,58.0,60.0,Science,72.0,Sci&Tech,No,74.0,54.0,Placed,275000.0,DMFBA
212,Male,67.0,67.0,Commerce,73.0,Comm&Mgmt,Yes,59.0,70.0,Placed,295000.0,ME
213,Female,74.0,66.0,Commerce,58.0,Comm&Mgmt,No,70.0,60.0,Placed,204000.0,SEMBA


In [90]:
from sklearn.ensemble import RandomForestRegressor

In [91]:
# The salary regressor is trained on placed students only.
regress = dataset.loc[dataset['status'] == 'Placed']

# Inputs: drop the unused school columns plus the target and the (now constant)
# placement status. Target: the rounded salary.
X_new = regress.drop(
    columns=['spec_higher_edu', '10_grade', '12_grade', 'employ_test', 'salary', 'status']
)
y_new = regress['salary']

In [92]:
# Display the regression feature frame (placed students only).
X_new

Unnamed: 0,gender,degree_percent,undergrad_major,work_exp,post_grad_percent,department
0,Male,58.0,Sci&Tech,No,59.0,FMBA
1,Male,77.0,Sci&Tech,Yes,66.0,GBP
2,Male,64.0,Comm&Mgmt,No,58.0,TMBA
4,Male,73.0,Comm&Mgmt,No,56.0,SEMBA
7,Male,66.0,Sci&Tech,Yes,62.0,PMBA
...,...,...,...,...,...,...
209,Male,65.0,Comm&Mgmt,No,56.0,DMFBA
210,Male,78.0,Comm&Mgmt,No,74.0,PMBA
211,Male,72.0,Sci&Tech,No,54.0,DMFBA
212,Male,73.0,Comm&Mgmt,Yes,70.0,ME


In [93]:
# Neither of these names is imported in any cell visible in this notebook, so
# import them here to keep the cell runnable on a fresh kernel.
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OrdinalEncoder

# Integer-code the four categorical columns; every remaining column
# (the numeric percentages) is standardised by the scaler.
# Note: this rebinds `encoder` and `scaler` from the earlier classifier setup.
encoder = OrdinalEncoder()
scaler = StandardScaler()
transformerlar = make_column_transformer(
    (encoder, ['gender', 'undergrad_major', 'work_exp', 'department']),
    remainder=scaler,
)

In [94]:
# Pin the forest's seed (same value as the notebook's numpy seed) so the fitted
# and pickled regressor does not depend on global RNG state or on the order in
# which cells were executed. This rebinds `model` from the earlier classifier.
model = RandomForestRegressor(random_state=1)

In [95]:
# `Pipeline` is not imported in any cell visible in this notebook; import it
# here so the cell runs on a fresh kernel.
from sklearn.pipeline import Pipeline

# Preprocessing and the regressor are chained so both are fitted and pickled
# as one object.
pipe_model = Pipeline(
    steps=[
        ("preprocessor", transformerlar),
        ("regressor", model),
    ]
)

In [96]:
# Fit the preprocessing step and the regressor together on the placed-student
# features and their salaries.
pipe_model.fit(X_new, y_new)

In [97]:
# Persist the fitted pipeline; the context manager guarantees the file is
# flushed and closed after the dump.
with open('model_regressor.pkl', 'wb') as regressor_file:
    pickle.dump(pipe_model, regressor_file)

In [98]:
!pip list

Package                       Version
----------------------------- --------------------
alabaster                     0.7.12
alembic                       1.8.1
altair                        4.2.0
anaconda-clean                1.0
anaconda-client               1.11.0
anaconda-navigator            2.3.1
anaconda-project              0.11.1
anyio                         3.5.0
appdirs                       1.4.4
applaunchservices             0.3.0
appnope                       0.1.2
appscript                     1.1.2
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
arrow                         1.2.3
astroid                       2.11.7
astropy                       5.1
atomicwrites                  1.4.0
attrs                         22.1.0
Automat                       20.2.0
autopep8                      1.6.0
Babel                         2.11.0
backcall                      0.2.0
backports.functools-lru-cache 1.6.4
backports.tempfile            1.0
backport

parsel                        1.6.0
parso                         0.8.3
partd                         1.2.0
pathlib                       1.0.1
pathspec                      0.10.3
patsy                         0.5.3
pep8                          1.7.1
pexpect                       4.8.0
phik                          0.12.2
pickleshare                   0.7.5
Pillow                        9.3.0
pip                           22.3.1
pipreqs                       0.4.11
pkginfo                       1.8.3
platformdirs                  2.5.2
plotly                        5.9.0
pluggy                        1.0.0
ply                           3.11
poyo                          0.5.0
prometheus-client             0.14.1
prompt-toolkit                3.0.36
Protego                       0.1.16
protobuf                      3.20.1
psutil                        5.9.0
ptyprocess                    0.7.0
py                            1.11.0
pyarrow                       