In [3]:
import pandas as pd
from word2number import w2n

import warnings
# Silence every warning for the rest of the session so cell outputs stay uncluttered.
# NOTE(review): a blanket "ignore" also hides any deprecation/future warnings raised
# by the libraries used below; consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
# Read hiring.csv, skipping the first line of the file (presumably its header row)
# and applying our own column names instead.
column_names = ['experience', 'test_score', 'interview_score', 'salary']
df = pd.read_csv("hiring.csv", names=column_names, skiprows=[0])

# Preview the first rows.
df.head()

Unnamed: 0,experience,test_score,interview_score,salary
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000


## Data Preprocessing

In [7]:
# Treat a missing experience value as the word 'zero' so that every entry is a
# number word. The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on `df.experience`: that chained in-place form acts
# on an intermediate Series and is not guaranteed to update `df` (pandas 2.2
# emits a FutureWarning for it, and under Copy-on-Write `df` is left unchanged).
df['experience'] = df['experience'].fillna('zero')

df.head()

Unnamed: 0,experience,test_score,interview_score,salary
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000


In [3]:
# Install word2number if it is missing (required by the `from word2number import w2n` import used in this notebook).

# %pip install word2number

Defaulting to user installation because normal site-packages is not writeable
Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py): started
  Building wheel for word2number (setup.py): finished with status 'done'
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5589 sha256=ce24ddb0c6d5bdfd5eda5fbbdb6025e709a5aa7eed5863dbb0a0a644647d1b86
  Stored in directory: c:\users\91889\appdata\local\pip\cache\wheels\cd\ef\ae\073b491b14d25e2efafcffca9e16b2ee6d114ec5c643ba4f06
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1



[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
# word2number's word_to_num turns a spelled-out number (e.g. 'five') into an int;
# apply it to every entry of the experience column.
experience_as_int = df['experience'].apply(w2n.word_to_num)
df['experience'] = experience_as_int

In [9]:
# Preview the first five rows to confirm experience now holds integers.
df.head(n=5)

Unnamed: 0,experience,test_score,interview_score,salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000


In [10]:
# Report, per column, whether any missing value remains.
df.isna().any()

experience         False
test_score          True
interview_score    False
salary             False
dtype: bool

Now, only test_score has null values

In [11]:
# Mean of the test scores (missing values are skipped by default).
df['test_score'].mean()

7.857142857142857

In [12]:
# Median of the test scores, for comparison with the mean.
df['test_score'].median()

8.0

In [14]:
import math

# Round the mean test score down to a whole number; `test_mean` is the value
# later used to fill the missing test scores.
mean_score = df['test_score'].mean()
test_mean = math.floor(mean_score)

test_mean

7

Let's fill the missing test scores with the mean rounded down to 7 (note that the exact mean is about 7.86 and the median is 8.0).

In [15]:
# Replace the missing test scores with the floored mean stored in `test_mean`.
import math

filled_scores = df['test_score'].fillna(test_mean)
df['test_score'] = filled_scores

In [16]:
# Display the whole frame (it only has a handful of rows) to inspect the filled test_score column.
df

Unnamed: 0,experience,test_score,interview_score,salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.0,7,72000
7,11,7.0,8,80000


In [17]:
# Confirm that no column contains missing values any more.
df.isna().any()

experience         False
test_score         False
interview_score    False
salary             False
dtype: bool

## Train the model

In [18]:
# Every column except the salary target is used as a model input.
features = df.drop(columns='salary')

features.head()

Unnamed: 0,experience,test_score,interview_score
0,0,8.0,9
1,0,8.0,6
2,5,6.0,7
3,2,10.0,10
4,7,9.0,6


In [19]:
# The salary column is the value the model learns to predict.
target = df['salary']

target.head()

0    50000
1    45000
2    60000
3    65000
4    70000
Name: salary, dtype: int64

In [20]:
from sklearn.linear_model import LinearRegression

# Create an unfitted linear regression estimator with its default parameters.
reg = LinearRegression()

In [21]:
# Fit the regression on the input columns against the salary target.
reg.fit(X=features, y=target)

In [22]:
# Predict the salary for experience=2, test_score=9, interview_score=6.
# The input is wrapped in a DataFrame that reuses the training column names:
# the model was fitted on a named DataFrame, and a bare nested list makes
# scikit-learn warn that X lacks valid feature names while leaving the
# feature order implicit.
reg.predict(pd.DataFrame([[2, 9, 6]], columns=features.columns))

array([53713.86677124])

In [23]:
# Predict the salary for experience=12, test_score=10, interview_score=10.
# The input is wrapped in a DataFrame that reuses the training column names:
# the model was fitted on a named DataFrame, and a bare nested list makes
# scikit-learn warn that X lacks valid feature names while leaving the
# feature order implicit.
reg.predict(pd.DataFrame([[12, 10, 10]], columns=features.columns))

array([93747.79628651])

## Save the trained model

### By using pickle

In [24]:
import pickle

# Serialize the fitted model into the binary file 'model_pickle'.
with open('model_pickle', mode='wb') as model_file:
    pickle.dump(reg, model_file)

### By using Joblib

In [25]:
import joblib

# Persist the same fitted model with joblib; the call returns the list of file names written.
joblib.dump(value=reg, filename='model_joblib')

['model_joblib']

## Testing the model

### Testing using pickle model

In [28]:
# Read the pickled model back from disk.
# NOTE: only unpickle files from a trusted source; this file was written by this notebook.
with open('model_pickle', mode='rb') as model_file:
    pkl_model = pickle.load(model_file)

In [29]:
# Score experience=2, test_score=9, interview_score=6 with the unpickled model.
candidate = [[2, 9, 6]]
pkl_model.predict(candidate)

array([53713.86677124])

In [30]:
# Score experience=12, test_score=10, interview_score=10 with the unpickled model.
candidate = [[12, 10, 10]]
pkl_model.predict(candidate)

array([93747.79628651])

### Testing using joblib model

In [32]:
# Restore the model that was saved with joblib.
joblib_model = joblib.load(filename='model_joblib')

In [33]:
# Score experience=2, test_score=9, interview_score=6 with the joblib-restored model.
candidate = [[2, 9, 6]]
joblib_model.predict(candidate)

array([53713.86677124])

In [34]:
# Score experience=12, test_score=10, interview_score=10 with the joblib-restored model.
candidate = [[12, 10, 10]]
joblib_model.predict(candidate)

array([93747.79628651])