## 01. Import the Libraries First

In [3]:
import math

import numpy as np
import pandas as pd
from sklearn import linear_model

## 02. Load Data into pandas Dataframe

In [5]:
# Read the home-price dataset into a DataFrame and display it.
homeprices_csv = '03.homeprices.csv'
df = pd.read_csv(homeprices_csv)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


## 03. Data Preprocessing: Calculating the Median

To handle the 'NaN' value in the 'bedrooms' column, we first compute the median of the other five values, then fill the NaN cell with that median.

In [6]:
# Median of the observed bedroom counts.
df['bedrooms'].median()

4.0

In [7]:
# Round the median down to a whole number of bedrooms; this value is used
# below to fill the missing entry. (`math` is imported with the other
# libraries at the top of the notebook.)
median_of_bedrooms = math.floor(df.bedrooms.median())
median_of_bedrooms

4

## 04. Usage of `fillna` method

In [13]:
# Preview only: the filled Series is displayed but not assigned back to `df`.
df['bedrooms'].fillna(value=median_of_bedrooms)

0    3.0
1    4.0
2    4.0
3    3.0
4    5.0
5    6.0
Name: bedrooms, dtype: float64

In [15]:
# Store the filled column back on the frame, then display the result.
df['bedrooms'] = df['bedrooms'].fillna(median_of_bedrooms)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


## 05. Train the Linear Regression Model

In [16]:
# Fit a linear model of price on area, bedrooms and age.
feature_columns = ['area', 'bedrooms', 'age']
reg = linear_model.LinearRegression()
reg.fit(df[feature_columns], df['price'])

## 06. Predict

In [18]:
# Predict the price of a 1200 sq. ft., 2-bedroom, 10-year-old home.
# The input is a DataFrame whose columns match the training features: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
reg.predict(pd.DataFrame([[1200, 2, 10]], columns=['area', 'bedrooms', 'age']))



array([370258.51326551])

In [19]:
# Fitted coefficient for each feature, in training-column order (area, bedrooms, age).
reg.coef_

array([  112.06244194, 23388.88007794, -3231.71790863])

In [20]:
# Fitted intercept (the constant term of the linear model).
reg.intercept_

221323.00186540408

## 07. Testing

In [21]:
# Reproduce the prediction by hand:
#   price = coef_area*area + coef_bedrooms*bedrooms + coef_age*age + intercept
# using the coefficient and intercept values displayed above.
area_term = 112.06244194 * 1200
bedrooms_term = 23388.88007794 * 2
age_term = 10 * -3231.71790863
area_term + bedrooms_term + age_term + 221323.00186540408

370258.5132629841

## 08. Exercise (Prediction of the Salary)

### 08.1. Import the CSV file

In [22]:
# Read the hiring dataset used for the salary exercise and display it.
hiring_csv = '03.hiring.csv'
exercise_file = pd.read_csv(hiring_csv)
exercise_file

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


### 08.2. Data Preprocessing

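01. The 'experience' column stores numbers as words; we will convert them to integers.
02. Some columns contain NaN values. We will fill them before training: 'experience' with 'zero' (before the word-to-number conversion) and the test score with the floored mean of the column.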

### 08.3. Using the `word2number` Module

In [26]:
pip install word2number==1.1

Collecting word2number==1.1
  Downloading word2number-1.1.zip (9.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py): started
  Building wheel for word2number (setup.py): finished with status 'done'
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5590 sha256=add2abfe5dd566fbdbdfbafc8a86eb088ddebae4dd8b15e0f09f3ce9fdfe60ec
  Stored in directory: c:\users\sy929\appdata\local\pip\cache\wheels\29\85\cd\a78aeab138a03ac89a6ed4f8f00665d5ceb7ac0318c837a086
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1
Note: you may need to restart the kernel to use updated packages.


In [49]:
from word2number import w2n

In [60]:
# Treat a missing experience value as the word 'zero' so that every entry can
# be converted from words to a number in the next step.
exercise_file['experience'] = exercise_file['experience'].fillna('zero')
exercise_file

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [62]:
# Convert number words ('five', 'eleven', ...) in 'experience' to integers.
# The dtype guard makes this cell safe to re-run: once the column is numeric
# the conversion is skipped, because word_to_num rejects non-string input.
if not pd.api.types.is_numeric_dtype(exercise_file.experience):
    exercise_file.experience = exercise_file.experience.apply(w2n.word_to_num)
exercise_file

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


In [67]:
# Floored mean of the observed test scores, used below to fill the missing
# score. Despite its name, this is the mean of the 'test_score(out of 10)'
# column only, not of the whole frame. (`math` is imported with the other
# libraries at the top of the notebook.)
mean_of_exercise_file = math.floor(exercise_file['test_score(out of 10)'].mean())
mean_of_exercise_file

7

In [72]:
# Fill the missing test score with the floored mean computed above, then
# display the filled column.
score_column = 'test_score(out of 10)'
exercise_file[score_column] = exercise_file[score_column].fillna(mean_of_exercise_file)
exercise_file[score_column]

0     8.0
1     8.0
2     6.0
3    10.0
4     9.0
5     7.0
6     7.0
7     7.0
Name: test_score(out of 10), dtype: float64

In [71]:
# Display the full frame to check the preprocessed columns before training.
exercise_file

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.0,7,72000
7,11,7.0,8,80000


### 08.4. Training the Model

In [73]:
# Fit a linear model of salary on experience, test score and interview score.
salary_features = ['experience', 'test_score(out of 10)', 'interview_score(out of 10)']
reg2 = linear_model.LinearRegression()
reg2.fit(exercise_file[salary_features], exercise_file['salary($)'])

In [74]:
# Predict the salary for 2 years of experience, test score 9, interview score 6.
# The input is a DataFrame whose columns match the training features, which
# avoids scikit-learn's "X does not have valid feature names" warning.
reg2.predict(
    pd.DataFrame(
        [[2, 9, 6]],
        columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
    )
)



array([53713.86677124])

In [75]:
# Predict the salary for 12 years of experience, test score 10, interview score 10.
# The input is a DataFrame whose columns match the training features, which
# avoids scikit-learn's "X does not have valid feature names" warning.
reg2.predict(
    pd.DataFrame(
        [[12, 10, 10]],
        columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
    )
)



array([93747.79628651])