In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

## price = m1 * area + m2 * bedrooms + m3 * age + b

- *price* : dependent variable (target)
- *area, bedrooms, age* : independent variables (features)
- *m1, m2, m3* : coefficients
- *b* : intercept

In [2]:
# Load the home-price table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='homeprices.csv')
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [6]:
import math

# Median of the recorded bedroom counts, rounded down to a whole number
# so it can stand in for a missing bedroom count.
bedrooms_median = df['bedrooms'].median()
median_bedrooms = math.floor(bedrooms_median)
median_bedrooms

4

In [8]:
# Replace missing bedroom counts with the whole-number median (median_bedrooms).
df['bedrooms'] = df['bedrooms'].fillna(median_bedrooms)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [12]:
# Fit a linear model: price as a linear function of area, bedrooms and age.
feature_columns = ['area', 'bedrooms', 'age']
reg = linear_model.LinearRegression()
reg.fit(df[feature_columns], df['price'])

# One coefficient per feature, in the same order as feature_columns,
# plus the intercept term b from the formula above.
m1, m2, m3 = reg.coef_
b = reg.intercept_

print(f'Coefficients: {m1}, {m2}, {m3}')
print(f'Intercept: {b}')

Coefficients: 112.06244194213461, 23388.880077939204, -3231.7179086329647
Intercept: 221323.00186540408


In [11]:
# Predict the price for a home with area=3000, bedrooms=3, age=40.
# The model was fitted on a DataFrame with named columns, so the query is
# passed as a one-row DataFrame with the same column names and order. This
# makes the feature mapping explicit and avoids scikit-learn's
# "X does not have valid feature names" warning.
reg.predict(pd.DataFrame({'area': [3000], 'bedrooms': [3], 'age': [40]}))



array([498408.25158031])

In [13]:
# Predict the price for a home with area=2500, bedrooms=4, age=5.
# The model was fitted on a DataFrame with named columns, so the query is
# passed as a one-row DataFrame with the same column names and order. This
# makes the feature mapping explicit and avoids scikit-learn's
# "X does not have valid feature names" warning.
reg.predict(pd.DataFrame({'area': [2500], 'bedrooms': [4], 'age': [5]}))



array([578876.03748933])