In [223]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [268]:
data = {
        'year': [2018, 2015, 2020, 2012],
        'km': [50000, 80000, 20000, 120000],
        'engine_size': [1.6, 2.0, 1.8, 1.4],
        'brand': ['Toyota', 'Honda', 'Volkswagen', 'Ford'],
        'price': [120000, 90000, 150000, 60000]
}

In [269]:
def normalize(data):
    data_numpy = np.asarray(data)
    std_dist = (data - data_numpy.mean()) / data_numpy.std()
    return np.asarray(std_dist, dtype=np.float32)

In [270]:
le = LabelEncoder()

In [271]:
data['brand'] = le.fit_transform(data['brand']).astype(np.float32)
data['brand']

array([2., 1., 3., 0.], dtype=float32)

In [272]:
data['year'] = normalize(data['year'])
data['year']

array([ 0.57735026, -0.41239306,  1.2371792 , -1.4021363 ], dtype=float32)

In [273]:
data['km'] = normalize(data['km'])
data['km']

array([-0.47301617,  0.3378687 , -1.283901  ,  1.4190485 ], dtype=float32)

In [274]:
data['engine_size'] = normalize(data['engine_size'])
data['engine_size']

array([-0.4472136,  1.3416408,  0.4472136, -1.3416408], dtype=float32)

In [275]:
data['price'] = normalize(data['price'])
data['price']

array([ 0.4472136, -0.4472136,  1.3416408, -1.3416408], dtype=float32)

In [276]:
df = pd.DataFrame(data)
df

Unnamed: 0,year,km,engine_size,brand,price
0,0.57735,-0.473016,-0.447214,2.0,0.447214
1,-0.412393,0.337869,1.341641,1.0,-0.447214
2,1.237179,-1.283901,0.447214,3.0,1.341641
3,-1.402136,1.419049,-1.341641,0.0,-1.341641


In [277]:
X = df.drop(['price'], axis=1)
X

Unnamed: 0,year,km,engine_size,brand
0,0.57735,-0.473016,-0.447214,2.0
1,-0.412393,0.337869,1.341641,1.0
2,1.237179,-1.283901,0.447214,3.0
3,-1.402136,1.419049,-1.341641,0.0


In [278]:
y = df['price']
y

0    0.447214
1   -0.447214
2    1.341641
3   -1.341641
Name: price, dtype: float32

In [279]:
X_train, X_test, y_train, y_test = X.iloc[:3], X.iloc[3:], y.iloc[:3], y.iloc[3:]

In [280]:
X_train = np.asarray(X_train).astype(np.float32)
X_train

array([[ 0.57735026, -0.47301617, -0.4472136 ,  2.        ],
       [-0.41239306,  0.3378687 ,  1.3416408 ,  1.        ],
       [ 1.2371792 , -1.283901  ,  0.4472136 ,  3.        ]],
      dtype=float32)

In [281]:
X_test = np.asarray(X_test).astype(np.float32)
X_test

array([[-1.4021363,  1.4190485, -1.3416408,  0.       ]], dtype=float32)

In [282]:
y_train = np.asarray(y_train).astype(np.float32)
y_train

array([ 0.4472136, -0.4472136,  1.3416408], dtype=float32)

In [283]:
y_test = np.asarray(y_test).astype(np.float32)
y_test

array([-1.3416408], dtype=float32)

In [284]:
W = np.random.randn(X_train.shape[1])
b = 0.

In [285]:
W

array([ 0.05380524, -1.04861178,  0.55109352, -1.23496957])

In [286]:
b

0.0

In [287]:
def cost(X, y, W, b): 
    m = X.shape[0]
    cost = 0.
    for i in range(m):
        prediction = np.dot(X[i], W) + b
        cost += (prediction - y[i]) ** 2
    return cost / (2*m)

In [288]:
cost(X_train, y_train, W, b)

3.100839408098238

In [289]:
cost_of_test_before_training = cost(X_test, y_test, W, b)
cost_of_test_before_training

0.4619546866924804

In [290]:
def gradient(X, y, W, b): 
    m, n = X.shape
    derivative_w = np.zeros((n,))
    derivative_b = 0
    for i in range(m):
        predictions = (np.dot(X[i], W) + b) - y[i]
        for j in range(n):
            derivative_w[j] += predictions * X[i, j]    
        derivative_b += predictions
    return derivative_w / m, derivative_b / m

In [291]:
gradient(X_train, y_train, W, b)

(array([-1.84585998,  1.8174731 , -0.30191143, -5.28652506]),
 -2.1495384941026376)

In [292]:
def gradient_descent(X, y, W, b, cost_function, gradient_function, alpha, num_iters): 
    m, n = X.shape
    for i in range(num_iters):
        derivative_w, derivative_b = gradient_function(X, y, W, b)  
        W = W - alpha * derivative_w               
        b = b - alpha * derivative_b               
        cost =  np.sum(cost_function(X, y, W, b))
        if i % (num_iters / 10) == 0:
            print(f"Iteration {i:4}: Cost {float(cost):8.2f}   ")
    print(f"Iteration {num_iters:4}: Cost {float(cost):8.2f}   ")
    return W, b

In [293]:
alpha = 0.01
iterations = 100

In [294]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost     2.72   
Iteration   10: Cost     0.77   
Iteration   20: Cost     0.25   
Iteration   30: Cost     0.11   
Iteration   40: Cost     0.07   
Iteration   50: Cost     0.05   
Iteration   60: Cost     0.04   
Iteration   70: Cost     0.04   
Iteration   80: Cost     0.03   
Iteration   90: Cost     0.03   
Iteration  100: Cost     0.03   


In [295]:
alpha = 0.015

In [296]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost     0.03   
Iteration   10: Cost     0.02   
Iteration   20: Cost     0.02   
Iteration   30: Cost     0.02   
Iteration   40: Cost     0.01   
Iteration   50: Cost     0.01   
Iteration   60: Cost     0.01   
Iteration   70: Cost     0.01   
Iteration   80: Cost     0.01   
Iteration   90: Cost     0.01   
Iteration  100: Cost     0.01   


In [297]:
alpha = 0.02

In [298]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost     0.01   
Iteration   10: Cost     0.01   
Iteration   20: Cost     0.01   
Iteration   30: Cost     0.01   
Iteration   40: Cost     0.01   
Iteration   50: Cost     0.01   
Iteration   60: Cost     0.00   
Iteration   70: Cost     0.00   
Iteration   80: Cost     0.00   
Iteration   90: Cost     0.00   
Iteration  100: Cost     0.00   


In [299]:
alpha = 0.025

In [300]:
W, b = gradient_descent(X_train, y_train, W, b, cost, gradient, alpha, iterations)

Iteration    0: Cost     0.00   
Iteration   10: Cost     0.00   
Iteration   20: Cost     0.00   
Iteration   30: Cost     0.00   
Iteration   40: Cost     0.00   
Iteration   50: Cost     0.00   
Iteration   60: Cost     0.00   
Iteration   70: Cost     0.00   
Iteration   80: Cost     0.00   
Iteration   90: Cost     0.00   
Iteration  100: Cost     0.00   


In [301]:
W

array([ 0.37729279, -1.30459183,  0.11865622, -0.39161015])

In [302]:
b

0.3724796645377259

In [303]:
cost(X_train, y_train, W, b)

0.001409306609720367

In [304]:
cost_of_test_after_training = cost(X_test, y_test, W, b)
cost_of_test_after_training

0.3406166617187846

In [305]:
print("Cost before training: ", cost_of_test_before_training)
print("Cost after training: ", cost_of_test_after_training)
print("Diff: ", cost_of_test_before_training - cost_of_test_after_training)

Cost before training:  0.4619546866924804
Cost after training:  0.3406166617187846
Diff:  0.12133802497369583


In [306]:
def predict(X, W, b):
    return (np.dot(X, W) + b).astype(np.float32)

In [307]:
y_pred = predict(X_test, W, b)
y_pred

array([-2.1670094], dtype=float32)

In [308]:
y_test

array([-1.3416408], dtype=float32)

In [312]:
def accuracy(y, y_pred):
    diff = np.absolute(y - y_pred)
    return np.absolute(diff / y) * 100

In [313]:
acc = accuracy(y_test, y_pred)
acc

array([61.519337], dtype=float32)

In [314]:
print("Model accuracy is: ", acc[0])

Model accuracy is:  61.519337
