# ft_linear_regression
## An introduction to machine learning
You will implement a simple linear regression with a single feature - in this case, the
mileage of the car.

In [2]:
import numpy as np
import argparse
import matplotlib.pyplot as plt


In [3]:
# Load the dataset as a float array, skipping the CSV header row.
# Later cells read column 0 as the mileage and column 1 as the price.
data = np.genfromtxt(fname="data.csv", skip_header=1, delimiter=",")

### Data min-max normalization function:

In [4]:
def normalize(a):
    """Min-max scale ``a`` so its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    a : array_like
        Values to rescale. Lists and arrays of any shape are accepted; the
        minimum and maximum are taken over all elements.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``a`` holding
        ``(a - min(a)) / (max(a) - min(a))``. An empty input yields an empty
        array, and an input whose values are all equal yields all zeros
        (instead of the NaNs a 0/0 division would produce).
    """
    values = np.asarray(a, dtype=float)
    if values.size == 0:
        # np.min / np.max raise on empty input; there is nothing to scale.
        return np.zeros(values.shape)

    # Compute the extrema once instead of once per element.
    lowest = np.min(values)
    spread = np.max(values) - lowest
    if spread == 0:
        # Constant input: every value sits at the minimum, so map it to 0.
        return np.zeros(values.shape)
    return (values - lowest) / spread

In [5]:
# Min-max scale the mileage (column 0) and the price (column 1).
X, Y = (normalize(data[:, column]) for column in (0, 1))

### Estimate price function:
* The first program will be used to predict the price of a car for a given mileage.
When you launch the program, it should prompt you for a mileage, and then give
you back the estimated price for that mileage.

The program will use the following hypothesis to predict the price:

> $\text{estimatePrice}(\text{mileage}) = \theta_0 + (\theta_1 \times \text{mileage})$

In [6]:
def estimate_price(mileage, theta0, theta1):
    """Evaluate the linear hypothesis ``theta0 + theta1 * mileage``.

    Parameters
    ----------
    mileage : number or array
        Input value(s) of the single feature.
    theta0 : number
        Intercept of the line.
    theta1 : number
        Slope of the line.

    Returns
    -------
    The predicted value(s) for ``mileage``.
    """
    slope_term = theta1 * mileage
    return theta0 + slope_term

* The second program will be used to train your model. It will read your dataset
file and perform a linear regression on the data.
Once the linear regression has completed, you will save the variables theta0 and
theta1 for use in the first program.

You will be using the following formulas :
> $\text{tmp}\theta_0 = \text{learningRate} \times \frac{1}{m} \sum_{i=0}^{m-1} \left(\text{estimatePrice}(\text{mileage}[i]) - \text{price}[i]\right)$

> $\text{tmp}\theta_1 = \text{learningRate} \times \frac{1}{m} \sum_{i=0}^{m-1} \left(\text{estimatePrice}(\text{mileage}[i]) - \text{price}[i]\right) \times \text{mileage}[i]$

In [7]:
def gradient_descent(X, Y, curr_theta0, curr_theta1, lr):
    """Perform one gradient-descent step for the line ``theta0 + theta1 * x``.

    Parameters
    ----------
    X, Y : indexable sequences
        Feature values and their targets; ``len(X)`` samples are read from both.
    curr_theta0, curr_theta1 : number
        Current intercept and slope.
    lr : number
        Learning rate applied to both averaged gradients.

    Returns
    -------
    tuple
        ``(new_theta0, new_theta1)``, both computed from the current thetas
        (simultaneous update).
    """
    sample_count = len(X)
    grad_intercept = 0
    grad_slope = 0
    for idx in range(sample_count):
        feature = X[idx]
        # Signed error of the current line on this sample.
        residual = (curr_theta0 + (curr_theta1 * feature)) - Y[idx]
        grad_intercept += (1 / sample_count) * residual
        grad_slope += (1 / sample_count) * (residual * feature)
    return curr_theta0 - lr * grad_intercept, curr_theta1 - lr * grad_slope

### Cost function:

In [8]:
def cost_function(X, Y, theta0, theta1):
    """Return the mean squared error of the line over the samples.

    Parameters
    ----------
    X, Y : indexable sequences
        Feature values and their targets; ``len(X)`` samples are read from both.
    theta0, theta1 : number
        Intercept and slope passed to ``estimate_price``.

    Returns
    -------
    The sum of squared differences between ``Y[i]`` and the estimate for
    ``X[i]``, divided by ``len(X)``.
    """
    sample_count = len(X)
    squared_error_sum = 0.0
    for idx in range(sample_count):
        residual = Y[idx] - estimate_price(X[idx], theta0, theta1)
        squared_error_sum += residual ** 2
    return squared_error_sum / sample_count

### Linear Regression function:

In [9]:
def linear_regression(X, Y, lr, epochs):
    """Fit ``theta0 + theta1 * x`` to the samples with batch gradient descent.

    Parameters
    ----------
    X, Y : indexable sequences
        Feature values and their targets, forwarded unchanged to
        ``gradient_descent`` and ``cost_function``.
    lr : number
        Learning rate used for every step.
    epochs : int
        Number of gradient-descent steps to run.

    Returns
    -------
    tuple
        ``(theta0, theta1, loss)`` where ``loss`` is a list holding the value of
        ``cost_function`` after each step (empty when ``epochs`` is 0).
    """
    # Start from the zero line; floats keep the return type stable even when
    # no step is executed.
    theta0 = 0.0
    theta1 = 0.0
    loss = []
    for _ in range(epochs):
        theta0, theta1 = gradient_descent(X, Y, theta0, theta1, lr)
        # Record the cost of the freshly updated parameters.
        loss.append(cost_function(X, Y, theta0, theta1))
    return theta0, theta1, loss

In [15]:
# Raw columns: mileage is the feature, price is the target.
X, Y = data[:, 0], data[:, 1]

# Train on min-max-scaled copies; the raw X and Y stay bound for later cells.
normalized_X, normalized_Y = normalize(X), normalize(Y)

learning_rate, epochs = 0.01, 1000
theta0, theta1, loss = linear_regression(
    X=normalized_X, Y=normalized_Y, lr=learning_rate, epochs=epochs
)

In [None]:
def predict(data, X, theta0, theta1):
    """Evaluate the fitted line for raw input ``data``.

    ``data`` is first min-max scaled with the minimum and maximum of ``X``
    (the same transformation ``normalize`` applies to the training feature) and
    the line ``theta0 + theta1 * x`` is then evaluated on the scaled value.

    Parameters
    ----------
    data : number or array_like
        Raw (un-normalized) feature value(s) to predict for.
    X : array_like
        Raw feature values whose min/max define the scaling.
    theta0, theta1 : number
        Intercept and slope of the fitted line.

    Returns
    -------
    The prediction on the scale the thetas were fitted on. In this notebook
    the thetas are fitted on min-max-normalized prices, so the result is a
    normalized price, not a raw one.
    """
    lowest = np.min(X)
    spread = np.max(X) - lowest
    queries = np.asarray(data, dtype=float)
    if spread == 0:
        # All reference values are identical: treat every query as sitting at
        # the minimum rather than dividing by zero.
        normd = np.zeros_like(queries)
    else:
        # Parenthesised on purpose: subtract the minimum first, then divide.
        normd = (queries - lowest) / spread
    # theta0 is the intercept and theta1 the slope, as in estimate_price.
    return theta0 + theta1 * normd

In [None]:
print('Final loss = ', loss[-1])
print('Thetas = ', [theta0, theta1])

# theta0/theta1 were fitted on min-max-normalized mileage and price, so the
# regression line is evaluated in normalized space (x = 0 and x = 1) and mapped
# back to raw units before it shares axes with the raw X and Y.
line_x = [np.min(X), np.max(X)]  # raw mileage at normalized x = 0 and x = 1
price_min = np.min(Y)
price_span = np.max(Y) - price_min
line_y = [(theta0 + theta1 * x_norm) * price_span + price_min for x_norm in (0.0, 1.0)]

# plt.plot(loss)
plt.plot(X, Y, 'ro')
plt.plot(line_x, line_y, 'b')
plt.xlabel('mileage')
plt.ylabel('price')
plt.show()
# np.savetxt("thetas.csv", [theta1, theta0], delimiter=",")

In [None]:
    mileage = float(input("Enter a mileage for estimation: "))
    # The model was trained on min-max-normalized prices, so the value coming
    # out of predict() is mapped back onto the raw price range before printing.
    normalized_price = predict(mileage, X, theta0, theta1)
    price_min = np.min(Y)
    print(normalized_price * (np.max(Y) - price_min) + price_min)