In [69]:
import numpy as np

In [70]:
def step_gradient(points, learning_rate, m):
    """Perform one gradient-descent update of the weights ``m``.

    Every row of ``points`` is read as input values followed by the target in
    its last column. The prediction for a row is the dot product of ``m`` with
    the row's first ``len(points[0]) - 1`` entries, and the gradient of the
    mean squared error is accumulated over all rows.

    Parameters
    ----------
    points : sequence of equally long, indexable rows
    learning_rate : factor applied to the gradient before subtracting it
    m : current weights, one per non-target column

    Returns
    -------
    list
        New weights ``m[j] - learning_rate * gradient[j]``; ``m`` itself is
        not modified.
    """
    n_rows = len(points)
    n_weights = len(points[0]) - 1
    target_idx = n_weights

    gradient = [0] * n_weights

    for row in points:
        target = row[target_idx]

        # Accumulate by hand, left to right: the builtin sum() may round
        # floats differently, which would change the resulting weights.
        prediction = 0
        for k in range(n_weights):
            prediction += m[k] * row[k]

        # Common factor of every partial derivative for this row.
        scaled_error = (-2 / n_rows) * (target - prediction)
        for j in range(n_weights):
            gradient[j] += scaled_error * row[j]

    return [m[j] - learning_rate * gradient[j] for j in range(n_weights)]
    

In [71]:
def cost(points, m):
    """Return the mean squared error of the weights ``m`` over ``points``.

    Each row of ``points`` is read as input values followed by the target in
    its last column; the prediction is the dot product of ``m`` with the
    row's first ``len(points[0]) - 1`` entries.

    Parameters
    ----------
    points : sequence of equally long, indexable rows
    m : weights, one per non-target column

    Returns
    -------
    The sum over all rows of ``(target - prediction) ** 2 / len(points)``.
    """
    n_rows = len(points)
    n_weights = len(points[0]) - 1

    total = 0
    for row in points:
        target = row[n_weights]

        # Manual left-to-right accumulation keeps the float rounding stable.
        prediction = 0
        for k in range(n_weights):
            prediction += m[k] * row[k]

        residual = target - prediction
        total += (1 / n_rows) * (residual ** 2)

    return total

In [72]:
def gd(points, learning_rate, num_iterations):
    """Fit weights to ``points`` by repeated gradient-descent steps.

    Starts from all-zero weights (one per column of ``points`` except the
    last), applies ``step_gradient`` ``num_iterations`` times and prints the
    iteration index together with the current cost after every step.

    Returns
    -------
    list
        The weights after the final iteration.
    """
    weights = [0] * (len(points[0]) - 1)

    for step in range(num_iterations):
        weights = step_gradient(points, learning_rate, weights)
        current_cost = cost(points, weights)
        print(step, "Cost: ", current_cost)

    return weights

In [73]:
def load_data(path="boston_x_y_train.csv"):
    """Load a comma-separated training file and add a bias column.

    The file is read with ``np.genfromtxt``. A column of ones is inserted
    immediately before the last column, so each returned row is
    ``[all columns but the last..., 1, last column]``.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to the file name this notebook has
        always used, so existing ``load_data()`` calls are unaffected.

    Returns
    -------
    numpy.ndarray
        2-D float array with one more column than the file.
    """
    data = np.genfromtxt(path, delimiter=",")

    # genfromtxt returns a 1-D array for a one-line file; treat that as a
    # single row so the column slicing below works.
    data = np.atleast_2d(data)

    bias = np.ones(len(data))
    return np.column_stack((data[:, :-1], bias, data[:, -1]))



In [74]:
def predict(points, m):
    """Predict a target value for every row of ``points``.

    Each row of ``points`` holds feature values only (no bias column and no
    target column). ``m`` holds one weight per feature followed by the
    intercept as its last entry, which is the layout ``gd`` produces when it
    is trained on data prepared by ``load_data``.

    Parameters
    ----------
    points : sequence of equally long, indexable feature rows
    m : sequence of ``len(points[0]) + 1`` weights, intercept last

    Returns
    -------
    list
        One prediction per row; an empty list when ``points`` is empty.

    Raises
    ------
    ValueError
        If the number of weights is not the number of features plus one.
    """
    # len() rather than truthiness so numpy arrays are accepted as well.
    if len(points) == 0:
        return []

    n_features = len(points[0])
    if len(m) != n_features + 1:
        raise ValueError(
            "expected %d weights (one per feature plus the intercept), got %d"
            % (n_features + 1, len(m))
        )

    intercept = m[n_features]
    predictions = []
    for x in points:
        total = 0
        # Every column is a feature: the rows carry no target to skip.
        for j in range(n_features):
            total += m[j] * x[j]
        predictions.append(total + intercept)

    return predictions

In [75]:
from sklearn import preprocessing
import pandas as pd

In [76]:
def scale(data):
    """Standardize every column of ``data`` except the last two.

    The whole table is passed through ``preprocessing.scale``; the result is
    used for all columns but the final two, which are copied unchanged from
    ``data``. In ``run`` those two columns are the bias column and the target
    added/kept by ``load_data``.

    Returns
    -------
    list of list
        A new table with the same dimensions as ``data``.
    """
    standardized = preprocessing.scale(data)

    n_cols = len(data[0])
    second_last = n_cols - 2
    last = n_cols - 1

    result = []
    for i, std_row in enumerate(standardized):
        raw_row = data[i]
        row = [0] * n_cols
        for col in range(second_last):
            row[col] = std_row[col]
        # The two trailing columns keep their original, unscaled values.
        row[second_last] = raw_row[second_last]
        row[last] = raw_row[last]
        result.append(row)

    return result

In [77]:
def run(learning_rate=0.1, num_iterations=100,
        test_path="boston_x_test.csv", output_path="result.csv"):
    """Train on the training file, predict the test file and save the result.

    Steps: load and scale the training data, fit weights with ``gd``, print
    them, standardize the test features with the *training* mean and standard
    deviation, predict, and write one prediction per line to ``output_path``.

    Parameters
    ----------
    learning_rate : float, optional
        Step size handed to ``gd``.
    num_iterations : int, optional
        Number of gradient-descent iterations.
    test_path : str, optional
        Comma-separated file with feature columns only.
    output_path : str, optional
        Destination file for the predictions (format ``%0.5f``).

    All defaults reproduce the values previously hard-coded here.
    """
    data = load_data()
    data_scaled = scale(data)

    m = gd(data_scaled, learning_rate, num_iterations)

    print("Set of m: ")
    for x in m:
        print(x)

    # The last two columns of the loaded training data are the bias column
    # and the target; everything before them is a feature.
    n_features = data.shape[1] - 2

    test_data = np.genfromtxt(test_path, delimiter=",")
    if test_data.ndim == 1:
        # genfromtxt flattens one-row / one-column files; restore the layout.
        test_data = test_data.reshape(-1, n_features)

    # The weights were learned on features standardized with the training
    # statistics, so the test features must be transformed with those same
    # statistics rather than with the test set's own mean and std.
    scaler = preprocessing.StandardScaler().fit(data[:, :n_features])
    test_data_scaled = scaler.transform(test_data)

    y = predict(test_data_scaled, m)

    np.savetxt(output_path, y, fmt='%0.5f')

In [78]:
# Run the full pipeline: train, print the cost per iteration and the weights,
# then write the test-set predictions to disk.
run()

0 Cost:  372.640228249
1 Cost:  246.080684499
2 Cost:  166.314625295
3 Cost:  115.50146384
4 Cost:  83.0587141243
5 Cost:  62.3083509347
6 Cost:  49.0104149274
7 Cost:  40.4681913201
8 Cost:  34.9643575745
9 Cost:  31.40411193
10 Cost:  29.0888396195
11 Cost:  27.5723391448
12 Cost:  26.5693719176
13 Cost:  25.8974284289
14 Cost:  25.4396109945
15 Cost:  25.1209566428
16 Cost:  24.8933227757
17 Cost:  24.7257327939
18 Cost:  24.5982057181
19 Cost:  24.4978103359
20 Cost:  24.4161404895
21 Cost:  24.3476987184
22 Cost:  24.2888607704
23 Cost:  24.2372117322
24 Cost:  24.1911200201
25 Cost:  24.1494636905
26 Cost:  24.1114543471
27 Cost:  24.0765236215
28 Cost:  24.0442498035
29 Cost:  24.0143102581
30 Cost:  23.9864504249
31 Cost:  23.9604634982
32 Cost:  23.9361770008
33 Cost:  23.9134438198
34 Cost:  23.892136141
35 Cost:  23.8721412754
36 Cost:  23.8533587287
37 Cost:  23.8356980951
38 Cost:  23.8190775015
39 Cost:  23.8034224269
40 Cost:  23.7886647798
41 Cost:  23.7747421583
42 Cos