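 This notebook implements batch gradient descent to fit a linear regression model for a hypothetical problem: it learns the weights from `train_data.csv` and writes predictions for `test_input.csv` to `testop.csv`.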

In [39]:
import numpy as np  # Importing numpy

In [40]:
# Parse the training CSV and keep everything after row 0. Row 0 is discarded
# because it parses to NaN values (presumably the header line; confirm against the file).
train_data = np.genfromtxt('train_data.csv', delimiter=',')[1:]

In [41]:
y = train_data[:, -1]                      # Targets: the last column of the training table
m = len(train_data)                        # Number of training examples (rows)
X = train_data[:, 0:4]                     # Feature/design matrix: columns 0-3
X = np.insert(X, 0, np.ones(m), axis=1)    # Prepend the bias column x0 = 1 to every row
# One weight per design-matrix column, starting at zero. Sized from X so the two
# cannot drift apart, and created as floats so in-place updates would also be safe.
theta = np.zeros(X.shape[1])
alpha = 0.03                               # Learning rate (gradient-descent step size)

In [42]:
# Given below is a function used to calculate the cost/loss 

def calc_cost(hypothesis,y):
    """Return the halved mean squared error between predictions and targets.

    Computes ``sum((hypothesis - y) ** 2) / (2 * n)`` where ``n`` is the number
    of residuals. ``n`` is taken from the inputs themselves rather than from a
    notebook-level variable, so the function gives the right answer for any
    dataset it is handed, not only the training set.

    Parameters:
        hypothesis: predicted values (array-like of numbers).
        y: target values, broadcastable against ``hypothesis``.

    Returns:
        The cost as a numpy float.

    Raises:
        ValueError: if the inputs contain no elements (the mean is undefined).
    """
    diff = np.asarray(hypothesis, dtype=float) - np.asarray(y, dtype=float)
    n = diff.size
    if n == 0:
        raise ValueError("calc_cost needs at least one prediction/target pair")
    s = np.sum(diff**2)    # Sum of squared errors
    # 2.0 keeps this a true division even under Python 2 integer semantics.
    return s / (2.0 * n)

In [43]:
# Starting point for gradient descent: predictions, residuals and cost at the initial theta.
hypothesis = X.dot(theta)               # theta is 1-D, so no transpose is needed
diff = hypothesis - y                   # Residuals (prediction minus target)
oldcost = calc_cost(hypothesis, y)      # Cost before the first update

In [44]:
# Batch gradient descent. Each pass moves theta against the gradient of the cost,
# which for the squared-error cost in calc_cost is (1/m) * X^T (hypothesis - y);
# the derivative was worked out by hand rather than computed numerically.
# The loop stops when the cost changes by no more than `tolerance` between passes.
tolerance = 0.000000001       # Convergence threshold on the change in cost
max_iterations = 1000000      # Safety cap so a slowly converging run cannot spin forever
ct = 1
while True:
    theta = theta - (alpha/m)*np.dot(diff, X)      # Simultaneous update of every weight
    hypothesis = np.dot(X, theta)                  # Predictions under the new weights
    diff = hypothesis - y
    newcost = calc_cost(hypothesis, y)             # Cost under the new weights
    # Single formatted string: prints identically under Python 2 and is valid Python 3.
    print("Iteration:- %s  Cost =  %s" % (ct, newcost))
    ct = ct + 1
    if not np.isfinite(newcost):
        # A NaN/inf cost never satisfies the convergence test below, so without
        # this check a diverging run (learning rate too large) would loop forever.
        raise FloatingPointError(
            "Gradient descent diverged (cost = %s); try a smaller alpha" % newcost)
    if abs(newcost - oldcost) <= tolerance:        # Checking for convergence
        break
    if ct > max_iterations:
        print("Warning: stopped after %d iterations without converging" % max_iterations)
        break
    oldcost = newcost

Iteration:- 1  Cost =  9.23117073133
Iteration:- 2  Cost =  8.15011072165
Iteration:- 3  Cost =  7.19822674457
Iteration:- 4  Cost =  6.36006990235
Iteration:- 5  Cost =  5.6220389907
Iteration:- 6  Cost =  4.97215951358
Iteration:- 7  Cost =  4.39988912804
Iteration:- 8  Cost =  3.89594635795
Iteration:- 9  Cost =  3.45215979359
Iteration:- 10  Cost =  3.06133532717
Iteration:- 11  Cost =  2.71713926694
Iteration:- 12  Cost =  2.41399543102
Iteration:- 13  Cost =  2.14699454878
Iteration:- 14  Cost =  1.91181449791
Iteration:- 15  Cost =  1.70465008118
Iteration:- 16  Cost =  1.52215120192
Iteration:- 17  Cost =  1.36136843379
Iteration:- 18  Cost =  1.21970510033
Iteration:- 19  Cost =  1.09487508598
Iteration:- 20  Cost =  0.984865692773
Iteration:- 21  Cost =  0.8879049395
Iteration:- 22  Cost =  0.802432771846
Iteration:- 23  Cost =  0.727075715835
Iteration:- 24  Cost =  0.660624562741
Iteration:- 25  Cost =  0.60201472288
Iteration:- 26  Cost =  0.550308929115
Iteration:- 27  Co

In [45]:
# Display the learned parameters. Written as one formatted string so it runs on
# Python 2 and 3 alike; the double space reproduces the original print spacing.
print("Weights arrived at:-  %s" % (theta,))

Weights arrived at:-  [ 1.6712542   0.71686719  3.12604733  0.4236616   1.21379148]


In [46]:
# Values predicted for the training rows by the learned theta
# (theta is 1-D, so the transpose the original applied was a no-op).
predicted = np.dot(X, theta)
print(predicted)     # parenthesised form is valid on both Python 2 and Python 3

[ 6.19915552  2.87989987  4.91548445  2.71188232  4.79889363  2.91008921
  5.38027605  3.00700752  6.31704439  3.52082133  4.22935459  2.98600444
  3.1943955   2.20708758  5.43229502  3.26377494  3.95685655  4.26210833
  3.11143852  5.81319686  3.32896393  5.49146177  2.95134046  3.12537947
  4.03145606  3.15895408  2.88045164  4.41634121  3.29171561  5.44429556
  3.24289176  3.66519046  5.0492612   4.8012544   4.48061298  5.56483498
  5.40492197  2.91373898  4.16129582  5.03359468  2.76312346  5.20967066
  5.41725286  3.45880138  4.65672777  5.08920562  5.86017043  3.68965448
  3.5701568   4.50310184  4.79426242  5.36879806  4.40350931  3.82656719
  3.20929541  3.00805337  5.35952176  4.58233188  5.81131969  3.15576473
  2.33875817  3.54652885  4.78325396  6.06607694  3.61776716  3.99102622
  4.32753     4.9525326   5.286994    4.8993675   4.91757068  5.63621455
  5.69188204  5.30191553  2.91710774  5.50976083  2.92214607  6.3796793
  3.90848864  3.52960279  3.67509028  6.06490594  1.

In [47]:
# Residual (prediction minus target) for every training row.
# Parenthesised print calls behave the same on Python 2 and are valid Python 3.
print("The errors for each prediction are given in the following vector:-")
print(predicted - y)

The errors for each prediction are given in the following vector:-
[ -9.44486963e-04   2.18883537e-03   5.88902478e-04   2.35101644e-03
   1.25770346e-04   1.81549295e-03  -9.32965594e-04   9.48777053e-04
  -1.42530950e-03   8.39003844e-04   7.92106917e-04   1.14788836e-03
   7.17448130e-04   1.98107951e-03  -2.39729438e-04   1.08865610e-03
   9.33450452e-05   6.63639136e-04   5.56521349e-04  -9.71992040e-04
   9.14872964e-04  -8.73517264e-04   1.17298462e-03   5.40408530e-04
   6.56924439e-04   1.23283226e-03   1.81868662e-03   2.86251270e-04
   9.25665813e-04  -1.03801899e-03   6.62448274e-04   1.83807011e-04
   4.67126683e-04   9.63590206e-04  -6.47195611e-04  -6.25622764e-04
   9.70473333e-05   8.02386081e-04   1.01916265e-03  -9.57981623e-04
   1.55882463e-03  -1.37393904e-03   3.19806531e-04   3.59770597e-04
   1.85335216e-05  -7.10657173e-04  -1.03037505e-03  -5.12061176e-05
   1.28674763e-04   1.12308894e-03  -1.10057945e-04  -4.41907969e-04
  -3.12122884e-04   6.34319662e-04  

In [48]:
# Score the test inputs with the learned weights and save the predictions to disk.
# Row 0 of the parsed file is dropped, exactly as is done for the training data.
test = np.genfromtxt('test_input.csv', delimiter=',')[1:]
# Prepend the bias column x0 = 1 so the columns line up with theta.
test = np.insert(test, 0, np.ones(test.shape[0]), axis=1)
np.savetxt("testop.csv", test.dot(theta), delimiter=",")