# Importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importing data

In [None]:
# Load the train and test splits of the California housing sample data.
# NOTE(review): the absolute /content/sample_data path is presumably Colab's
# bundled sample directory — confirm before running elsewhere.
house_calif_train = pd.read_csv('/content/sample_data/california_housing_train.csv')
house_calif_test = pd.read_csv('/content/sample_data/california_housing_test.csv')

In [None]:
# Print the training DataFrame to inspect its rows and columns.
print(house_calif_train)

       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0        -114.31     34.19                15.0       5612.0          1283.0   
1        -114.47     34.40                19.0       7650.0          1901.0   
2        -114.56     33.69                17.0        720.0           174.0   
3        -114.57     33.64                14.0       1501.0           337.0   
4        -114.57     33.57                20.0       1454.0           326.0   
...          ...       ...                 ...          ...             ...   
16995    -124.26     40.58                52.0       2217.0           394.0   
16996    -124.27     40.69                36.0       2349.0           528.0   
16997    -124.30     41.84                17.0       2677.0           531.0   
16998    -124.30     41.80                19.0       2672.0           552.0   
16999    -124.35     40.54                52.0       1820.0           300.0   

       population  households  median_income  media

# Separating the input variables and the predicted (target) value

## Adding Bias

In [None]:
# Feature matrix: every column of the training frame except the last one.
x = house_calif_train.iloc[:, :-1].values
# Prepend a constant column of ones at index 0 so the model gets an intercept term.
z = np.insert(x, 0, 1, axis=1)
print(z)

[[ 1.0000e+00 -1.1431e+02  3.4190e+01 ...  1.0150e+03  4.7200e+02
   1.4936e+00]
 [ 1.0000e+00 -1.1447e+02  3.4400e+01 ...  1.1290e+03  4.6300e+02
   1.8200e+00]
 [ 1.0000e+00 -1.1456e+02  3.3690e+01 ...  3.3300e+02  1.1700e+02
   1.6509e+00]
 ...
 [ 1.0000e+00 -1.2430e+02  4.1840e+01 ...  1.2440e+03  4.5600e+02
   3.0313e+00]
 [ 1.0000e+00 -1.2430e+02  4.1800e+01 ...  1.2980e+03  4.7800e+02
   1.9797e+00]
 [ 1.0000e+00 -1.2435e+02  4.0540e+01 ...  8.0600e+02  2.7000e+02
   3.0147e+00]]


In [None]:
# Target vector: the last column of the training frame, as a NumPy array.
y = house_calif_train.iloc[:, -1].values
print(y)

[ 66900.  80100.  85700. ... 103600.  85800.  94600.]


# Calculating cost

## Initialising bias and weights

In [None]:
import random #for intialising weight and bias randomly
def initialise(dim): # dim: length of the weight vector
  """Draw random starting parameters for the linear model.

  Args:
    dim: number of weights to generate.

  Returns:
    Tuple ``(b, w)`` where ``b`` is a float from ``random.random()`` and
    ``w`` is a 1-D array of ``dim`` values from ``np.random.rand``.
  """
  weights = np.random.rand(dim)
  bias = random.random()
  return bias, weights
# One weight per column of z (the bias column included), plus a separate scalar bias.
b, w = initialise(z.shape[1])
b


0.6500563645325316

## calculating y_pred

In [None]:
def calculating_pred(b,w,x):
  """Return the linear model output ``b + x·w``.

  Args:
    b: bias term added to every prediction.
    w: weight vector.
    x: input matrix passed to ``np.dot(x, w)``.

  Returns:
    The result of ``b + np.dot(x, w)``.
  """
  weighted_sum = np.dot(x, w)
  return b + weighted_sum
# Predictions of the freshly initialised (random) parameters on the training matrix.
y_cal = calculating_pred(b=b, w=w, x=z)
y_cal


array([4325.94644915, 5742.41048729,  635.13204627, ..., 2411.47053423,
       2451.67068024, 1556.39131147])

## Calculating cost

In [None]:
import math
def get_cost(h,y):
  """Mean squared error between predictions ``h`` and targets ``y``.

  Args:
    h: predicted values.
    y: observed values, subtracted element-wise from ``h``.

  Returns:
    Sum of squared residuals divided by the number of residuals.
  """
  residual = h - y
  squared_error_total = np.sum(np.dot(residual.T, residual))
  return squared_error_total / len(residual)
# Cost of the initial random parameters' predictions against the training targets.
h= y_cal
get_cost(h,y)


55364455323.54392

## Updating parameters using the learning rate

In [None]:
def modify_w(h,y,w,b,alpha,x=None):
  """Apply one gradient-descent step to the bias and weights.

  The gradients are those of the mean squared error:
  ``del_b = 2/n * sum(h - y)`` and ``del_w = 2/n * (h - y)·x``.

  Args:
    h: current predictions.
    y: target values; ``len(y)`` is used as the sample count ``n``.
    w: current weight vector.
    b: current bias.
    alpha: learning rate.
    x: design matrix the predictions were computed from. When omitted, the
      module-level ``z`` is used, which keeps existing five-argument calls
      working. Pass it explicitly so the step does not depend on global state.

  Returns:
    Tuple ``(b_new, w_new)`` with the updated bias and weights.
  """
  if x is None:
    x = z  # backward-compatible fallback to the notebook's global matrix
  residual = h - y
  n_samples = len(y)
  del_b = np.sum(residual)*2/n_samples
  del_w = np.dot(residual,x)*2/n_samples
  b_new = b - alpha*del_b
  w_new = w - alpha*del_w
  return b_new,w_new
# One gradient step with learning rate 0.01. This rebinds b and w, while y_cal
# is not recomputed here, so re-running the cell steps again from stale predictions.
b, w = modify_w(h=y_cal, y=y, w=w, b=b, alpha=0.01)
w



array([ 1.63862016e+04, -1.95995374e+06,  5.80888144e+05,  4.81609286e+05,
        4.56288111e+07,  8.95359487e+06,  2.29627759e+07,  8.37216879e+06,
        7.58484987e+04])

# Minimizing cost over all iterations

In [None]:
def grad_descent(x,y,alpha,num_iter):
  """Run ``num_iter`` gradient-descent updates and return the final parameters.

  Parameters start from ``initialise`` with one weight per column of ``x``.
  Each iteration recomputes the predictions with ``calculating_pred`` and
  applies one ``modify_w`` step.

  Args:
    x: design matrix; ``len(x[0])`` fixes the number of weights.
    y: target values.
    alpha: learning rate passed to every update step.
    num_iter: number of update steps. With 0 the initial random parameters
      are returned unchanged.

  Returns:
    Tuple ``(b, w)`` after the last update.

  NOTE(review): ``modify_w`` is called without the design matrix, so confirm
  it computes its gradient from the same matrix as ``x``.
  NOTE(review): no feature scaling is visible before this call; with
  large-magnitude columns a fixed ``alpha`` may diverge over many iterations.
  """
  # Size the weights from the argument, not from a module-level matrix.
  b,w = initialise(len(x[0]))
  for _ in range(num_iter):
    h = calculating_pred(b,w,x)
    b,w = modify_w(h,y,w,b,alpha)
  # Return only after the loop; returning inside it stopped after one step.
  return b,w

# Fit on the bias-augmented training matrix: learning rate 0.001, 200 iterations requested.
grad_descent(x=z, y=y, alpha=0.001, num_iter=200)


(410.4747677259305,
 array([ 4.10330088e+02, -4.90688612e+04,  1.45438521e+04,  1.20548106e+04,
         1.14349219e+06,  2.24380127e+05,  5.75416202e+05,  2.09800146e+05,
         1.89932123e+03]))