In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np

In [2]:
# Load the California housing dataset with pandas containers; `.frame` is the
# single DataFrame previewed below (feature columns plus MedHouseVal).
data = fetch_california_housing(as_frame=True)
df = data.frame

In [3]:
# Preview the first two rows to check column names and value ranges.
df.head(n=2)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585


In [4]:
# Count missing values per column (isna is the primary name of isnull).
df.isna().sum()

MedInc         0
HouseAge       0
AveRooms       0
AveBedrms      0
Population     0
AveOccup       0
Latitude       0
Longitude      0
MedHouseVal    0
dtype: int64

In [5]:
# (number of rows, number of columns) of the full frame, target column included.
df.shape

(20640, 9)

In [7]:
# Split the frame into the feature matrix (every column except the target)
# and the target vector (MedHouseVal).
x = df.drop(columns=['MedHouseVal'])
y = df['MedHouseVal']

In [8]:
def cost(x,y,w,b):
    """Halved mean squared error of the linear model ``x @ w + b``.

    Computes ``J(w, b) = 1 / (2 * m) * sum((x_i . w + b - y_i) ** 2)`` over
    the ``m`` rows of ``x``.

    Parameters
    ----------
    x : DataFrame or 2-D array-like of shape (m, n)
        Feature matrix, one sample per row.
    y : Series or array-like of length m
        Target values, matched to the rows of ``x`` by position (the pandas
        index is ignored, so shuffled or split data works as well).
    w : array-like of length n
        Model weights.
    b : scalar
        Model bias.

    Returns
    -------
    float
        The cost ``J(w, b)``.

    Raises
    ------
    ValueError
        If ``x`` has no rows, because the mean is undefined.
    """
    # Work on plain arrays: positional alignment and one vectorised pass
    # instead of a Python loop with per-row pandas lookups.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float).ravel()
    weights = np.asarray(w, dtype=float)

    m = features.shape[0]
    if m == 0:
        raise ValueError("cost is undefined for an empty dataset")

    residuals = features @ weights + b - targets
    return float(residuals @ residuals) / (2 * m)

In [9]:
# Start from an all-zero model (one weight per feature column) and evaluate
# the cost it produces as a baseline.
w = np.zeros((x.shape[1],))
b = 0
cost(x, y, w, b)

2.8052415994936792

In [11]:
def gradient_function(x,y,w,b):
    """Gradient of the halved mean squared error for the model ``x @ w + b``.

    Parameters
    ----------
    x : DataFrame or 2-D array-like of shape (m, n)
        Feature matrix, one sample per row.
    y : Series or array-like of length m
        Target values, matched to the rows of ``x`` by position (the pandas
        index is ignored).
    w : array-like of length n
        Current weights.
    b : scalar
        Current bias.

    Returns
    -------
    dj_dw : numpy.ndarray of shape (n,)
        Partial derivatives of the cost with respect to each weight,
        averaged over the ``m`` samples.
    dj_db : float
        Partial derivative of the cost with respect to the bias, averaged
        over the ``m`` samples.

    Raises
    ------
    ValueError
        If ``x`` has no rows, because the average is undefined.
    """
    # Plain arrays give positional alignment and let NumPy do the two
    # reductions instead of m * n scalar pandas lookups.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float).ravel()
    weights = np.asarray(w, dtype=float)

    m = features.shape[0]
    if m == 0:
        raise ValueError("gradient is undefined for an empty dataset")

    errors = features @ weights + b - targets
    dj_dw = features.T @ errors / m
    dj_db = float(errors.sum() / m)
    return dj_dw, dj_db

In [12]:
# Gradient at the current parameters (all zeros when cells are run in order).
gradient_function(x=x, y=y, w=w, b=b)

(array([-9.51510987e+00, -6.07763573e+01, -1.16640064e+01, -2.24299836e+00,
        -2.91647064e+03, -6.06734862e+00, -7.33512697e+01,  2.47443158e+02]),
 -2.0685581690891843)

In [13]:
def gradient_descent(x,y,w,b,cost,gradient_function,alpha,iter):
    j_history=[]
    w_history=[]
    m,n=x.shape
    for i in range(iter):
        dj_dw,dj_db=gradient_function(x,y,w,b)
        w=w-(alpha/m)*dj_dw
        b=b-(alpha/m)*dj_db

        cost_values=cost(x,y,w,b)
        j_history.append(cost_values)
        w_history.append(w)
        if i%10==0:
            print(f'the cost at iteration {i} is {j_history[-1]:.2f}')
    return w,b,j_history,w_history        


In [None]:
alpha=0.001
iter=100
w,b,j_history,_=gradient_descent(x,y,w,b,cost,gradient_function,alpha,iter)
print(f'the final values of w and b are {w} and {b}')

the cost at iteration 0 is 2.42
the cost at iteration 10 is 1.54
the cost at iteration 20 is 1.51
the cost at iteration 30 is 1.50
the cost at iteration 40 is 1.50
the cost at iteration 50 is 1.49
the cost at iteration 60 is 1.48
the cost at iteration 70 is 1.48
the cost at iteration 80 is 1.47
the cost at iteration 90 is 1.47
the final values of w and b are [ 2.36770910e-05  1.45366432e-04  2.59126533e-05  4.66507230e-06
  8.51510785e-04  8.40846382e-06  1.50359891e-04 -5.08290580e-04] and 4.239321284716021e-06
