#### In this notebook we build a Locally Weighted Linear Regression (LWLR) model and test it on the Abalone dataset.

In [1]:
#Importing the requisite Libraries/Packages:
from numpy import *

In [2]:
#Loading the data from a text file('abalone.txt'):
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into feature rows and labels.

    Each non-blank line is split on tabs. The last field of a line is its
    label; the leading fields are its features. The number of feature
    columns is taken from the first line of the file (tab count of that line).

    Parameters
    ----------
    fileName : str
        Path of the tab-delimited text file to read.

    Returns
    -------
    dataMat : list of list of float
        One inner list of feature values per non-blank line.
    labelMat : list of float
        The last field of each non-blank line.

    Raises
    ------
    ValueError
        If a field cannot be converted with ``float``.
    IndexError
        If a non-blank line has fewer fields than the first line implies.
    """
    dataMat = []; labelMat = []
    # Read the file once inside a context manager so the handle is always
    # closed, including when parsing below raises.
    with open(fileName) as fr:
        lines = fr.readlines()
    if not lines:
        return dataMat, labelMat
    numFeat = len(lines[0].split('\t')) - 1
    for line in lines:
        stripped = line.strip()
        # Blank lines (e.g. a trailing newline at end of file) carry no
        # record; skip them instead of failing on float('').
        if not stripped:
            continue
        curLine = stripped.split('\t')
        dataMat.append([float(curLine[i]) for i in range(numFeat)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

In [3]:
#Constructing the function for 'Locally Weighted Linear Regression Algorithm':
def lwlr(testPoint,xArr,yArr,k=1.0):
    """Predict the target at one query point with locally weighted linear regression.

    Every training row x_j receives the Gaussian-kernel weight
    ``exp(-||testPoint - x_j||^2 / (2 * k**2))``. The weighted normal
    equations ``(X^T W X) ws = X^T W y`` are then solved for ``ws`` and the
    prediction is ``testPoint * ws``.

    Parameters
    ----------
    testPoint : sequence or 1 x n matrix
        Feature values of the query point.
    xArr : sequence of rows (m x n)
        Training feature rows.
    yArr : sequence of length m
        Training targets, one per row of ``xArr``.
    k : float, optional
        Kernel width; smaller values concentrate the weight on the training
        rows closest to ``testPoint``.

    Returns
    -------
    1 x 1 numpy matrix holding the prediction, or ``None`` (after printing a
    message) when the determinant of ``X^T W X`` is exactly 0.0.
    """
    # asmatrix is used instead of the `mat` alias, which NumPy 2.0 removed.
    xMat = asmatrix(xArr); yMat = asmatrix(yArr).T
    queryRow = asmatrix(testPoint)
    # Squared distance from the query to every training row in one shot;
    # the (1, n) query row broadcasts against the (m, n) training matrix.
    diffMat = queryRow - xMat
    sqDist = multiply(diffMat, diffMat).sum(axis=1)
    # Keep only the diagonal of W as an (m, 1) column: scaling row j by w_j
    # equals multiplying by the diagonal matrix, without allocating m x m.
    w = exp(sqDist / (-2.0 * k**2))
    xTx = xMat.T * multiply(w, xMat)
    # Exact comparison kept from the original contract: only a determinant
    # of exactly 0.0 is reported as singular.
    if linalg.det(xTx) == 0.0:
        print ("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * multiply(w, yMat))
    return queryRow * ws

In [4]:
#Constructing the function for testing the data:
def lwlrTest(testArr,xArr,yArr,k=1.0):
    """Run ``lwlr`` on every row of ``testArr`` and collect the predictions.

    Parameters
    ----------
    testArr : sequence of rows
        Query points; one prediction is produced per row.
    xArr, yArr : sequences
        Training features and targets, passed unchanged to ``lwlr``.
    k : float, optional
        Kernel width, passed unchanged to ``lwlr``.

    Returns
    -------
    1-D float ndarray of length ``shape(testArr)[0]``. An entry is NaN where
    ``lwlr`` returned ``None`` (its singular-matrix case).
    """
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i],xArr,yArr,k)
        # lwlr hands back a 1 x 1 matrix; pull the scalar out explicitly
        # rather than relying on NumPy's deprecated implicit conversion of
        # a size-1, ndim > 0 array when assigning into a float slot.
        yHat[i] = float('nan') if pred is None else asarray(pred).item()
    return yHat

In [6]:
#Constructing the function for calculating the error:
def rssError(yArr,yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Both inputs are converted to arrays and flattened to 1-D before the
    subtraction, so plain lists, ndarrays, and row/column vectors are compared
    element by element instead of failing or broadcasting a row against a
    column into a 2-D grid.

    Parameters
    ----------
    yArr : array-like
        Observed target values.
    yHatArr : array-like
        Predicted values, in the same order as ``yArr``.

    Returns
    -------
    NumPy scalar: the sum of ``(yArr - yHatArr) ** 2`` over all elements.
    """
    actual = asarray(yArr).ravel()
    predicted = asarray(yHatArr).ravel()
    return ((actual - predicted)**2).sum()

In [7]:
# Read 'abalone.txt' once: abX receives the feature rows, abY the matching labels.
abX, abY = loadDataSet(fileName='abalone.txt')

In [9]:
# Predict on the training slice itself (abX[0:99]) for three kernel widths k;
# the results are bound, in order, to yHat01 (k=0.1), yHat1 (k=1) and yHat10 (k=10).
yHat01, yHat1, yHat10 = [
    lwlrTest(abX[0:99], abX[0:99], abY[0:99], k) for k in (0.1, 1, 10)
]

In [11]:
# Residual sum of squares on the training slice for k=0.1
# (lwlrTest returns a 1-D array, so no transpose is needed):
rssError(abY[0:99], yHat01)

56.82523568972884

In [13]:
# Residual sum of squares on the training slice for k=1
# (lwlrTest returns a 1-D array, so no transpose is needed):
rssError(abY[0:99], yHat1)

429.89056187006514

In [14]:
# Residual sum of squares on the training slice for k=10
# (lwlrTest returns a 1-D array, so no transpose is needed):
rssError(abY[0:99], yHat10)

549.1181708826451

In [15]:
# Held-out error for k=0.1: fit on abX[0:99], predict abX[100:199].
# NOTE(review): row 99 falls in neither slice, and this rebinds yHat01,
# replacing the training-slice predictions computed earlier.
yHat01 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], k=0.1)
rssError(abY[100:199], yHat01)

41317.161723642595

In [17]:
# Held-out error for k=1: fit on abX[0:99], predict abX[100:199].
# NOTE(review): row 99 falls in neither slice, and this rebinds yHat1,
# replacing the training-slice predictions computed earlier.
yHat1 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], k=1)
rssError(abY[100:199], yHat1)

573.526144189767

In [19]:
# Held-out error for k=10: fit on abX[0:99], predict abX[100:199].
# NOTE(review): row 99 falls in neither slice, and this rebinds yHat10,
# replacing the training-slice predictions computed earlier.
yHat10 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], k=10)
rssError(abY[100:199], yHat10)

517.5711905387598