In [1]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge

In [2]:
# Load the raw dataset once, only to read its built-in description.
bean = datasets.load_boston()
# Single-argument parenthesized print: same output on Python 2, and it also
# parses on Python 3 (the bare `print x` statement does not).
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [3]:
def load_boston(random_state=None):
    """Return a standardized train/test split of the Boston housing data.

    The data is split first and the scaler is fitted on the training rows
    only; the test rows are transformed with those training statistics.
    Fitting the scaler on the full dataset (as was done before) lets
    test-set means/variances leak into the training features.

    Parameters
    ----------
    random_state : int or None, optional
        Forwarded to ``train_test_split``. ``None`` (the default) keeps the
        previous behaviour of an unseeded, run-to-run varying split; pass an
        integer to make the split reproducible.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]``, in the same order that
        ``train_test_split`` produces them.
    """
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=random_state)

    # Learn the scaling from the training rows only, then reuse it for test.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return [X_train, X_test, y_train, y_test]

In [4]:
# Build the train/test split used by every model below.
# NOTE(review): load_boston() is called without a seed, so the split (and
# therefore every score recorded in this notebook) changes on each run.
X_train, X_test, y_train, y_test = load_boston()

In [5]:
# Sanity check: size of the training feature matrix as (rows, columns).
X_train.shape

(379L, 13L)

In [6]:
# Baseline: linear regression fitted on the training split.
# fit() hands back the estimator itself, so construction and fitting can be
# chained; the bare name on the last line displays the fitted model's repr.
clf = LinearRegression().fit(X_train, y_train)
clf

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
# Pair each true test target with the linear model's prediction.
# list(...) materializes the pairs: identical result on Python 2, and on
# Python 3 (where zip() is lazy) the cell still shows the values instead of
# a `<zip object>` repr.
list(zip(y_test, clf.predict(X_test)))

[(12.300000000000001, 12.996595555057269),
 (50.0, 39.697498147443731),
 (33.100000000000001, 32.242531512792766),
 (22.699999999999999, 25.114566864716387),
 (14.9, 17.241825862554187),
 (8.0999999999999996, 1.5948185016999119),
 (50.0, 39.784876397875209),
 (19.399999999999999, 16.57956412551134),
 (21.600000000000001, 26.05553548721381),
 (8.8000000000000007, 7.1665872287725971),
 (14.300000000000001, 13.308014072123564),
 (23.199999999999999, 22.714831706495428),
 (20.899999999999999, 20.77644712576619),
 (23.899999999999999, 27.526623124377785),
 (19.399999999999999, 22.747767900047471),
 (30.100000000000001, 35.408822545711331),
 (20.5, 20.683602375262478),
 (35.399999999999999, 34.60128356212573),
 (18.300000000000001, 18.743207317998685),
 (20.100000000000001, 18.870347487921634),
 (13.1, 14.490898064714845),
 (12.1, 18.089846668253468),
 (5.0, 6.1429781585383729),
 (39.799999999999997, 34.835449322363452),
 (18.699999999999999, 20.523804176082436),
 (22.300000000000001, 27.504

In [21]:
# Keep the linear model's test-set predictions for the metric cells below.
yLrPre = clf.predict(X_test)

In [22]:
# R^2 of the linear model on the test split (true values first, predictions second).
r2Score = r2_score(y_true=y_test, y_pred=yLrPre)

In [23]:
# Display the linear model's test-set R^2.
r2Score

0.79159300347072048

The $R^2$ score of the linear regression model on the test set is 0.79159300347072048.

In [24]:
# Mean squared error of the linear model on the test split.
mseValue = mean_squared_error(y_true=y_test, y_pred=yLrPre)

In [25]:
# Display the linear model's test-set mean squared error.
mseValue

20.659265269020356

The mean squared error (MSE) of the linear regression model on the test set is 20.659265269020356.

In [26]:
# Ridge regression with regularization strength alpha = 0.01.
# NOTE(review): the value is hard-coded and not tuned here — consider
# selecting it by cross-validation before comparing against the baseline.
ridge = Ridge(alpha=0.01)

In [27]:
# Fit the ridge model on the same training split as the linear baseline.
ridge.fit(X=X_train, y=y_train)

Ridge(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [28]:
# Pair each true test target with the ridge model's prediction.
# list(...) materializes the pairs: identical result on Python 2, and on
# Python 3 (where zip() is lazy) the cell still shows the values instead of
# a `<zip object>` repr.
list(zip(y_test, ridge.predict(X_test)))

[(12.300000000000001, 12.996430251705286),
 (50.0, 39.696995408851578),
 (33.100000000000001, 32.242205691736039),
 (22.699999999999999, 25.114260184750272),
 (14.9, 17.241888142588266),
 (8.0999999999999996, 1.5976420235002458),
 (50.0, 39.784191285183653),
 (19.399999999999999, 16.580185414663944),
 (21.600000000000001, 26.055295876731556),
 (8.8000000000000007, 7.1674995328415534),
 (14.300000000000001, 13.308684954039968),
 (23.199999999999999, 22.714379805716597),
 (20.899999999999999, 20.77705887209072),
 (23.899999999999999, 27.526661615760876),
 (19.399999999999999, 22.748369733435972),
 (30.100000000000001, 35.407971147377481),
 (20.5, 20.681944111753658),
 (35.399999999999999, 34.600361884420984),
 (18.300000000000001, 18.743771885909954),
 (20.100000000000001, 18.870044609190924),
 (13.1, 14.491224373801327),
 (12.1, 18.089752246696619),
 (5.0, 6.1435599256950866),
 (39.799999999999997, 34.834863588804552),
 (18.699999999999999, 20.52439786015325),
 (22.300000000000001, 27.5

In [29]:
# Keep the ridge model's test-set predictions for the metric cells below.
yRPre = ridge.predict(X_test)

In [30]:
# R^2 of the ridge model on the test split (true values first, predictions second).
r2rScore = r2_score(y_true=y_test, y_pred=yRPre)

In [31]:
# Display the ridge model's test-set R^2.
r2rScore

0.79161235555750742

The $R^2$ score of the ridge regression model (alpha = 0.01) on the test set is 0.79161235555750742.

In [32]:
# Mean squared error of the ridge model on the test split.
msRScor = mean_squared_error(y_true=y_test, y_pred=yRPre)

In [33]:
# Display the ridge model's test-set mean squared error.
msRScor

20.657346907827602

The mean squared error (MSE) of the ridge regression model (alpha = 0.01) on the test set is 20.657346907827602.