In [None]:
#Importing the required libraries (scikit-learn and matplotlib)
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the California Housing dataset.
# The returned object is kept whole: later cells read its description,
# feature/target names, and data/target arrays from it.
housing_dataset = fetch_california_housing()

In [None]:
# Print the description bundled with the dataset (size, attribute list,
# target units) to get an overview before modelling.
print(housing_dataset.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [None]:
# Display the names of the eight predictor features.
housing_dataset.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [None]:
# Display the name of the target variable.
housing_dataset.target_names

['MedHouseVal']

In [None]:
# Display the raw feature array. NumPy abbreviates the repr with ellipses,
# so only the first and last few rows/columns are shown.
housing_dataset.data

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [None]:
# Display the target array (median house value, expressed in units of
# $100,000 according to the dataset description).
housing_dataset.target

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [None]:
# Pull the feature matrix (x) and the target vector (y) out of the dataset bundle.
x, y = housing_dataset['data'], housing_dataset['target']

In [None]:
# Put all features on a uniform scale using StandardScaler.
# The scaling statistics (per-feature mean and standard deviation) are learned
# from the training rows only, so the held-out test rows cannot influence the
# preprocessing (avoids train/test leakage).
# The training rows are selected with the same test_size and random_state as
# the train/test split cell further down, which yields the same partition;
# keep the two in sync if either one changes.
scaler = StandardScaler()
x_fit = train_test_split(x, test_size=0.20, random_state=42)[0]
scaler.fit(x_fit)
# Transform every row with the train-derived statistics; the split cell then
# partitions this already-scaled matrix.
x = scaler.transform(x)


In [None]:
# Sanity check: features and targets should have the same number of rows.
print(x.shape,y.shape)

(20640, 8) (20640,)


In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# Check the size of the training split (features and targets must match in rows).
print(x_train.shape,y_train.shape)

(16512, 8) (16512,)


In [None]:
# Check the size of the test split (features and targets must match in rows).
print(x_test.shape,y_test.shape)

(4128, 8) (4128,)


In [None]:
# Fit a LinearRegression model on the training split, then predict the
# target for the held-out test rows.
model = LinearRegression().fit(x_train, y_train)
y_pred = model.predict(x_test)

In [None]:
# Evaluation metrics on the test split: R^2 score and mean squared error.
# The target is expressed in units of $100,000, so the MSE is in the square
# of that unit.
mse_err = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2_s = r2_score(y_true=y_test, y_pred=y_pred)

print(r2_s, mse_err)

0.5757877060324511 0.555891598695244
