In [1]:
import pandas as pd 
from pandas_datareader import data 
import numpy as np 
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split 
from sklearn import metrics 
from sklearn.metrics import r2_score 
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV 

In [2]:
# Load the California housing dataset object from scikit-learn.
housing = fetch_california_housing()

# Wrap the raw feature matrix in a DataFrame (columns are still unlabelled here).
df = pd.DataFrame(data=housing.data)

In [3]:
# Build the labelled frame (feature columns + 'Target') directly from `housing`
# instead of mutating `df` in place. The previous in-place version failed with a
# length-mismatch error when this cell was re-run, because `df` already carried
# the extra 'Target' column while `housing.feature_names` only lists the features.
df = pd.DataFrame(housing.data, columns=housing.feature_names).assign(
    Target=housing.target
)

In [4]:
# Preview the first rows of the assembled frame (feature columns plus 'Target').
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [5]:
# Sanity-check the dimensions as (rows, columns); the columns include 'Target'.
df.shape

(20640, 9)

In [6]:
# Separate predictors from the response by column label rather than by
# hard-coded position, so the split stays correct if columns are added or
# reordered upstream.
x = df.drop(columns='Target')   # every column except the response
y = df['Target']                # response column, added from housing.target

In [7]:
# Confirm the feature matrix holds only the predictor columns (no 'Target').
x.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [8]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Baseline model: a regression tree limited to depth 5.
# random_state is pinned (same seed as the train/test split) so that a
# Restart & Run All reproduces the same fitted tree, and therefore the same
# score and feature importances, instead of depending on the global RNG state.
DT_Regressor = DecisionTreeRegressor(
    criterion='friedman_mse',
    max_depth=5,
    random_state=42,
)
DT_Regressor.fit(x_train, y_train)

In [10]:
# Predict the target for the held-out rows with the baseline tree.
y_predict = DT_Regressor.predict(X=x_test)

In [11]:
# Coefficient of determination of the baseline predictions on the test set.
r2 = r2_score(y_true=y_test, y_pred=y_predict)


In [12]:
# Report the baseline tree's R^2 on the held-out test set.
print(r2)

0.5997321244428706


Hyperparameter Tuning 

In [25]:
# Candidate settings for the grid search:
# 5 depths x 2 criteria x 3 max_features values = 30 combinations.
param_grid = dict(
    max_depth=[2, 4, 8, 10, None],
    criterion=['friedman_mse', 'absolute_error'],
    max_features=[0.25, 0.5, 1.0],
)

In [26]:
# Exhaustive search over `param_grid`, using GridSearchCV's default
# cross-validation and scoring (neither `cv` nor `scoring` is passed).
# The base estimator is seeded: the grid includes max_features < 1.0, which
# makes each tree sample a random subset of features per split, so without a
# fixed random_state the best score/params would differ from run to run.
# error_score='raise' surfaces any failing parameter combination immediately.
REG = GridSearchCV(
    DecisionTreeRegressor(random_state=42),
    param_grid=param_grid,
    error_score='raise',
)

In [28]:
# Run the grid search on the training split only; the test split stays unseen.
REG.fit(X=x_train, y=y_train)

In [29]:
# Best cross-validated score found by the search. It is computed on folds of
# the training data, so it is not directly comparable to the test-set R^2
# printed for the baseline tree.
REG.best_score_

0.6971632289091145

In [30]:
# Parameter combination from `param_grid` that achieved the best score.
REG.best_params_

{'criterion': 'friedman_mse', 'max_depth': 8, 'max_features': 1.0}

Feature Importance

In [39]:
# Rank the features of the baseline tree from most to least important.
# NOTE(review): this inspects DT_Regressor (the max_depth=5 baseline), not the
# tuned estimator found by the grid search - confirm that is intended.
ranked_importances = list(zip(DT_Regressor.feature_importances_, x_train.columns))
ranked_importances.sort(reverse=True)  # highest importance first
for importance, name in ranked_importances:
    print(name, importance)

MedInc 0.7712117162048137
AveOccup 0.1284067461489675
HouseAge 0.04162087993606977
AveRooms 0.03126072126800431
Latitude 0.022049480286782774
Population 0.002484998287177666
Longitude 0.002096950201375074
AveBedrms 0.0008685076668093605
