<h1>Decision Tree Regression with Cross Validation</h1>

In [28]:
# Step 1: Import required libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error


In [29]:
# Step 2: Load California Housing dataset
housing = fetch_california_housing(as_frame=True)

X = housing.data      # Feature variables
y = housing.target   # Target variable



In [30]:
X

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32


In [31]:
y

0        4.526
1        3.585
2        3.521
3        3.413
4        3.422
         ...  
20635    0.781
20636    0.771
20637    0.923
20638    0.847
20639    0.894
Name: MedHouseVal, Length: 20640, dtype: float64

In [32]:
# Step 3: Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)



In [33]:
# Step 4: Create Decision Tree Regressor
model = DecisionTreeRegressor(random_state=42)


In [34]:
# Step 5: Apply Cross Validation
cv_scores = cross_val_score(
    model,
    X_train,
    y_train,
    cv=5,
    scoring='neg_mean_squared_error'
)

print("Cross Validation MSE:", cv_scores.mean())


Cross Validation MSE: -0.5245870446725656


In [35]:
# Step 6: Train the model
model.fit(X_train, y_train)


In [36]:
# Step 7: Make predictions
y_pred = model.predict(X_test)


In [37]:
y_pred

array([0.414  , 1.203  , 5.00001, ..., 5.00001, 0.66   , 2.172  ])

In [38]:
# Step 8: Evaluate model performance
mse = mean_squared_error(y_test, y_pred)
print("Test Mean Squared Error:", mse)


Test Mean Squared Error: 0.495235205629094


In [39]:
from sklearn.metrics import r2_score
score=r2_score(y_pred,y_test)

In [40]:
score

0.628592258773339