In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [6]:
# Load the California housing dataset bundle from scikit-learn
cal_housing = fetch_california_housing()

# Assemble the feature matrix and the target into one DataFrame in a single
# expression, so the frame is never displayed in a half-built state
df = pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names).assign(
    target=cal_housing.target
)

# Preview the first rows as a sanity check
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [7]:
def analyze_feature_relationship(feature_name, target_name='target', data=None,
                                 test_size=0.2, random_state=42):
    """
    Fit a single-feature linear regression and report the direction and
    strength of the feature's relationship with the target.

    The data is split into train and test partitions, a LinearRegression
    model is fitted on the training partition, and R-squared is computed on
    the held-out test partition. A short text report is printed.

    Parameters
    ----------
    feature_name : str
        Column used as the single predictor.
    target_name : str, default 'target'
        Column used as the response.
    data : pandas.DataFrame, optional
        Frame holding both columns. When omitted, the notebook-level ``df``
        is used, so existing calls keep working unchanged.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded to ``train_test_split`` to make the split reproducible.

    Returns
    -------
    LinearRegression
        The model fitted on the training partition.

    Raises
    ------
    KeyError
        If ``feature_name`` or ``target_name`` is not a column of the frame.
    """
    # Fall back to the notebook-level DataFrame only when no frame is passed,
    # which keeps the dependency on global state explicit and overridable.
    frame = df if data is None else data

    # Double brackets keep X two-dimensional, as scikit-learn estimators expect
    X = frame[[feature_name]]
    y = frame[target_name]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    # R-squared is measured on unseen data, so it can be slightly negative
    # when the feature explains essentially nothing.
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    # The sign of the single slope gives the direction of the relationship.
    # A zero (or NaN) slope is neither positive nor negative, so it is
    # reported separately instead of being lumped in with "Negative".
    coef = model.coef_[0]
    if coef > 0:
        relationship = "Positive"
    elif coef < 0:
        relationship = "Negative"
    else:
        relationship = "None"

    # Print results
    print(f"--- Analysis for {feature_name} ---")
    print(f"Coefficient: {coef:.4f}")
    print(f"Relationship with Target: {relationship}")
    print(f"R-squared Value: {r2:.4f}")
    print("\n")

    return model

In [8]:
# Task: fit the one-feature regression on AveRooms and report the slope sign
# and the test-set R-squared
model_rooms = analyze_feature_relationship(feature_name='AveRooms')

--- Analysis for AveRooms ---
Coefficient: 0.0768
Relationship with Target: Positive
R-squared Value: 0.0138




In [9]:
# Task: fit the one-feature regression on AveBedrms and report the slope sign
# and the test-set R-squared.
# NOTE(review): the recorded R-squared for this run is slightly below zero,
# i.e. on the held-out split the model does no better than predicting the
# mean, so the "Negative" label reflects only the slope sign, not a
# meaningful relationship.
model_bedrms = analyze_feature_relationship(feature_name='AveBedrms')

--- Analysis for AveBedrms ---
Coefficient: -0.1371
Relationship with Target: Negative
R-squared Value: -0.0004


