In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [5]:
# Fetch the California Housing bunch and assemble a single DataFrame holding
# the feature columns (named after `feature_names`) plus the regression target
# stored under 'PRICE'.
# NOTE(review): per the scikit-learn dataset docs the target is the median
# house value in units of $100,000 -- confirm before reading 'PRICE' as dollars.
california = fetch_california_housing()
data = pd.DataFrame(
    data=california.data,
    columns=california.feature_names,
).assign(PRICE=california.target)

In [6]:
# Preview the combined frame as plain text: the first five rows, which is
# what DataFrame.head() returns by default.
print(data.head(n=5))

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  PRICE  
0    -122.23  4.526  
1    -122.22  3.585  
2    -122.24  3.521  
3    -122.25  3.413  
4    -122.25  3.422  


In [7]:
# Separate the response from the predictors:
#   y -> the 'PRICE' column
#   X -> every remaining column of `data`
y = data['PRICE']
X = data.drop(columns=['PRICE'])


In [8]:
# Hold out 20% of the rows as a test set. The fixed random_state makes the
# shuffle, and therefore the resulting split, repeatable between runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [9]:
# Create a Linear Regression model.
# No arguments are passed, so the estimator uses scikit-learn's default
# settings; it is not trained until `fit` is called on it.
model = LinearRegression()

In [10]:
# Fit the regressor on the training partition only, so the held-out test rows
# play no part in estimating the coefficients.
model.fit(X=X_train, y=y_train)

In [11]:
# Predict the target for the held-out test features with the fitted model.
y_pred = model.predict(X=X_test)

In [12]:
# Score the test-set predictions against the true test targets with two
# metrics: mean squared error and the R^2 coefficient of determination.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [13]:
# Report both evaluation metrics, one per line.
mse_line = f"Mean Squared Error: {mse}"
r2_line = f"R^2 Score: {r2}"
print(mse_line, r2_line, sep="\n")

Mean Squared Error: 0.5558915986952443
R^2 Score: 0.5757877060324508


In [14]:
# List the fitted weight for each feature. Column names of X are paired
# positionally with the entries of `model.coef_`.
coef_by_feature = list(zip(X.columns, model.coef_))
print("Coefficients:")
for feature, coef in coef_by_feature:
    print(f"{feature}: {coef}")

Coefficients:
MedInc: 0.4486749096657172
HouseAge: 0.009724257517904804
AveRooms: -0.12332334282795833
AveBedrms: 0.7831449067929713
Population: -2.0296205801456127e-06
AveOccup: -0.0035263184871341642
Latitude: -0.419792486588359
Longitude: -0.43370806496398795


In [15]:
# Show the shape of the raw feature array from the dataset bunch.
dimensionality_msg = f"Dimensionality of california.data: {california.data.shape}"
print(dimensionality_msg)


Dimensionality of california.data: (20640, 8)


In [16]:
# Show the feature names shipped with the dataset bunch (the target is not
# part of this list).
original_columns = california.feature_names
print("Column names of the original California dataset:", original_columns)

Column names of the original California dataset: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [22]:
# Show the shape of the target array from the dataset bunch.
target_shape = california.target.shape
print(target_shape)

(20640,)


In [1]:
from prometheus_api_client import PrometheusConnect, MetricSnapshotDataFrame