In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from fredapi import Fred
from dotenv import load_dotenv
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [3]:

# scikit-learn no longer ships load_boston, so this notebook works with the
# California housing dataset in its place.
from sklearn.datasets import fetch_california_housing

# Fetch the dataset and unpack the feature matrix and the target vector
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Summarize what was loaded: array shape plus feature/target names
print("Dataset shape:", X.shape)
print("Feature names:", housing.feature_names)
print("Target name:", housing.target_names)


Dataset shape: (20640, 8)
Feature names: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Target name: ['MedHouseVal']


In [5]:
from sklearn.neighbors import KNeighborsRegressor


def _print_scores(heading, actual, predicted):
    """Print `heading`, then the MSE and R-squared of `predicted` against `actual`."""
    print(heading)
    print("Mean Squared Error:", mean_squared_error(actual, predicted))
    print("R-squared Score:", r2_score(actual, predicted))


# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Linear regression: fit on the training split, score on the held-out split
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)
_print_scores("Linear Regression Results:", y_test, lr_predictions)

# K-nearest neighbors (k=5): same split, same metrics
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)
_print_scores("\nK-Nearest Neighbors Results:", y_test, knn_predictions)

# Show the first three predictions from each model for a quick comparison
print("\nFirst 3 predictions (Linear Regression):", lr_predictions[:3])
print("First 3 predictions (KNN):", knn_predictions[:3])



Linear Regression Results:
Mean Squared Error: 0.555891598695242
R-squared Score: 0.5757877060324526

K-Nearest Neighbors Results:
Mean Squared Error: 1.1186823858768293
R-squared Score: 0.14631049965900345

First 3 predictions (Linear Regression): [0.71912284 1.76401657 2.70965883]
First 3 predictions (KNN): [1.623  1.0822 2.8924]


In [7]:
# Plot KNN predictions against the actual target values.
#
# The dataset is re-fetched here so the cell does not depend on X / y having
# been left untouched by earlier cells.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Fit a KNeighborsRegressor with its default settings on the full dataset.
# NOTE: the model is fit and then evaluated on the same rows, so this plot
# shows in-sample fit and is likely optimistic compared with the held-out
# scores from a train/test split.
mod = KNeighborsRegressor().fit(X, y)

# In-sample predictions
pred = mod.predict(X)

# Build the figure through the explicit Figure/Axes interface so a concrete
# Figure object (not the pyplot module) can be handed to Streamlit.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(pred, y, alpha=0.5)
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Actual Values')
ax.set_title('KNN Regression: Predicted vs Actual Values')

# Dashed red diagonal marks where predicted == actual
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, 'r--', lw=2)
fig.tight_layout()

# Render in a Streamlit app (if one is running) and in the notebook.
st.pyplot(fig)

# The original line had a stray leading indent, which raised IndentationError
# and prevented the whole cell from running.
plt.show()
