<a href="https://colab.research.google.com/github/RiverGumSecurity/AILabs/blob/main/020_SupervisedLearning/Linear_Regression.ipynb" target="_new"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression

Learning objectives:
* Selecting a suitable data source.
* Defining linear regression.
* Mean squared error as a central concept.
* Outlining why we use linear regression.
* Demonstrating and observing machine learning with linear regression.
* Q&A. **TODO:** decide whether there are MetaCTF questions we could pose.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
## Load the California housing dataset
# Build a single DataFrame: one column per feature (named from
# housing.feature_names) plus the regression target as 'MedHouseVal'.
housing = fetch_california_housing()
df = pd.DataFrame(
    data=housing.data,
    columns=housing.feature_names,
).assign(MedHouseVal=housing.target)

In [None]:
## Display the first few rows of the dataset
# Left as the cell's last expression so the notebook renders the frame.
df.head(n=5)

In [None]:
## Split the data into training and testing sets
# The target is the 'MedHouseVal' column; the features are every other column.
y = df['MedHouseVal']
X = df.drop(columns='MedHouseVal')
# Hold out 20% of the rows for testing; random_state is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
## Create and train the linear regression model
# Fit on the training split only; the test split is used later for evaluation.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
## Make predictions on the testing set
# y_pred holds the model's output for the rows of X_test.
y_pred = model.predict(X=X_test)


In [None]:
## Evaluate the model
# Compare the held-out targets with the predictions using two metrics:
# mean squared error and the R-squared score.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Report both evaluation metrics, one per line.
print(
    f'Mean Squared Error: {mse}',
    f'R-squared: {r2}',
    sep='\n',
)

In [None]:
## Plot the results
# Scatter the predictions against the measured test targets.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
# Dashed diagonal from (min, min) to (max, max) of the measured values:
# points on this line have predicted == measured.
diagonal = [y_test.min(), y_test.max()]
ax.plot(diagonal, diagonal, 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
ax.set_title('Measured vs Predicted')
plt.show()