In [0]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score




In [0]:
# Build a reproducible synthetic regression dataset: five Gaussian features
# and a target that is a noisy linear combination of them.
np.random.seed(0)
data_size = 100

# Feature draws. The call order is significant: every draw consumes the same
# seeded global random stream, so reordering would change the data.
X1 = np.random.normal(loc=10, scale=5, size=data_size)     # Feature 1
X2 = np.random.normal(loc=20, scale=10, size=data_size)    # Feature 2
X3 = np.random.normal(loc=30, scale=15, size=data_size)    # Feature 3
X4 = np.random.normal(loc=5, scale=2, size=data_size)      # Feature 4
X5 = np.random.normal(loc=100, scale=50, size=data_size)   # Feature 5

# Target: fixed linear weights on the features plus Gaussian noise
# (mean 0, standard deviation 10), drawn after all the features.
noise = np.random.normal(loc=0, scale=10, size=data_size)
y = 3*X1 + 2*X2 + 1.5*X3 + 0.5*X4 + 0.2*X5 + noise

# Assemble the frame: one column per feature, target last.
df = pd.DataFrame(
    np.column_stack([X1, X2, X3, X4, X5, y]),
    columns=['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5', 'Target'],
)

# Preview the first rows as the cell output.
df.head()


In [0]:
# Separate the response column from the predictor columns.
y = df['Target']
X = df.drop(columns=['Target'])

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [0]:
# Fit a linear regression on the training split. fit() returns the estimator
# itself, so construction and training are chained into one statement.
model = LinearRegression().fit(X_train, y_train)

# Predict targets for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report both metrics, one per line.
print(f"Mean Squared Error: {mse}", f"R-Squared: {r2}", sep="\n")


In [0]:
# Scatter the held-out true targets against the model's predictions and
# overlay a dashed y = x reference line (points on it are perfect predictions).
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred)

# The diagonal's endpoints span the range of the full target column `y`.
lo, hi = y.min(), y.max()
ax.plot([lo, hi], [lo, hi], 'k--', lw=2)

ax.set_xlabel('True Values')
ax.set_ylabel('Predictions')
ax.set_title('True vs Predicted')
plt.show()
