In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error, 
    mean_squared_error, 
    median_absolute_error, 
    mean_absolute_percentage_error, 
    r2_score, 
    explained_variance_score,
    max_error
)
import numpy as np

# Fit a one-feature linear regression of Salary on YearsExperience and report
# the fitted line together with a set of regression error metrics.

# Load the dataset (path is relative to the notebook's working directory).
df = pd.read_csv("Salary_Data.csv")

# Split into features and target.
# Double brackets keep X as a 2-D DataFrame, which scikit-learn estimators expect.
X = df[['YearsExperience']]
y = df['Salary']

# Train the Linear Regression model
model = LinearRegression()
model.fit(X, y)

# Predict on the same rows the model was fitted on.
# NOTE: every metric below is therefore an in-sample (training) error, not an
# estimate of performance on unseen data.
y_pred = model.predict(X)

# Calculate error metrics
mae = mean_absolute_error(y, y_pred)
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same units as the target
r2 = r2_score(y, y_pred)
medae = median_absolute_error(y, y_pred)
mape = mean_absolute_percentage_error(y, y_pred)  # a fraction; scaled to % when printed
max_err = max_error(y, y_pred)
evs = explained_variance_score(y, y_pred)

# Display the results
slope = model.coef_[0]
intercept = model.intercept_
# Pick the operator from the intercept's sign so a negative intercept is shown
# as "- 123.45" rather than "+ -123.45".
intercept_sign = "+" if intercept >= 0 else "-"

print("📌 Linear Regression Equation:")
print(f"Salary = {slope:.2f} * YearsExperience {intercept_sign} {abs(intercept):.2f}\n")

print("📊 Error Metrics:")
print(f"1. Mean Absolute Error (MAE): {mae:.2f}")
print(f"2. Mean Squared Error (MSE): {mse:.2f}")
print(f"3. Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"4. Median Absolute Error: {medae:.2f}")
print(f"5. Mean Absolute Percentage Error (MAPE): {mape*100:.2f}%")
print(f"6. Max Error: {max_err:.2f}")
print(f"7. R² Score: {r2:.4f}")
print(f"8. Explained Variance Score: {evs:.4f}")

📌 Linear Regression Equation:
Salary = 9449.96 * YearsExperience + 25792.20

📊 Error Metrics:
1. Mean Absolute Error (MAE): 4644.20
2. Mean Squared Error (MSE): 31270951.72
3. Root Mean Squared Error (RMSE): 5592.04
4. Median Absolute Error: 4017.93
5. Mean Absolute Percentage Error (MAPE): 7.05%
6. Max Error: 11448.03
7. R² Score: 0.9570
8. Explained Variance Score: 0.9570
