In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
# Read the salary CSV and pick out the modelling columns.
df = pd.read_csv("salary/Salary_dataset.csv")

# X stays two-dimensional (a one-column DataFrame) because it is selected
# with a list of labels; y is a one-dimensional Series.
X = df.loc[:, ['YearsExperience']]   # single predictor
y = df.loc[:, 'Salary']              # regression target

In [3]:
# Hold out 20% of the rows for evaluation. Rows are shuffled before the
# split, and the fixed random_state makes that shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    shuffle=True,
    test_size=0.2,
)

In [4]:
# Baseline: linear regression on the raw (unscaled) feature.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
mse_no_norm = mean_squared_error(y_test, y_pred)

print("MSE without normalization:", mse_no_norm)

MSE without normalization: 49830096.855908334


In [5]:
# Learn the min/max from the training rows only, then apply that same
# mapping to both splits so no test-set information leaks into the scaler.
minmax = MinMaxScaler().fit(X_train)
X_train_mm = minmax.transform(X_train)
X_test_mm = minmax.transform(X_test)

# Fit a fresh linear model on the rescaled feature.
model_mm = LinearRegression().fit(X_train_mm, y_train)

# An ordinary least-squares fit with an intercept yields the same predictions
# under any affine rescaling of the feature, so this MSE is expected to match
# the unscaled baseline up to floating-point rounding.
y_pred_mm = model_mm.predict(X_test_mm)
mse_minmax = mean_squared_error(y_test, y_pred_mm)

print("MSE with Min-Max scaling:", mse_minmax)

MSE with Min-Max scaling: 49830096.85590833


In [6]:
# Learn the mean and standard deviation from the training rows only, then
# apply that same mapping to both splits so the test set stays unseen.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# Fit a fresh linear model on the standardized feature.
model_std = LinearRegression().fit(X_train_std, y_train)

# Standardization is an affine rescaling of the feature, so an ordinary
# least-squares fit with an intercept should reproduce the unscaled
# predictions; expect the same MSE up to floating-point rounding.
y_pred_std = model_std.predict(X_test_std)
mse_standard = mean_squared_error(y_test, y_pred_std)

print("MSE with Standard normalization:", mse_standard)

MSE with Standard normalization: 49830096.85590836


In [7]:
# Side-by-side summary of the three test-set MSE values computed above.
# Any differences are expected only in the last few digits (floating-point
# rounding), since feature rescaling does not change a least-squares fit.
print("\n--- MSE Comparison ---")
for label, value in (
    ("No normalization:", mse_no_norm),
    ("Min-Max scaling:", mse_minmax),
    ("Standard normalization:", mse_standard),
):
    print(label, value)


--- MSE Comparison ---
No normalization: 49830096.855908334
Min-Max scaling: 49830096.85590833
Standard normalization: 49830096.85590836
