In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [18]:
# Read the salary CSV into a DataFrame.
df = pd.read_csv("salary/Salary_dataset.csv")

# Selecting with a list of labels keeps both the feature and the target as
# single-column DataFrames (2-D) rather than Series.
X = df.loc[:, ['YearsExperience']]   # feature
y = df.loc[:, ['Salary']]            # target

In [19]:
# Shuffled train/test split with 20% of the rows held out for evaluation;
# the fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    shuffle=True,
    test_size=0.2,
)

In [20]:
# Baseline: fit a linear regression on the raw (unscaled) feature and target.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
mse_no_scaling = mean_squared_error(y_true=y_test, y_pred=y_pred)

print("MSE without scaling:", mse_no_scaling)


MSE without scaling: 49830096.855908334


In [21]:
# Separate Min-Max scalers for the feature and for the target.
x_mm, y_mm = MinMaxScaler(), MinMaxScaler()

# Both scalers are fitted on the training rows only; the fitted feature
# scaler is then reused to transform the test rows.
X_train_mm = x_mm.fit(X_train).transform(X_train)
X_test_mm = x_mm.transform(X_test)
y_train_mm = y_mm.fit(y_train).transform(y_train)

# Train on the scaled feature/target pair.
model_mm = LinearRegression().fit(X_train_mm, y_train_mm)

# The model predicts in scaled target units, so the predictions are mapped
# back to the original target units before scoring against y_test.
y_pred_mm_scaled = model_mm.predict(X_test_mm)
y_pred_mm = y_mm.inverse_transform(y_pred_mm_scaled)

mse_minmax = mean_squared_error(y_true=y_test, y_pred=y_pred_mm)
print("MSE with Min-Max scaling:", mse_minmax)



MSE with Min-Max scaling: 49830096.855908275


In [22]:
# Separate standardizing scalers for the feature and for the target.
x_std, y_std = StandardScaler(), StandardScaler()

# Both scalers are fitted on the training rows only; the fitted feature
# scaler is then reused to transform the test rows.
X_train_std = x_std.fit(X_train).transform(X_train)
X_test_std = x_std.transform(X_test)
y_train_std = y_std.fit(y_train).transform(y_train)

# Train on the standardized feature/target pair.
model_std = LinearRegression().fit(X_train_std, y_train_std)

# The model predicts in standardized target units, so the predictions are
# mapped back to the original target units before scoring against y_test.
y_pred_std_scaled = model_std.predict(X_test_std)
y_pred_std = y_std.inverse_transform(y_pred_std_scaled)

mse_standard = mean_squared_error(y_true=y_test, y_pred=y_pred_std)
print("MSE with Standard normalization:", mse_standard)


MSE with Standard normalization: 49830096.855908304


In [23]:
# Side-by-side summary of the three test-set MSE values, all measured in the
# original target units.
print("\n--- Final MSE Comparison ---")
for label, mse_value in (
    ("No scaling:", mse_no_scaling),
    ("Min-Max scaling:", mse_minmax),
    ("Standard normalization:", mse_standard),
):
    print(label, mse_value)



--- Final MSE Comparison ---
No scaling: 49830096.855908334
Min-Max scaling: 49830096.855908275
Standard normalization: 49830096.855908304
