In [1]:
import pandas as pd
import numpy as np

from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

data = {
    'Age': [20, np.nan, 18, 19, 21],
    'TestScore': [85, 90, np.nan, 92, 88],
    'Salary': [30000, 35000, 40000, 45000, 42000]
}

df = pd.DataFrame(data)
print("Original Dataset")
print(df)

num_imputer = SimpleImputer(strategy='mean')
df[['Age', 'TestScore']] = num_imputer.fit_transform(df[['Age', 'TestScore']])

print("\nAfter Mean Imputation")
print(df)

X = df[['Age', 'TestScore']]
y = df['Salary']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Predicted Salary:", y_pred)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))

Original Dataset
    Age  TestScore  Salary
0  20.0       85.0   30000
1   NaN       90.0   35000
2  18.0        NaN   40000
3  19.0       92.0   45000
4  21.0       88.0   42000

After Mean Imputation
    Age  TestScore  Salary
0  20.0      85.00   30000
1  19.5      90.00   35000
2  18.0      88.75   40000
3  19.0      92.00   45000
4  21.0      88.00   42000
Predicted Salary: [42840.46603716]
MSE: 61472907.679877065
R2 Score: nan


