In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('Life Expectancy Data.csv')

In [2]:
# Prepare the features and target variable
# We'll select a subset of features for simplicity
X = data[['GDP', 'Schooling', 'Income composition of resources']].fillna(0)  # Handle missing values
y = data['Life expectancy '].fillna(data['Life expectancy '].mean())  # Handle missing values in target


In [3]:
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)


Shape of X: (2938, 3)
Shape of y: (2938,)


In [4]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)


Shape of X_train: (2350, 3)
Shape of y_train: (2350,)


In [6]:
# =======================================
# Multiple Linear Regression
# =======================================
model_multiple = LinearRegression()

In [7]:
# Train the model
model_multiple.fit(X_train, y_train)

In [8]:
# Make predictions
y_pred_multiple = model_multiple.predict(X_test)


In [9]:
# Calculate metrics
mse_multiple = mean_squared_error(y_test, y_pred_multiple)
r2_multiple = r2_score(y_test, y_pred_multiple)

In [10]:
print("\nMultiple Linear Regression (Life Expectancy Dataset)")
print(f"  Mean Squared Error: {mse_multiple:.4f}")
print(f"  R² Score: {r2_multiple:.4f}")
print("Intercept:", model_multiple.intercept_)


Multiple Linear Regression (Life Expectancy Dataset)
  Mean Squared Error: 49.9695
  R² Score: 0.4232
Intercept: 55.47415144028027


In [11]:
# Display the first few predictions alongside the actual values
predictions = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred_multiple})
print(predictions.head())

      Actual  Predicted
2546    73.7  69.161956
650     75.9  73.564179
1740    74.2  72.390807
177     76.8  76.244412
1377    51.9  65.118668
