# Simple Linear Regression on the Titanic Dataset

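A minimal example that fits a linear regression to the binary `survived` label of the Titanic dataset and reads the fitted value as a survival probability. Because ordinary least squares is unbounded, some predictions may fall below 0 or above 1.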

In [None]:
# Install seaborn if needed (uncomment if running locally).
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install seaborn

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [None]:
# Pull the Titanic passenger table from seaborn's example datasets.
titanic = sns.load_dataset(name='titanic')

# Preview the first five rows as this cell's output.
titanic.head(n=5)

## Preprocess Data
- Select a few features (age, fare, sex, pclass)
- Drop missing values
- Encode categorical variable (sex)

In [None]:
# Select features (single source of truth for the model's input columns)
features = ['age', 'fare', 'sex', 'pclass']

# Build the modelling frame in one chain:
#   1. keep only the feature columns plus the target,
#   2. drop every row that has a missing value in any of them,
#   3. encode 'sex' numerically (male=0, female=1).
# Using .assign on the chained result avoids assigning into a filtered
# selection, which pandas may flag with SettingWithCopyWarning.
df = (
    titanic[features + ['survived']]
    .dropna()
    .assign(sex=lambda frame: frame['sex'].map({'male': 0, 'female': 1}))
)

# .map() yields NaN for any label that is not a key of the mapping; fail here
# with a clear message instead of later inside the model's fit().
assert df['sex'].notna().all(), "unexpected value in 'sex' column"

# Feature matrix and target vector; X reuses `features` so the two cannot drift.
X = df[features]
y = df['survived']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression model on the training split.
# fit() hands back the estimator itself, so construction and fitting chain.
lr = LinearRegression().fit(X_train, y_train)

# Report the learned parameters: the per-feature weights, then the bias term.
print("Coefficients:", lr.coef_)
print("Intercept:", lr.intercept_)

In [None]:
# Score the held-out rows with the fitted model
y_pred = lr.predict(X_test)

# Average squared gap between the true labels and the predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse:.4f}")

## Plot predictions vs actual

In [None]:
# Draw on an explicit Axes rather than through pyplot's implicit current figure.
fig, ax = plt.subplots()

# Actual label on x against the model's continuous output on y;
# partial transparency keeps overlapping points distinguishable.
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set(
    xlabel='Actual Survived',
    ylabel='Predicted (Probability)',
    title='Linear Regression: Titanic Survival',
)
ax.grid(True)
plt.show()

That's it! This notebook demonstrates a minimal linear regression workflow on the Titanic dataset.