# 🎯 Linear Regression on Titanic Dataset


This notebook demonstrates both **Simple** and **Multiple Linear Regression** using the Titanic dataset.
We aim to predict the `Fare` a passenger paid: first from `Age` alone, then from `Age`, `Pclass`, `SibSp`, and `Parch` together.


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:

# Read the Titanic CSV (path is relative to the notebook's working directory)
# into the DataFrame that the following cells work on.
titanic_csv = "Titanic-Dataset.csv"
df = pd.read_csv(filepath_or_buffer=titanic_csv)

# Preview the first five rows as the cell output
df.head(n=5)


In [None]:

# Keep only the target ('Fare') and the candidate predictors, then drop rows
# where 'Age' or 'Fare' is missing.
#
# dropna() is chained onto the column selection instead of being called with
# inplace=True on the selected frame: mutating a frame that pandas considers a
# selection of another frame is what raises SettingWithCopyWarning, and the
# chained form returns a new DataFrame in a single expression.
df = df[['Age', 'Pclass', 'SibSp', 'Parch', 'Fare']].dropna(subset=['Age', 'Fare'])

# Per-column count of remaining missing values (shown as the cell output)
df.isnull().sum()


## 📈 Simple Linear Regression: Predict Fare based on Age

In [None]:

# Single-predictor setup: 'Age' is kept as a one-column DataFrame (2-D),
# 'Fare' is the target Series.
X = df.loc[:, ['Age']]
y = df.loc[:, 'Fare']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear model on the training rows (fit() returns the estimator itself)
lr = LinearRegression().fit(X_train, y_train)

# Predicted fares for the held-out rows
y_pred = lr.predict(X_test)

# Print each test-set metric in turn: MAE, MSE, then R2
for label, metric in (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R2 Score:", r2_score),
):
    print(label, metric(y_test, y_pred))

# Actual test fares (blue points) with the model's predictions drawn as a red line
plt.scatter(X_test, y_test, color='blue')
plt.plot(X_test, y_pred, color='red')
plt.title('Simple Linear Regression: Age vs Fare')
plt.xlabel('Age')
plt.ylabel('Fare')
plt.show()


## 📊 Multiple Linear Regression: Predict Fare using multiple features

In [None]:

# Multi-predictor setup: four feature columns, same 'Fare' target.
X = df.loc[:, ['Age', 'Pclass', 'SibSp', 'Parch']]
y = df.loc[:, 'Fare']

# Same 80/20 split and seed as used for the single-feature model
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear model on the training rows (fit() returns the estimator itself)
lr = LinearRegression().fit(X_train, y_train)

# Predicted fares for the held-out rows
y_pred = lr.predict(X_test)

# Print each test-set metric in turn: MAE, MSE, then R2
for label, metric in (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R2 Score:", r2_score),
):
    print(label, metric(y_test, y_pred))

# Raw fitted parameters
print("Intercept:", lr.intercept_)
print("Coefficients:", lr.coef_)

# Pair each coefficient with its feature name; the table is the cell output
coeff_df = pd.DataFrame(
    data=lr.coef_,
    index=X.columns,
    columns=['Coefficient'],
)
coeff_df
