# Titanic Dataset EDA
This notebook performs an Exploratory Data Analysis (EDA) on the Titanic dataset.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the Titanic training CSV from the working directory and preview
# its first five rows as the cell output.
csv_path = "train.csv"
df = pd.read_csv(csv_path)
df.head(n=5)


In [None]:

# Column dtypes and non-null counts. info() prints its report directly,
# so it is visible regardless of where it sits in the cell.
df.info()

# A notebook cell only renders the value of its LAST expression, so a bare
# `df.describe()` in the middle of a cell is computed and then discarded.
# display() (injected into the namespace by the IPython kernel) renders it
# explicitly.
display(df.describe())

# Number of missing values per column; shown as the cell's output.
df.isnull().sum()


In [None]:

# Univariate plots
# Categorical columns: one bar per category, bar height = number of rows.
categorical_plots = [
    ('Survived', 'Survival Count'),
    ('Sex', 'Gender Count'),
    ('Pclass', 'Passenger Class Count'),
]
for column, title in categorical_plots:
    sns.countplot(x=column, data=df)
    plt.title(title)
    plt.show()

# Numeric columns: 30-bin histograms. Series.hist() does not label either
# axis, so set the labels explicitly to keep each figure readable on its own.
numeric_plots = [
    ('Age', 'Age Distribution'),
    ('Fare', 'Fare Distribution'),
]
for column, title in numeric_plots:
    ax = df[column].hist(bins=30)
    ax.set_xlabel(column)
    ax.set_ylabel('Count')
    plt.title(title)
    plt.show()


In [None]:

# Survival counts broken down by a second categorical column (one figure each).
survival_breakdowns = (
    ('Sex', 'Survival by Gender'),
    ('Pclass', 'Survival by Class'),
)
for hue_column, title in survival_breakdowns:
    sns.countplot(data=df, x='Survived', hue=hue_column)
    plt.title(title)
    plt.show()

# Spread of passenger age within each passenger class.
sns.boxplot(data=df, x='Pclass', y='Age')
plt.title('Age Distribution by Class')
plt.show()


In [None]:

# Encode categorical variables
# Build a separate encoded frame rather than overwriting columns of `df`:
# mapping in place is not idempotent (re-running the cell would send the
# already-encoded 0/1 values to NaN) and it would change what the earlier
# plotting cells display if they are run again.
# Values not present in a mapping (including missing values) become NaN.
df_encoded = df.assign(
    Sex=df['Sex'].map({'male': 0, 'female': 1}),
    Embarked=df['Embarked'].map({'C': 0, 'Q': 1, 'S': 2}),
)

# Correlation heatmap
# Keep only numeric columns before calling corr(): from pandas 2.0 onward
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# if any string column is left in the frame. select_dtypes() gives the same
# result on older and newer pandas versions.
# NOTE(review): the Embarked codes are arbitrary labels, so its correlation
# coefficients should be read with caution.
corr_matrix = df_encoded.select_dtypes(include='number').corr()

plt.figure(figsize=(10,6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()



## Summary of Findings:
- Female passengers had a much higher survival rate than males.
- 1st class passengers had higher survival rates.
- Most passengers were in 3rd class, and most passengers were male.
- Younger passengers had a slightly higher survival rate.
- Higher fares were associated with a slightly higher chance of survival.
