# Titanic Dataset - Exploratory Data Analysis
This notebook performs exploratory data analysis (EDA) on the Titanic training set (`train.csv`) using pandas, Matplotlib, and seaborn.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the white-grid seaborn theme to every figure drawn below.
# `sns.set` is only an alias of `set_theme` and is documented as a candidate
# for removal, so call the preferred name directly.
sns.set_theme(style='whitegrid')

In [None]:
# Load the passenger table from the CSV in the working directory, then
# preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='train.csv')
df.head(n=5)

## Data Overview

In [None]:
# Print the column names, non-null counts and dtypes; the non-null counts
# show which columns need the missing-value handling done further down.
df.info()

## Descriptive Statistics

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

## Handling Missing Values

In [None]:
# Impute missing values by assigning each filled column back onto the frame.
#
# Why not `df[col].fillna(..., inplace=True)`: that call acts on an
# intermediate Series selected from `df` (chained assignment). With pandas
# Copy-on-Write (opt-in in 2.x, the default from 3.0) the fill never reaches
# `df`, and recent 2.x releases warn about the pattern. Explicit assignment
# works on every pandas version and is a no-op when the cell is re-run.
df['Age'] = df['Age'].fillna(df['Age'].median())  # numeric -> median
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])  # categorical -> most frequent value
df['Cabin'] = df['Cabin'].fillna('Missing')  # keep "unknown" as an explicit label

## Categorical Value Counts

In [None]:
# Frequency table for each categorical column.
# A notebook cell renders only its final expression, so four bare
# `value_counts()` lines would show just the last table; print each one
# explicitly so all four appear in the output.
for column in ['Survived', 'Pclass', 'Sex', 'Embarked']:
    print(df[column].value_counts(), end='\n\n')

## Univariate Visualizations

In [None]:
# Histogram of passenger ages in 30 bins.
# Work with the Axes returned by pandas rather than pyplot's implicit
# "current axes", and label both axes so the figure stands on its own.
# NOTE: missing ages are filled with the median earlier in the notebook,
# so the bin containing the median is inflated by the imputed rows.
ax = df['Age'].hist(bins=30)
ax.set_title('Age Distribution')
ax.set_xlabel('Age')
ax.set_ylabel('Number of passengers')
plt.show()

In [None]:
# Box plot of ticket fares; titled through the Axes that seaborn returns.
fare_ax = sns.boxplot(data=df, x='Fare')
fare_ax.set_title('Fare Distribution')
plt.show()

## Bivariate Visualizations

In [None]:
# Passenger counts per sex, with bars split by the Survived column.
gender_ax = sns.countplot(data=df, x='Sex', hue='Survived')
gender_ax.set_title('Survival by Gender')
plt.show()

In [None]:
# Passenger counts per ticket class, with bars split by the Survived column.
class_ax = sns.countplot(data=df, x='Pclass', hue='Survived')
class_ax.set_title('Survival by Class')
plt.show()

In [None]:
# Age distribution for each value of Survived, drawn as side-by-side box plots.
age_ax = sns.boxplot(data=df, x='Survived', y='Age')
age_ax.set_title('Age vs Survival')
plt.show()

In [None]:
# Pairwise correlations between the selected numeric columns, drawn as an
# annotated heatmap on a diverging colour map.
numeric_cols = ['Survived', 'Pclass', 'Age', 'Fare', 'SibSp', 'Parch']
corr_matrix = df[numeric_cols].corr()
heatmap_ax = sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
heatmap_ax.set_title('Correlation Heatmap')
plt.show()

## Summary of Findings
- Females had a higher survival rate than males.
- 1st class passengers were more likely to survive.
- Younger passengers and those who paid higher fares were more likely to survive.
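- Port C (Cherbourg) had higher survival rates than the other embarkation ports (not plotted above; this can be checked with `df.groupby('Embarked')['Survived'].mean()`).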
- A notable negative correlation is observed between Fare and Pclass in the heatmap: 1st class (Pclass = 1) tickets cost the most.