# Titanic Dataset - Exploratory Data Analysis

This notebook contains exploratory data analysis for the Titanic survival prediction project.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data: read the raw CSV from the relative path below, report the
# table's (rows, columns) shape, and preview the first rows as the cell output.
titanic_csv = '../../../data/raw/Titanic.csv'
data = pd.read_csv(titanic_csv)
print(f"Data shape: {data.shape}")
data.head()

In [None]:
# Basic information about the dataset: column summary followed by the number
# of missing values in each column.
print("Dataset Info:")
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called on its own; wrapping it in print() would emit a stray "None" line.
data.info()
print("\nMissing values:")
print(data.isnull().sum())

In [None]:
# Survival rate analysis: show the split of the 'Survived' column twice,
# once as raw counts (bar chart) and once as proportions (pie chart).
fig, (ax_count, ax_share) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: number of passengers per outcome.
outcome_counts = data['Survived'].value_counts()
outcome_counts.plot(kind='bar', ax=ax_count)
ax_count.set_title('Survival Count')
ax_count.set_xlabel('Survived (0: No, 1: Yes)')
ax_count.set_ylabel('Count')

# Right panel: the same split as fractions, labelled with percentages.
outcome_shares = data['Survived'].value_counts(normalize=True)
outcome_shares.plot(kind='pie', autopct='%1.1f%%', ax=ax_share)
ax_share.set_title('Survival Rate')
ax_share.set_ylabel('')  # blank out the y-axis label on the pie panel

fig.tight_layout()
plt.show()

print(f"Overall survival rate: {data['Survived'].mean():.3f}")

## Feature Analysis

In [None]:
# Analyze survival by passenger class: grouped count plot of outcomes within
# each 'Pclass' value, followed by the mean of 'Survived' per class.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='Pclass', hue='Survived', ax=ax)
ax.set_title('Survival by Passenger Class')
plt.show()

# The mean of 'Survived' within a group is reported as that group's survival rate.
rate_by_class = data.groupby('Pclass')['Survived'].mean()
print("Survival rate by class:")
print(rate_by_class)

In [None]:
# Analyze survival by gender: grouped count plot of outcomes within each 'Sex'
# value, followed by the mean of 'Survived' per gender.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='Sex', hue='Survived', ax=ax)
ax.set_title('Survival by Gender')
plt.show()

# The mean of 'Survived' within a group is reported as that group's survival rate.
rate_by_sex = data.groupby('Sex')['Survived'].mean()
print("Survival rate by gender:")
print(rate_by_sex)