# Titanic Dataset - Visual & Statistical Exploration

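This notebook walks through an exploratory analysis of the Titanic training data in four steps: a dataset overview, a missing-data check, survival analysis, and correlations between numeric columns. It uses pandas for data handling and matplotlib for plotting.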

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Read the training set from the hard-coded absolute path below.
TRAIN_CSV_PATH = r"/mnt/data/train.csv"
train = pd.read_csv(filepath_or_buffer=TRAIN_CSV_PATH)

# Keep the preview as the cell's final expression so the first rows render as output.
train.head(n=5)

In [None]:
# Basic summary: dataset dimensions, per-column statistics, and missing-value counts.
print(train.shape)

# Transpose so each dataset column becomes one row of statistics, and print every
# row: truncating with .head() here would silently drop all but the first five
# columns from the summary.
print(train.describe(include='all').transpose())

# Print the header and the counts with separate calls: passing both to a single
# print() inserts the separator space right after the newline, which indents
# (and misaligns) the first row of the counts.
print('\nMissing values:')
print(train.isnull().sum())

In [None]:
# Survival analyses: the overall mean of 'Survived', then its mean within each
# group of 'Sex' and of 'Pclass'.
# NOTE(review): reading these means as "rates" assumes 'Survived' is a 0/1
# indicator column - confirm against the data dictionary.
print('Overall survival rate:', train['Survived'].mean())
for header, group_col in (
    ('\nSurvival rate by sex:', 'Sex'),
    ('\nSurvival rate by Pclass:', 'Pclass'),
):
    print(header)
    print(train.groupby(group_col)['Survived'].mean())

# Bar chart of how many rows carry each 'Survived' value, ordered by that value.
# The figure is only displayed; nothing is written to disk.
ax = train['Survived'].value_counts().sort_index().plot(kind='bar', title='Survival Counts')
# Label both axes so the chart can be read without the surrounding code.
ax.set_xlabel('Survived')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Age distribution by survival: one box of 'Age' values per distinct 'Survived' value,
# drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots()
train.boxplot(column='Age', by='Survived', ax=ax)
plt.show()

In [None]:
# Correlation matrix: pairwise correlations between the numeric columns.
# 'number' is the selector pandas documents for "all numeric dtypes", so the
# selection does not depend on how the Python int/float builtins map to
# fixed-width dtypes.
corr = train.select_dtypes(include='number').corr()

# Rank the numeric columns by their correlation with 'Survived', highest first.
print(corr['Survived'].sort_values(ascending=False))

# Heatmap of the full matrix with a colour scale. pyplot is already imported in
# the notebook's first code cell, so it is not imported again here.
plt.matshow(corr)
plt.colorbar()
# Put the column names on both axes; x labels are rotated so they do not overlap.
tick_positions = range(len(corr.columns))
plt.xticks(tick_positions, corr.columns, rotation=90)
plt.yticks(tick_positions, corr.columns)
plt.show()