In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the "titanic" example dataset through seaborn and preview its first rows.
df = sns.load_dataset("titanic")
df.head(n=5)

In [None]:
# Frequency table for passenger class (`pclass`).
#
# value_counts() returns its result ordered by descending count, not by class
# label. Passenger class is ordinal, so a running total only makes sense when
# the rows are in class order; sort by the index before accumulating,
# otherwise "Cumulative" is a running total in popularity order.
freq_table = df['pclass'].value_counts().sort_index()
rel_freq = df['pclass'].value_counts(normalize=True).sort_index() * 100
cum_freq = freq_table.cumsum()

# All three series share the same (sorted) index, so the rows line up.
freq_df = pd.DataFrame({
    'Absolute': freq_table,
    'Relative (%)': rel_freq,
    'Cumulative': cum_freq
})
print(freq_df)


In [None]:
# Contingency table of sex versus survival; margins=True adds the "All"
# totals row and column.
table = pd.crosstab(df['sex'], df['survived'], margins=True).rename(
    index={'female': 'Female', 'male': 'Male'},
    columns={0: 'Survived = 0', 1: 'Survived = 1'},
)

# Replace the axis names inherited from the source columns with display names.
table.index.name = 'Sex'
table.columns.name = 'Survival'

print(table)


In [None]:
# Empirical joint, marginal and conditional probabilities for passenger sex
# and survival, estimated as simple proportions of the rows in `df`.
total = len(df)

# Boolean row masks reused by every count below.
is_female = df['sex'] == 'female'
has_survived = df['survived'] == 1


def _sex_by_survival(normalize):
    """Cross-tabulate sex against survival, normalised along `normalize`, with display labels."""
    shares = pd.crosstab(df['sex'], df['survived'], normalize=normalize)
    return shares.rename(
        index={'male': 'Not Female', 'female': 'Female'},
        columns={1: 'Survived = 1', 0: 'Survived = 0'},
    )


# ---- Joint probability: P(Sex = Female, Survived = 1) ----
joint_prob = int((is_female & has_survived).sum()) / total
print(f"Joint Probability P(Sex = Female, Survived = 1): {joint_prob:.4f}")

# ---- Marginal probabilities ----
p_female = int(is_female.sum()) / total
p_survived = int(has_survived.sum()) / total
print("\nMarginal probability -")
print(f"P(Sex = Female): {p_female:.4f} \nP(Survived = 1): {p_survived:.4f}")

# ---- Conditional probabilities ----
print("Conditional probabilities - ")

# P(Survived=1 | Sex=female) = P(female, survived) / P(female)
cond1 = joint_prob / p_female
print(f"\nP(Survived=1 | Sex=female): {cond1:.4f}")
# Row-normalised table: survival shares within each sex.
# NOTE(review): despite its name, this variable holds P(Survived | Sex).
female_given_survive = _sex_by_survival('index')
print(female_given_survive)

# P(Sex=female | Survived=1) = P(female, survived) / P(survived)
cond2 = joint_prob / p_survived
print(f"\nP(Sex=female | Survived=1): {cond2:.4f}")
# Column-normalised table: sex shares within each survival outcome.
# NOTE(review): despite its name, this variable holds P(Sex | Survived).
survive_given_female = _sex_by_survival('columns')
print(survive_given_female)

In [None]:
# Relationship between passenger age and ticket fare.
# Rows missing either value are dropped first, so the coefficient and every
# plot below are based on the same set of rows.
df_clean = df[['age','fare']].dropna()

corr = df_clean['age'].corr(df_clean['fare'])
print("Pearson Correlation:", corr)

# Anchor the colour scale to the full correlation range [-1, 1]. If vmin/vmax
# are left out, seaborn infers them from the data, so in this 2x2 matrix the
# off-diagonal coefficient would always be drawn in the extreme "cold" colour
# of the diverging palette, whatever its actual sign or size.
sns.heatmap(df_clean.corr(), annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix: Age and Fare')
plt.show()

# Pairwise scatter plots with per-variable distributions on the diagonal.
sns.pairplot(df_clean)
plt.show()

plt.scatter(df_clean['age'], df_clean['fare'], alpha=0.6, color='teal')
plt.xlabel('Age')
plt.ylabel('Fare')
plt.title('Scatter Plot: Age vs Fare')
plt.grid(True, linestyle='--', alpha=0.5)
plt.show()



In [None]:
# Stacked bar chart: passenger counts per ticket class, split by the value of
# the `survived` column.
class_survival = pd.crosstab(index=df['class'], columns=df['survived'])

ax = class_survival.plot(kind='bar', stacked=True)
ax.set_xlabel('Class')
ax.set_ylabel('Count')
ax.set_title('Survival by Class')
plt.show()
