In [None]:
import pandas as pd
import altair as alt

In [None]:
# Read the training CSV into a DataFrame; every later cell derives from `data`.
train_csv_path = '/content/train.csv'
data = pd.read_csv(train_csv_path)

In [None]:
# Keep only the rows that have all three fields the analysis relies on.
# A single dropna pass replaces the two sequential ones, and .copy() makes
# data_clean an independent frame so that later cells can add or overwrite
# columns on it without pandas raising SettingWithCopyWarning.
required_columns = ['Age', 'Sex', 'Survived']
data_clean = data.dropna(subset=required_columns).copy()

In [None]:
# Count the rows for each sex.
# The comparison is case-insensitive on purpose: a later cell capitalizes the
# 'Sex' column in place ('male' -> 'Male'), so an exact match against the
# lower-case spelling would print 0 for both counts if this cell were re-run
# after that one.
sex_lower = data_clean['Sex'].str.lower()
male_count = (sex_lower == 'male').sum()
female_count = (sex_lower == 'female').sum()
print(male_count)
print(female_count)

453
261


In [None]:
# Age-group edges and the label attached to each consecutive pair of edges.
bins = [0, 12, 18, 30, 40, 50, 60, 70, 80]
labels = ['0-12', '13-18', '19-30', '31-40', '41-50', '51-60', '61-70', '71-80']

# Bins are closed on the right so that they agree with the labels:
# age 12 belongs to '0-12', age 18 to '13-18', and age 80 to '71-80'.
# (With right=False the intervals were [0, 12), [12, 18), ... [70, 80), which
# shifted every boundary age into the next group and left age 80 outside all
# bins, i.e. NaN.) include_lowest=True keeps an age of exactly 0 in '0-12'.
data_clean['AgeGroup'] = pd.cut(
    data_clean['Age'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)

# Capitalize the sex values ('male' -> 'Male') for display.
data_clean['Sex'] = data_clean['Sex'].str.capitalize()

In [None]:
# Display labels for the 0/1 values of the 'Survived' column.
survival_labels = {0: 'Did not Survive', 1: 'Survived'}

# Relabel the column. Values that are not keys of the mapping are passed
# through unchanged, so re-running this cell after the column already holds
# the text labels is a no-op (Series.map with a plain dict would turn every
# already-relabelled value into NaN).
data_clean['Survived'] = data_clean['Survived'].map(
    lambda flag: survival_labels.get(flag, flag)
)

# Calculate the count of survivors and non-survivors within each group.
# 'AgeGroup' is categorical; observed=True restricts the result to the
# combinations that actually occur in the data, which avoids zero-count rows
# (and the 0/0 = NaN percentages they would produce below).
survival_counts = (
    data_clean
    .groupby(['AgeGroup', 'Sex', 'Survived'], observed=True)
    .size()
    .reset_index(name='Count')
)

# Calculate the total count within each age and sex group for normalization
total_counts = (
    survival_counts
    .groupby(['AgeGroup', 'Sex'], observed=True)['Count']
    .transform('sum')
)

# Calculate percentage for normalization
survival_counts['Percentage'] = (survival_counts['Count'] / total_counts) * 100

# Define the color scale for the Survived column
color_scale = alt.Scale(domain=['Did not Survive', 'Survived'],
                        range=['lightgrey', 'lightskyblue'])


In [None]:
# Fields shown when hovering over a bar.
hover_fields = [
    alt.Tooltip('AgeGroup:N', title='Age Group'),
    alt.Tooltip('Percentage:Q', title='Percentage', format='.1f'),
    'Sex:N',
    'Survived:N',
]

# Percentage axis pinned to the full 0-100 range.
percentage_axis = alt.Y(
    'Percentage:Q',
    scale=alt.Scale(domain=[0, 100]),
    title='Percentage Survived/Did not Survive',
)

# Bar chart of Percentage per AgeGroup, colored by Survived, with one
# column facet per Sex.
bars = alt.Chart(survival_counts).mark_bar().encode(
    x=alt.X('AgeGroup:N', title='Age Group'),
    y=percentage_axis,
    color=alt.Color('Survived:N', scale=color_scale),
    column='Sex:N',
    tooltip=hover_fields,
).properties(
    width=300,
    height=400,
    title='Impact of Gender and Age on Survival rate',
)

# Font sizes for axes, legend, title and facet headers.
chart = (
    bars
    .configure_axis(labelFontSize=14, titleFontSize=16)
    .configure_legend(titleFontSize=16, labelFontSize=14, symbolSize=100)
    .configure_title(fontSize=20)
    .configure_header(titleFontSize=16, labelFontSize=14)
)

# Display the chart
chart