In [14]:
import seaborn as sns
import pandas as pd

# ===============================
# 1. Load the Titanic dataset and summarise its shape
# ===============================
titanic = sns.load_dataset("titanic")
# Untouched snapshot taken before any cleaning, so later steps can tell
# which cells were originally missing.
titanic_orig = titanic.copy()

row_count, column_count = titanic.shape
print("===== DATASET INFORMATION =====")
print(f"Total Rows: {row_count}")
print(f"Total Columns: {column_count}")
print("\nColumn Names:", titanic.columns.tolist())

# ===============================
# 2. Count missing values per column
# ===============================
missing_per_column = titanic.isna().sum()
print("\n===== MISSING VALUES IN EACH COLUMN =====")
print(missing_per_column)

# Quick look at the raw data before anything is filled in.
print("\nFirst 5 Rows of Dataset:")
print(titanic.head())

# ===============================
# 3-5. Work out the fill values for 'age', 'embarked' and 'embark_town'
# ===============================
# Numeric column: use the median of the observed ages.
median_age = titanic['age'].median()
# Categorical columns: use the most frequent observed value
# (mode() returns a Series; its first entry is taken).
mode_embarked = titanic['embarked'].mode()[0]
mode_town = titanic['embark_town'].mode()[0]

# ===============================
# 6. Apply the fills and drop 'deck' (too many missing values)
# ===============================
titanic = titanic.assign(
    age=titanic['age'].fillna(median_age),
    embarked=titanic['embarked'].fillna(mode_embarked),
    embark_town=titanic['embark_town'].fillna(mode_town),
).drop(columns=['deck'])

# ===============================
# 7. Highlight filled values (Age, Embarked, Embark_Town)
# ===============================
def highlight_filled(s, col, original=None):
    """Return per-cell CSS that marks values which were originally missing.

    Intended as a column-wise function for ``Styler.apply``: it receives the
    column being styled and returns one CSS string per cell.

    The lookup is aligned on ``s.index`` rather than on a fixed row count, so
    the result always has the same length as ``s`` and refers to the same
    rows, whichever slice of the frame is being styled.

    Args:
        s: The column (Series) being styled; only its index is used.
        col: Name of the column to inspect in the pre-cleaning frame.
        original: Frame holding the values before missing data was filled.
            Defaults to the module-level ``titanic_orig``.

    Returns:
        A list with ``'background-color: #FAA0A0'`` for each row whose
        original value was missing and ``''`` for every other row.

    Raises:
        KeyError: If ``col`` or any label of ``s.index`` is absent from
            ``original``.
    """
    if original is None:
        original = titanic_orig
    # Select exactly the rows being styled, in the same order.
    was_missing = original.loc[s.index, col].isna()
    return [
        'background-color: #FAA0A0' if missing else ''
        for missing in was_missing
    ]

# head(100) returns the rows at positions 0 through 99, so the label says
# 0–99 rather than 0–100.
print("\n===== DATASET SAMPLE (ROWS 0–99) WITH HIGHLIGHTED FILLED VALUES =====")
# Styler.apply forwards extra keyword arguments to the styling function, so
# `col` tells highlight_filled which original column to inspect, while
# `subset` limits the styling to that same column.
styled_df = (
    titanic.head(100).style
    .apply(highlight_filled, col='age', subset=['age'])
    .apply(highlight_filled, col='embarked', subset=['embarked'])
    .apply(highlight_filled, col='embark_town', subset=['embark_town'])
)

# Bare expression at the end of the cell so the notebook renders the Styler.
styled_df


===== DATASET INFORMATION =====
Total Rows: 891
Total Columns: 15

Column Names: ['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town', 'alive', 'alone']

===== MISSING VALUES IN EACH COLUMN =====
survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64

First 5 Rows of Dataset:
   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    mal

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,Southampton,no,True
5,0,3,male,28.0,0,0,8.4583,Q,Third,man,True,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,Southampton,no,True
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,Southampton,no,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,Cherbourg,yes,False
