In [1]:
import pandas as pd
import plotly.express as pltx
import seaborn as sns
import numpy as np


In [2]:
# Read the customer dataset from a CSV path relative to the notebook's
# working directory, then preview the first rows as a sanity check.
df = pd.read_csv("../Data/customer_dataset.csv")
df.head()

Unnamed: 0,Customer_ID,Gender,Age,City,Membership_Type,Total_Spend,Items_Purchased,Average_Rating,Discount_Applied,Days_Since_Last_Purchase,Satisfaction_Level
0,101,Female,29,New York,Gold,1120.2,14,4.6,True,25,Satisfied
1,102,Male,34,Los Angeles,Silver,780.5,11,4.1,False,18,Neutral
2,103,Female,43,Chicago,Bronze,510.75,9,3.4,True,42,Unsatisfied
3,104,Male,30,San Francisco,Gold,1480.3,19,4.7,False,12,Satisfied
4,105,Male,27,Miami,Silver,720.4,13,4.0,True,55,Unsatisfied


In [3]:
# Structural overview: row count, per-column non-null counts, dtypes and memory use.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 350 entries, 0 to 349
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Customer_ID               350 non-null    int64  
 1   Gender                    350 non-null    object 
 2   Age                       350 non-null    int64  
 3   City                      350 non-null    object 
 4   Membership_Type           350 non-null    object 
 5   Total_Spend               350 non-null    float64
 6   Items_Purchased           350 non-null    int64  
 7   Average_Rating            350 non-null    float64
 8   Discount_Applied          350 non-null    bool   
 9   Days_Since_Last_Purchase  350 non-null    int64  
 10  Satisfaction_Level        348 non-null    object 
dtypes: bool(1), float64(2), int64(4), object(4)
memory usage: 27.8+ KB


In [4]:
# Per-column dtypes on their own (the same dtype information df.info() reports).
df.dtypes


Customer_ID                   int64
Gender                       object
Age                           int64
City                         object
Membership_Type              object
Total_Spend                 float64
Items_Purchased               int64
Average_Rating              float64
Discount_Applied               bool
Days_Since_Last_Purchase      int64
Satisfaction_Level           object
dtype: object

In [5]:
# Number of missing values in each column.
df.isna().sum()


Customer_ID                 0
Gender                      0
Age                         0
City                        0
Membership_Type             0
Total_Spend                 0
Items_Purchased             0
Average_Rating              0
Discount_Applied            0
Days_Since_Last_Purchase    0
Satisfaction_Level          2
dtype: int64

In [6]:

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Customer_ID,Age,Total_Spend,Items_Purchased,Average_Rating,Days_Since_Last_Purchase
count,350.0,350.0,350.0,350.0,350.0,350.0
mean,275.5,33.597143,845.381714,12.6,4.019143,26.588571
std,101.180532,4.870882,362.058695,4.155984,0.580539,13.440813
min,101.0,26.0,410.8,7.0,3.0,9.0
25%,188.25,30.0,502.0,9.0,3.5,15.0
50%,275.5,32.5,775.2,12.0,4.1,23.0
75%,362.75,37.0,1160.6,15.0,4.5,38.0
max,450.0,43.0,1520.1,21.0,4.9,63.0


In [7]:
# Remove the Customer_ID column from the working frame.
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell idempotent: re-running it after the column is already gone
# no longer raises KeyError.
df = df.drop(columns=['Customer_ID'], errors='ignore')

df.head()

Unnamed: 0,Gender,Age,City,Membership_Type,Total_Spend,Items_Purchased,Average_Rating,Discount_Applied,Days_Since_Last_Purchase,Satisfaction_Level
0,Female,29,New York,Gold,1120.2,14,4.6,True,25,Satisfied
1,Male,34,Los Angeles,Silver,780.5,11,4.1,False,18,Neutral
2,Female,43,Chicago,Bronze,510.75,9,3.4,True,42,Unsatisfied
3,Male,30,San Francisco,Gold,1480.3,19,4.7,False,12,Satisfied
4,Male,27,Miami,Silver,720.4,13,4.0,True,55,Unsatisfied


In [9]:
# Bin edges every five years from 20 to 45, plus an open-ended top bin.
age_bins = [*range(20, 50, 5), float('inf')]
age_labels = ['20-24', '25-29', '30-34', '35-39', '40-44', '45+']

# right=False makes each bin half-open [lo, hi), so e.g. age 25 falls in
# '25-29', which is what the labels describe.
df['AgeBin'] = pd.cut(df['Age'], bins=age_bins, labels=age_labels, right=False)

# Count customers per bin, keep the bins in label order, and expose the
# result as a two-column frame ready for plotting.
age_counts = (
    df['AgeBin']
    .value_counts()
    .sort_index()
    .rename_axis('Age Group')
    .reset_index(name='Age Group Count')
)

# One bar per age group, annotated with its count and coloured by group.
plot = pltx.bar(
    age_counts,
    x='Age Group',
    y='Age Group Count',
    title="Age Group Distribution of Customers",
    text='Age Group Count',
    color='Age Group',
)

# Pin the x-axis category order to the label list so the bars appear in
# ascending age order.
plot.update_layout(
    xaxis_title="Age Group",
    yaxis_title="Age Group Count",
    xaxis=dict(categoryorder='array', categoryarray=age_labels),
)

plot.show()