In [19]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import widgets, interact, Dropdown, fixed
from IPython.display import display
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go

In [20]:
# The data lives in a 'Data' directory that is a sibling of the notebook's
# working directory, so step up one level first (os.pardir is '..').
parent_path = os.path.join(os.getcwd(), os.pardir)
data_folder_path = os.path.join(parent_path, 'Data')

In [21]:
def _data_file(filename):
    """Return the full path of `filename` inside the data folder."""
    return os.path.join(data_folder_path, filename)


# Load each source table into its own DataFrame.
menu_df = pd.read_csv(_data_file('menu_analysis.csv'))
customers_df = pd.read_csv(_data_file('customers.csv'))
stores_df = pd.read_csv(_data_file('stores.csv'))
structure_df = pd.read_csv(_data_file('structure.csv'))
size_df = pd.read_csv(_data_file('order_size.csv'))

# Transactions are the only table whose date column is parsed on load;
# adjust `parse_dates` if the file's date format causes problems.
transactions_df = pd.read_csv(
    _data_file("transactions.csv"),
    parse_dates=['Transaction_Date'],
)

In [22]:
# Preview the first rows of the customer table as loaded from customers.csv.
customers_df.head()

Unnamed: 0,Customer_ID,First_Name,Last_Name,Email,Phone,DOB,Gender,Address,City,ST,Zip_Code,Member_Since,Newsletter_Subscription,Feedback_Provided,Communication_Preferences,Data_Sharing_Consent
0,CUST0000,Sheila,Garcia,perryamy@example.org,691-567-7618x9089,1994-09-15,Other,10471 Vasquez Squares,Johnhaven,MN,55718,2021-04-01,True,True,Email,True
1,CUST0001,Matthew,Simpson,uwalls@example.net,001-857-509-5186x599,1989-05-01,Male,0220 Vincent Lights Suite 286,Osbornebury,MN,55025,2022-04-01,False,True,SMS,True
2,CUST0002,Michael,Curry,mcoleman@example.com,001-232-634-3256x417,1954-06-27,Male,4884 Casey Valley,Morenoport,MN,55845,2020-06-11,True,True,SMS,True
3,CUST0003,Tara,Forbes,westdaniel@example.net,+1-237-805-3539x12979,1989-11-12,Other,556 Kevin Forest,Tannerfurt,MN,55465,2020-12-12,False,False,Email,True
4,CUST0004,Leslie,Lee,amber03@example.net,984.432.7771x47405,1993-05-24,Female,60452 Ferguson Club Apt. 171,Allisonview,MN,55131,2022-04-27,True,True,,False


### Modification 1: Compute customer age from date of birth

In [23]:
# Derive each customer's age in completed years as of the moment this cell
# runs. Requires customers_df to be loaded with a 'DOB' column.
# NOTE: ages depend on the run date, so re-running the notebook on a later
# day can change ages (and any age grouping built on top of them).
current_date = datetime.now()
customers_df['DOB'] = pd.to_datetime(customers_df['DOB'])

# Compare calendar fields rather than dividing a Timedelta by
# np.timedelta64(1, 'Y'): a year is not a fixed-length duration, pandas 2.x
# rejects the 'Y' unit in Timedelta arithmetic, and an average-year division
# can be off by one for customers close to their birthday.
dob = customers_df['DOB']
birthday_passed = (
    (dob.dt.month < current_date.month)
    | ((dob.dt.month == current_date.month) & (dob.dt.day <= current_date.day))
)

# Year difference, minus one for customers whose birthday is still to come
# this calendar year. A missing DOB (NaT) yields a missing age.
customers_df['Age'] = (
    current_date.year - dob.dt.year - (~birthday_passed).astype(int)
)

In [24]:
# Preview the customer table again to check the newly added 'Age' column.
customers_df.head()

Unnamed: 0,Customer_ID,First_Name,Last_Name,Email,Phone,DOB,Gender,Address,City,ST,Zip_Code,Member_Since,Newsletter_Subscription,Feedback_Provided,Communication_Preferences,Data_Sharing_Consent,Age
0,CUST0000,Sheila,Garcia,perryamy@example.org,691-567-7618x9089,1994-09-15,Other,10471 Vasquez Squares,Johnhaven,MN,55718,2021-04-01,True,True,Email,True,29
1,CUST0001,Matthew,Simpson,uwalls@example.net,001-857-509-5186x599,1989-05-01,Male,0220 Vincent Lights Suite 286,Osbornebury,MN,55025,2022-04-01,False,True,SMS,True,35
2,CUST0002,Michael,Curry,mcoleman@example.com,001-232-634-3256x417,1954-06-27,Male,4884 Casey Valley,Morenoport,MN,55845,2020-06-11,True,True,SMS,True,70
3,CUST0003,Tara,Forbes,westdaniel@example.net,+1-237-805-3539x12979,1989-11-12,Other,556 Kevin Forest,Tannerfurt,MN,55465,2020-12-12,False,False,Email,True,34
4,CUST0004,Leslie,Lee,amber03@example.net,984.432.7771x47405,1993-05-24,Female,60452 Ferguson Club Apt. 171,Allisonview,MN,55131,2022-04-27,True,True,,False,31


### Modification 2: Join item price and customer age onto transactions

In [27]:
# Enrich every transaction row with the price of the purchased item and the
# age of the purchasing customer. Both joins are inner joins, so transactions
# without a matching menu item or customer are dropped.
item_prices = menu_df[['Item_Number', 'Price']]
customer_ages = customers_df[['Customer_ID', 'Age']]
merged_df = (
    transactions_df
    .merge(item_prices, how='inner', on='Item_Number')
    .merge(customer_ages, how='inner', on='Customer_ID')
)

In [28]:
# Preview the merged transactions, which now carry 'Price' and 'Age'.
merged_df.head()

Unnamed: 0,Order_Number,Transaction_Date,Transaction_Time,Store_Number,Customer_ID,Item_Number,Price,Age
0,142292,2024-01-14,00:43:25,STR_7,CUST3706,Item_4,5.5,52
1,99927,2023-07-07,09:33:49,STR_4,CUST3706,Item_4,5.5,52
2,99927,2023-07-07,09:33:49,STR_4,CUST3706,Item_4,5.5,52
3,129407,2024-04-04,05:15:54,STR_1,CUST3706,Item_1,6.5,52
4,129407,2024-04-04,05:15:54,STR_1,CUST3706,Item_1,6.5,52


### Modification 3: Summarize customer spending by age group

In [31]:
# Per-customer summary:
#   Price             - mean price of the items the customer bought
#   Age               - the customer's age (constant per customer)
#   Transaction_Count - number of merged_df rows (one per purchased item)
customer_spending = (
    merged_df
    .groupby('Customer_ID')
    .agg(
        Price=('Price', 'mean'),
        Age=('Age', 'first'),
        Transaction_Count=('Price', 'size'),
    )
    .reset_index()
)

# Age bins. The labels describe right-closed intervals ('21-30' means
# 21 <= age <= 30), so cut with right=True; include_lowest keeps age 0 in the
# first bin. The last edge is open-ended so '91+' really covers every age
# above 90 instead of silently dropping ages of 100 or more.
age_bins = [0, 20, 30, 40, 50, 60, 70, 80, 90, np.inf]
age_labels = ['0-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-90', '91+']
customer_spending['Age_Group'] = pd.cut(
    customer_spending['Age'],
    bins=age_bins,
    labels=age_labels,
    right=True,
    include_lowest=True,
)

# Per-age-group summary. observed=False keeps age groups that have no
# customers, so every label still appears in the result.
# Transaction_Count sums the per-customer transaction rows; counting the
# per-customer 'Price' values here would only repeat Customer_Count.
age_group_stats = (
    customer_spending
    .groupby('Age_Group', observed=False)
    .agg(
        Avg_Spending=('Price', 'mean'),
        Median_Spending=('Price', 'median'),
        Transaction_Count=('Transaction_Count', 'sum'),
        Customer_Count=('Customer_ID', 'count'),
    )
    .reset_index()
)

# 10. Customer Age Distribution vs. Spending

In [32]:
# Dual-axis figure: bars show the number of customers per age group (left
# axis) and the line shows their average spending (right axis).

# Hover content shared by both traces. Every value is read from customdata
# rather than %{y}, because y is the customer count on the bars but the
# average spending on the line; a shared template using %{y} would mislabel
# one of the two traces.
hover_data = age_group_stats[
    ['Customer_Count', 'Avg_Spending', 'Median_Spending', 'Transaction_Count']
].to_numpy()
hover_template = (
    "Age Group: %{x}<br>"
    "Number of Customers: %{customdata[0]}<br>"
    "Average Spending: $%{customdata[1]:.2f}<br>"
    "Median Spending: $%{customdata[2]:.2f}<br>"
    "Transaction Count: %{customdata[3]}<extra></extra>"
)

fig = go.Figure()

# Bar chart for customer count
fig.add_trace(go.Bar(
    x=age_group_stats['Age_Group'],
    y=age_group_stats['Customer_Count'],
    name='Number of Customers',
    yaxis='y',
    offsetgroup=1,
    marker_color='lightblue',
    customdata=hover_data,
    hovertemplate=hover_template
))

# Line chart for average spending
fig.add_trace(go.Scatter(
    x=age_group_stats['Age_Group'],
    y=age_group_stats['Avg_Spending'],
    name='Average Spending',
    yaxis='y2',
    mode='lines+markers',
    line=dict(color='red', width=2),
    marker=dict(size=8),
    customdata=hover_data,
    hovertemplate=hover_template
))

# Update layout
fig.update_layout(
    title='Customer Age Distribution vs. Average Spending',
    xaxis=dict(title='Age Group'),
    yaxis=dict(title='Number of Customers', side='left', showgrid=False),
    yaxis2=dict(title='Average Spending ($)', side='right', overlaying='y', showgrid=False),
    # Legend is pushed past x=1 so it does not sit on top of the right axis.
    legend=dict(x=1.1, y=1, bgcolor='rgba(255, 255, 255, 0.5)'),
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1
)

# Same hover label styling for both traces.
fig.update_traces(hoverlabel=dict(bgcolor="white", font_size=12))

fig.show()