In [1]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import widgets, interact, Dropdown, fixed
from IPython.display import display
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go

In [2]:
# Locate the data directory relative to where the notebook is running:
# one level above the current working directory, then into 'Data'.
parent_path = os.path.join(os.getcwd(), os.pardir)
data_folder_path = os.path.join(parent_path, 'Data')

In [3]:
def _read_data_csv(filename, **read_csv_options):
    """Read one CSV file from the data folder, forwarding options to pd.read_csv."""
    return pd.read_csv(os.path.join(data_folder_path, filename), **read_csv_options)


# One DataFrame per source file in the data folder
menu_df = _read_data_csv('menu_analysis.csv')
customers_df = _read_data_csv('customers.csv')
stores_df = _read_data_csv('stores.csv')
structure_df = _read_data_csv('structure.csv')
size_df = _read_data_csv('order_size.csv')
transactions_df = _read_data_csv(
    "transactions.csv",
    parse_dates=['Transaction_Date'],  # Modify as needed if format issues arise
)

### Modification 1

In [5]:
# Make sure the transaction dates are parsed as datetimes (column replaced in place)
transactions_df['Transaction_Date'] = transactions_df['Transaction_Date'].pipe(pd.to_datetime)

### Modification 2

In [9]:
# Parse the membership start dates as datetimes (column replaced in place)
customers_df['Member_Since'] = customers_df['Member_Since'].pipe(pd.to_datetime)

# Customer Retention Rate

In [10]:
# Function to calculate retention rate
def calculate_retention(df, period):
    """Compute period-over-period customer retention.

    For each period the retention rate is the share of the customers who were
    active in the immediately preceding calendar period and are active again
    in the current one::

        rate(p) = |customers(p) & customers(p - 1)| / |customers(p - 1)|

    Parameters
    ----------
    df : pandas.DataFrame
        Transactions with a datetime 'Transaction_Date' column and a
        'Customer_ID' column. The frame is not modified.
    period : str
        pandas period frequency alias used to bucket the dates
        (for example 'M' for months or 'Q' for quarters).

    Returns
    -------
    pandas.Series
        Retention rate as a fraction in [0, 1], indexed by a PeriodIndex named
        'PeriodStart' holding every period that has at least one transaction.
        A period whose preceding calendar period has no customers (the first
        period, or the period right after a gap) gets a rate of 0.
    """
    # One row per (customer, period) pair; rows lacking a date or a customer
    # cannot be attributed to anyone and are dropped.
    activity = (
        df[['Customer_ID']]
        .assign(PeriodStart=df['Transaction_Date'].dt.to_period(period))
        .dropna()
        .drop_duplicates()
    )

    # groupby yields the periods in sorted order, so the dict is chronological.
    customers_by_period = {
        period_start: set(customer_ids)
        for period_start, customer_ids in activity.groupby('PeriodStart')['Customer_ID']
    }

    rates = []
    for period_start, current_customers in customers_by_period.items():
        # Compare against the previous *calendar* period, not merely the
        # previous row, so a gap in the data is not treated as adjacency.
        previous_customers = customers_by_period.get(period_start - 1, set())
        if previous_customers:
            retained = len(current_customers & previous_customers)
            rates.append(retained / len(previous_customers))
        else:
            rates.append(0.0)

    index = pd.PeriodIndex(
        list(customers_by_period),
        dtype=activity['PeriodStart'].dtype,
        name='PeriodStart',
    )
    return pd.Series(rates, index=index, dtype='float64')

# Calculate monthly and quarterly retention rates
monthly_retention = calculate_retention(transactions_df, 'M')
quarterly_retention = calculate_retention(transactions_df, 'Q')


def _retention_trace(retention, name, color):
    """Build a lines+markers trace for one retention series.

    The x values are the start timestamps of each period rather than the
    period labels: the x-axis below is declared as type="date", and labels
    such as '2023Q1' are not date strings Plotly can place on a date axis.
    """
    return go.Scatter(
        x=retention.index.to_timestamp(),
        y=retention.values,
        mode='lines+markers',
        name=name,
        line=dict(color=color)
    )


# Create the plot
fig = go.Figure()
fig.add_trace(_retention_trace(monthly_retention, 'Monthly Retention', 'blue'))
fig.add_trace(_retention_trace(quarterly_retention, 'Quarterly Retention', 'red'))

fig.update_layout(
    title='Customer Retention Rate Over Time',
    xaxis_title='Time Period',
    yaxis_title='Retention Rate',
    yaxis=dict(tickformat='.0%'),  # rates are fractions; show them as percentages
    legend_title='Retention Period',
    hovermode='x unified'
)

# Add range selector buttons and a range slider on the date axis
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    )
)

fig.show()