# In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
import plotly.graph_objects as go

# Load the cleaned order data and derive the weekday feature from the order
# timestamp (pandas weekday codes: 0 = Monday ... 6 = Sunday).
DATASET_PATH = r"C:\Users\Admin\Documents\Data Intelligence Engineer project\Task Files\cleaned-dataset.csv"
df = pd.read_csv(DATASET_PATH)
order_time = pd.to_datetime(df['Order Timestamp'])
df['Order Timestamp'] = order_time
df['DayOfWeek'] = order_time.dt.dayofweek

# Single-feature regression: the weekday code is the only predictor of the
# order total.
X, y = df[['DayOfWeek']], df['Total amount']

# Hold out 20% of the rows with a fixed seed so the split is repeatable.
# Only the training portion is used to fit the model here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
regressor = GradientBoostingRegressor().fit(X_train, y_train)

# Weekday labels indexed by the pandas weekday code (0 = Monday ... 6 = Sunday).
DAY_NAMES = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


def _format_usd(value):
    """Return *value* as a USD label such as "$1,234.50"; blank if it is missing."""
    return "" if pd.isna(value) else "${:,.2f}".format(value)


# Predict one value per weekday code while the column is still numeric: the
# model was fitted on the integer 'DayOfWeek' feature, not on day names.
days = pd.DataFrame({'DayOfWeek': range(7)})
predictions = regressor.predict(days)

# Observed mean order total per weekday. Reindexing onto all seven codes keeps
# `actual` positionally aligned with `days` and `predictions` even when the
# data contains no orders for some weekday: that slot becomes NaN instead of
# every later value sliding onto the wrong day of the chart.
actual = df.groupby('DayOfWeek')['Total amount'].mean().reindex(range(7))

# Replace the numeric codes with names only after predicting; this column is
# used as the x axis of the chart.
days['DayOfWeek'] = [DAY_NAMES[code] for code in days['DayOfWeek']]

# Format actual and predicted values as strings with USD sign
actual_formatted = [_format_usd(a) for a in actual]
predictions_formatted = [_format_usd(p) for p in predictions]

# Small shift derived from the largest plotted value: 0.5% of the max
# (the factor is 0.005). Series.max() skips NaN, so a weekday without data
# does not affect it.
offset = max(actual.max(), predictions.max()) * 0.005

# Line chart comparing the observed per-weekday mean with the model's
# per-weekday prediction. Both traces share the x axis, drawing mode and hover
# template; only the values listed per trace below differ.
trace_specs = [
    # (y values, legend name, point labels, label position, customdata)
    (actual, 'Actual Average Sales', actual_formatted,
     'top center', actual + offset),
    (predictions, 'Model Prediction', predictions_formatted,
     'bottom center', predictions - offset),
]

fig = go.Figure()
for series, legend_name, labels, label_position, shifted in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=days['DayOfWeek'],
            y=series,
            mode='lines+markers+text',
            name=legend_name,
            text=labels,
            # Labels go above the actual line and below the predicted line.
            textposition=label_position,
            # Offset-shifted copy of the values, attached as extra per-point
            # data; the hover template below renders `text` only.
            customdata=shifted,
            # Show just the formatted amount and suppress the secondary
            # hover box.
            hovertemplate='%{text}<extra></extra>',
        )
    )

fig.update_layout(
    title='Predicted vs Actual Average Sales Amount by Day of Week',
    xaxis_title='Day of Week',
    yaxis_title='Sales Amount',
)
fig.show()
