In [7]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the retail pricing records from disk
data = pd.read_csv('/Users/wahidurrahman/Downloads/retail_price.csv')

# 'month_year' is stored as day-month-year text; parse it into datetimes so
# the monthly aggregates below are keyed by real dates rather than strings
data['month_year'] = pd.to_datetime(data['month_year'], format='%d-%m-%Y')

# Monthly totals: one row per 'month_year' holding the summed metric.
# as_index=False keeps 'month_year' as a regular column for plotting.
time_series_total_price = data.groupby('month_year', as_index=False)['total_price'].sum()
time_series_qty = data.groupby('month_year', as_index=False)['qty'].sum()

# Standardizing the data for clustering
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Cluster rows on price and volume. The three columns are standardized first
# so that each contributes on a comparable scale to the distances K-Means uses.
clustering_data = data[['total_price', 'qty', 'unit_price']]
scaler = StandardScaler()
clustering_data_scaled = scaler.fit_transform(clustering_data)

# n_init is set explicitly: scikit-learn changed its default (10 -> 'auto'),
# and emits a FutureWarning in the transition releases when it is omitted.
# Pinning it keeps the cluster assignment reproducible across versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(clustering_data_scaled)

# Store the integer cluster label (0..2) alongside each row for later plots
data['cluster'] = clusters

# Regression setup: model 'unit_price' from the quantity sold and the three
# competitor price columns
features = ['qty', 'comp_1', 'comp_2', 'comp_3']
target = 'unit_price'

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X, y = data[features], data[target]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression on the training portion
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and report the mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Predict for every row (training rows included) so the dashboard can plot
# predictions for the whole dataset
data['predicted_unit_price'] = model.predict(X)

# Initialize the Dash app
app = dash.Dash(__name__)

# The cluster labels are integers, which plotly express maps to a continuous
# color scale. A string copy of the column makes the segmentation plot use one
# discrete color (and one legend entry) per cluster instead.
cluster_order = [str(label) for label in sorted(data['cluster'].unique())]
segmentation_data = data.assign(cluster=data['cluster'].astype(str))

# Static layout: every figure is built once at start-up from the prepared
# DataFrames; there are no callbacks.
app.layout = html.Div([
    html.H1("Retail Price Optimization Dashboard", style={'text-align': 'center'}),

    # Time Series Analysis
    html.Div([
        html.H2("Time Series Analysis"),
        dcc.Graph(
            id='total-price-over-time',
            figure=px.line(
                time_series_total_price,
                x='month_year',
                y='total_price',
                title='Total Price Over Time',
            ),
        ),
        dcc.Graph(
            id='quantity-sold-over-time',
            figure=px.line(
                time_series_qty,
                x='month_year',
                y='qty',
                title='Quantity Sold Over Time',
            ),
        ),
    ]),

    # Distributions
    html.Div([
        html.H2("Distributions"),
        dcc.Graph(
            id='total-price-distribution',
            figure=px.histogram(
                data,
                x='total_price',
                nbins=30,
                title='Distribution of Total Price',
                marginal="box",
            ),
        ),
        dcc.Graph(
            id='unit-price-distribution',
            figure=px.histogram(
                data,
                x='unit_price',
                nbins=30,
                title='Distribution of Unit Price',
                marginal="box",
            ),
        ),
    ]),

    # Scatter Plot
    html.Div([
        html.H2("Quantity vs. Total Price"),
        dcc.Graph(
            id='quantity-vs-total-price',
            figure=px.scatter(
                data,
                x='qty',
                y='total_price',
                title='Quantity vs. Total Price',
            ),
        ),
    ]),

    # Box Plots by Category
    html.Div([
        html.H2("Box Plots by Category"),
        dcc.Graph(
            id='total-price-by-category',
            figure=px.box(
                data,
                x='total_price',
                y='product_category_name',
                title='Total Price by Category',
            ),
        ),
        dcc.Graph(
            id='unit-price-by-category',
            figure=px.box(
                data,
                x='unit_price',
                y='product_category_name',
                title='Unit Price by Category',
            ),
        ),
    ]),

    # Customer Segmentation
    html.Div([
        html.H2("Customer Segmentation"),
        dcc.Graph(
            id='customer-segmentation',
            figure=px.scatter(
                segmentation_data,
                x='total_price',
                y='qty',
                color='cluster',
                # Keep the legend in numeric cluster order rather than
                # order of first appearance in the data
                category_orders={'cluster': cluster_order},
                title='Customer Segmentation based on Total Price and Quantity',
            ),
        ),
    ]),

    # Competitor Analysis
    html.Div([
        html.H2("Competitor Analysis"),
        dcc.Graph(
            id='competitor-analysis',
            figure=px.box(
                data,
                y=['unit_price', 'comp_1', 'comp_2', 'comp_3'],
                title='Comparison of Prices with Competitors',
            ),
        ),
    ]),

    # Predicted Prices
    html.Div([
        html.H2("Predicted Unit Prices"),
        dcc.Graph(
            id='predicted-unit-prices',
            figure=px.scatter(
                data,
                x='qty',
                y='predicted_unit_price',
                title='Predicted Unit Prices based on Quantity and Competitor Prices',
            ),
        ),
    ]),
])

if __name__ == '__main__':
    # Newer Dash releases replace `run_server` with `run`; prefer `run` when
    # it exists and fall back to `run_server` on older installations.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



Mean Squared Error: 4395.7113652895305



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

