In [31]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.colors as colors
# Make "plotly_white" the default template for every figure created in this notebook.
pio.templates.default = "plotly_white"

In [32]:
# Load the Superstore export, decoding the file as Latin-1, then preview the
# first five rows to check that the columns were read as expected.
data = pd.read_csv(
    "Sample - Superstore.csv",
    encoding="latin-1",
)
print(data.head(n=5))

         Order ID  Order Date   Ship Date       Ship Mode Customer ID  \
0  CA-2016-152156  11-08-2016  11-11-2016    Second Class    CG-12520   
1  CA-2016-152156  11-08-2016  11-11-2016    Second Class    CG-12520   
2  CA-2016-138688  06-12-2016   6/16/2016    Second Class    DV-13045   
3  US-2015-108966  10-11-2015  10/18/2015  Standard Class    SO-20335   
4  US-2015-108966  10-11-2015  10/18/2015  Standard Class    SO-20335   

     Customer Name    Segment        Country             City       State  \
0      Claire Gute   Consumer  United States        Henderson    Kentucky   
1      Claire Gute   Consumer  United States        Henderson    Kentucky   
2  Darrin Van Huff  Corporate  United States      Los Angeles  California   
3   Sean O'Donnell   Consumer  United States  Fort Lauderdale     Florida   
4   Sean O'Donnell   Consumer  United States  Fort Lauderdale     Florida   

   ...       Product ID         Category Sub-Category  \
0  ...  FUR-BO-10001798        Furniture 

In [33]:
# Summary statistics (count, mean, std, quartiles, extremes) as reported by
# DataFrame.describe() with its default settings.
numeric_summary = data.describe()
print(numeric_summary)

        Postal Code         Sales     Quantity
count   9994.000000   9994.000000  9994.000000
mean   55190.379428    229.875325     3.789574
std    32063.693350    623.248765     2.225110
min     1040.000000      0.000000     1.000000
25%    23223.000000     17.000000     2.000000
50%    56430.500000     54.500000     3.000000
75%    90008.000000    210.000000     5.000000
max    99301.000000  22638.000000    14.000000


In [34]:
def _parse_month_first_dates(raw):
    """Parse month-first date strings that use either '-' or '/' as separator.

    The raw columns mix styles such as '11-08-2016' and '6/16/2016'. Letting
    pandas infer a single format from the first value and coercing the rest
    silently turns every row written in the other style into NaT, and those
    rows then vanish from the per-month aggregations. Unifying the separator
    and giving an explicit format parses both styles the same way.

    Parameters
    ----------
    raw : pandas.Series
        Column of date strings, or an already-parsed datetime column.

    Returns
    -------
    pandas.Series
        datetime64 values; entries that still do not match month-day-year
        become NaT (errors='coerce', as before).
    """
    # Keep the cell safe to re-run: an already-parsed column is returned as is.
    if pd.api.types.is_datetime64_any_dtype(raw):
        return raw
    unified = raw.astype(str).str.strip().str.replace('/', '-', regex=False)
    return pd.to_datetime(unified, format='%m-%d-%Y', errors='coerce')


data['Order Date'] = _parse_month_first_dates(data['Order Date'])
data['Ship Date'] = _parse_month_first_dates(data['Ship Date'])

In [35]:
# Split the order date into the calendar parts used by the later aggregations.
# Each pair is (new column name, attribute of the .dt accessor to copy).
order_date_parts = data['Order Date'].dt
for new_column, part in (
    ('Order Month', 'month'),
    ('Order Year', 'year'),
    ('Order Day of Week', 'dayofweek'),
):
    data[new_column] = getattr(order_date_parts, part)

In [36]:
# Total sales per calendar month. Grouping on 'Order Month' alone pools the
# same month of every year into one point.
sales_by_month = data.groupby('Order Month')['Sales'].sum().reset_index()

# px.line already draws the series; no additional trace is added, so the chart
# shows exactly one line (matching the Monthly Profit chart).
fig = px.line(sales_by_month,
              x='Order Month',
              y='Sales',
              title='Monthly Sales Analysis')
fig.show()


In [37]:
# Share of total sales contributed by each product category, as a donut chart.
sales_by_category = data.groupby('Category', as_index=False)['Sales'].sum()

# The update_* methods return the figure, so the styling steps are chained.
fig = (
    px.pie(
        sales_by_category,
        names='Category',
        values='Sales',
        hole=0.5,
        color_discrete_sequence=px.colors.qualitative.Pastel,
    )
    .update_traces(textposition='inside', textinfo='percent+label')
    .update_layout(title_text='Sales Analysis by Category', title_font=dict(size=24))
)
fig.show()



In [38]:
# Total sales for every product sub-category, shown as a bar chart.
sales_by_subcategory = data.groupby('Sub-Category', as_index=False)['Sales'].sum()

fig = px.bar(
    sales_by_subcategory,
    x='Sub-Category',
    y='Sales',
    title='Sales Analysis by Sub-Category',
)
fig.show()

In [39]:
# Convert 'Profit' to numbers before aggregating it.
# NOTE(review): errors='coerce' turns any entry that cannot be parsed into NaN,
# and those entries are then left out of the sums below - confirm that no real
# profit values are being dropped this way.
data['Profit'] = pd.to_numeric(data['Profit'], errors='coerce')

# Total profit per calendar month (the same month of every year is pooled).
profit_by_month = data.groupby('Order Month', as_index=False)['Profit'].sum()

fig = px.line(
    profit_by_month,
    x='Order Month',
    y='Profit',
    title='Monthly Profit Analysis',
)
fig.show()

In [40]:
# Share of total profit contributed by each product category, as a donut chart.
profit_by_category = data.groupby('Category', as_index=False)['Profit'].sum()

# The update_* methods return the figure, so the styling steps are chained.
fig = (
    px.pie(
        profit_by_category,
        names='Category',
        values='Profit',
        hole=0.5,
        color_discrete_sequence=px.colors.qualitative.Pastel,
    )
    .update_traces(textposition='inside', textinfo='percent+label')
    .update_layout(title_text='Profit Analysis by Category', title_font=dict(size=24))
)
fig.show()

In [41]:

# Total profit for every product sub-category, shown as a bar chart.
profit_by_subcategory = data.groupby('Sub-Category', as_index=False)['Profit'].sum()

fig = px.bar(
    profit_by_subcategory,
    x='Sub-Category',
    y='Profit',
    title='Profit Analysis by Sub-Category',
)
fig.show()

In [43]:
# Sales and profit totals for each customer segment.
sales_profit_by_segment = (
    data.groupby('Segment')
    .agg({'Sales': 'sum', 'Profit': 'sum'})
    .reset_index()
)

color_palette = colors.qualitative.Pastel

# One bar trace per metric; zip pairs 'Sales' with the first palette colour
# and 'Profit' with the second.
fig = go.Figure()
for metric, bar_color in zip(('Sales', 'Profit'), color_palette):
    fig.add_trace(
        go.Bar(
            x=sales_profit_by_segment['Segment'],
            y=sales_profit_by_segment[metric],
            name=metric,
            marker_color=bar_color,
        )
    )

fig.update_layout(
    title='Sales and Profit Analysis by Customer Segment',
    xaxis_title='Customer Segment',
    yaxis_title='Amount',
)

fig.show()

In [44]:
# Per-segment totals plus the ratio of total sales to total profit
# (how many units of sales correspond to one unit of profit).
sales_profit_by_segment = (
    data.groupby('Segment')
    .agg({'Sales': 'sum', 'Profit': 'sum'})
    .reset_index()
    .assign(Sales_to_Profit_Ratio=lambda totals: totals['Sales'] / totals['Profit'])
)
print(sales_profit_by_segment[['Segment', 'Sales_to_Profit_Ratio']])

       Segment  Sales_to_Profit_Ratio
0     Consumer               3.820220
1    Corporate               3.889546
2  Home Office               3.799324
