# Comparing sales on holidays versus non-holidays

This Python script visualizes sales data from a Walmart dataset, specifically comparing sales on holidays versus non-holidays. It uses the pandas library for data manipulation and plotly.express for creating the scatter plot.

This code snippet loads a dataset of Walmart sales, converts date information for proper plotting, and creates a scatter plot that visualizes sales figures on holidays versus non-holidays. The plot uses color coding to distinguish between sales data for holidays and non-holidays, providing insights into how sales vary across these two categories.

In [5]:
import pandas as pd
import plotly.express as px

# Load the dataset
data = pd.read_csv('../data/raw/Walmart_Store_sales.csv')

# Convert 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Create Scatter Plot
fig = px.scatter(data, x='Date', y='Weekly_Sales', color='Holiday_Flag',
                 color_continuous_scale=['gray', 'blue'],
                 title='Sales on Holidays vs Non-Holidays',
                 labels={'Holiday_Flag': 'Holiday Flag'})
fig.update_layout(yaxis_title='Weekly Sales')
fig.show()


# Comparison of Sales in first & second half

This Python script calculates and visualizes the average weekly sales for holidays versus non-holidays using a bar plot. It employs pandas for data manipulation and plotly.express for creating the bar plot.

The plot allows for a straightforward comparison of sales performance between these two categories, making it easy to see how sales differ on holidays compared to non-holidays.

In [6]:
import pandas as pd
import plotly.express as px

# Load the dataset
data = pd.read_csv('../data/raw/Walmart_Store_sales.csv')

# Convert 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Calculate average sales for holidays and non-holidays
avg_sales = data.groupby('Holiday_Flag')['Weekly_Sales'].mean().reset_index()
avg_sales.columns = ['Holiday_Flag', 'Average_Weekly_Sales']

# Create Bar Plot
fig = px.bar(avg_sales, x='Holiday_Flag', y='Average_Weekly_Sales',
             title='Average Weekly Sales: Holiday vs Non-Holiday',
             labels={'Holiday_Flag': 'Holiday Flag'})
fig.update_layout(yaxis_title='Average Weekly Sales')
fig.show()


# Correlation between specific holidays and sales (Labor day, Thanksgiving day, Super bowl, Christmas )

This script analyzes Walmart store sales data by visualizing sales on specific holidays (Labor Day, Thanksgiving Day, Super Bowl, and Christmas) compared to non-holidays. It uses pandas for data manipulation and plotly.express for creating interactive scatter plots.

 It produces scatter plots that allow for an analysis of how sales performance varies on these special days.

In [7]:
import pandas as pd
import plotly.express as px

# Load the dataset
data = pd.read_csv('../data/raw/Walmart_Store_sales.csv')

# Convert 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Filter data for the date range
start_date = '2010-02-05'
end_date = '2012-10-26'
data = data[(data['Date'] >= start_date) & (data['Date'] <= end_date)]

# Create binary columns for specific holidays
# Here, we use approximate dates; adjust if needed
data['Labor_Day'] = data['Date'].apply(lambda x: 1 if (x.month == 9 and x.weekday() == 0) else 0)
data['Thanksgiving'] = data['Date'].apply(lambda x: 1 if (x.month == 11 and x.weekday() == 3) else 0)
data['Super_Bowl'] = data['Date'].apply(lambda x: 1 if (x.month == 2 and x.weekday() == 6) else 0)
data['Christmas'] = data['Date'].apply(lambda x: 1 if (x.month == 12 and x.day == 25) else 0)

# Create visualizations for each holiday
def plot_holiday_sales(holiday_column, holiday_name, color):
    fig = px.scatter(data, x='Date', y='Weekly_Sales', color=holiday_column,
                     color_continuous_scale=[color, 'gray'],
                     title=f'Sales on {holiday_name} vs Non-Holiday',
                     labels={holiday_column: holiday_name})
    fig.update_layout(yaxis_title='Weekly Sales')
    fig.show()

# Plot for each holiday
plot_holiday_sales('Labor_Day', 'Labor Day', 'blue')
plot_holiday_sales('Thanksgiving', 'Thanksgiving Day', 'orange')
plot_holiday_sales('Super_Bowl', 'Super Bowl', 'red')
plot_holiday_sales('Christmas', 'Christmas Day', 'green')


In [8]:
import pandas as pd
import plotly.express as px

# Load the dataset
data = pd.read_csv('../data/raw/Walmart_Store_sales.csv')

# Convert 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Filter data for the date range
start_date = '2010-02-05'
end_date = '2012-10-26'
data = data[(data['Date'] >= start_date) & (data['Date'] <= end_date)]

# Create binary columns for specific holidays
def is_labor_day(date):
    return date.month == 9 and date.weekday() == 0

def is_thanksgiving(date):
    return date.month == 11 and date.weekday() == 3

def is_super_bowl(date):
    return date.month == 2 and date.weekday() == 6

def is_christmas(date):
    return date.month == 12 and date.day == 25

data['Labor_Day'] = data['Date'].apply(is_labor_day)
data['Thanksgiving'] = data['Date'].apply(is_thanksgiving)
data['Super_Bowl'] = data['Date'].apply(is_super_bowl)
data['Christmas'] = data['Date'].apply(is_christmas)

# Verify that holidays have been correctly identified
print("Labor Day dates:", data[data['Labor_Day'] == 1]['Date'].tolist())
print("Thanksgiving dates:", data[data['Thanksgiving'] == 1]['Date'].tolist())
print("Super Bowl dates:", data[data['Super_Bowl'] == 1]['Date'].tolist())
print("Christmas dates:", data[data['Christmas'] == 1]['Date'].tolist())

# Melt the DataFrame for plotting
data_melted = data.melt(id_vars=['Date', 'Weekly_Sales'], value_vars=['Labor_Day', 'Thanksgiving', 'Super_Bowl', 'Christmas'],
                        var_name='Holiday', value_name='Is_Holiday')

# Filter to include only rows where the value is True (i.e., it's a holiday)
data_melted = data_melted[data_melted['Is_Holiday'] == True]

# Create a combined scatter plot for all holidays
fig = px.scatter(data_melted, x='Date', y='Weekly_Sales', color='Holiday',
                 color_discrete_map={'Labor_Day': 'blue', 'Thanksgiving': 'orange', 'Super_Bowl': 'red', 'Christmas': 'green'},
                 title='Sales on Specific Holidays',
                 labels={'Holiday': 'Holiday'})
fig.update_layout(yaxis_title='Weekly Sales')
fig.show()


Labor Day dates: []
Thanksgiving dates: []
Super Bowl dates: []
Christmas dates: []
