## 1. Importing Combined Sales Data

In [592]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta


# Specify the file path
file_path = r"C:\DSA3101-Project\E-commerce-Performance-Analysis-and-Optimization\SubgroupB\price modelling\combined_sales_data.csv"

# Read the CSV file into a DataFrame
combined_sales_data = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify
combined_sales_data.head()


Unnamed: 0,month,product_category,mean_price,total_qty,pct_change_qty,pct_change_price,price_elasticity
0,2016-05-01,Accessories,10.89,12,-0.909091,0.291815,-3.115299
1,2016-06-01,Accessories,9.29,67,4.583333,-0.146924,-31.195312
2,2016-07-01,Accessories,11.29,0,-1.0,0.215285,-4.645
3,2016-09-01,Accessories,4.275714,17,-0.26087,-0.034282,7.609412
4,2016-10-01,Accessories,6.066923,43,1.529412,0.418926,3.65079


## 2. Importing Demand Forecast Data

In [594]:
# Specify the file path
file_path = r"C:\DSA3101-Project\E-commerce-Performance-Analysis-and-Optimization\SubgroupB\DemandForecasting\data\aug_16_jul_17_forecast_v2.csv"

# Read the CSV file into a DataFrame
demand_forecast_data = pd.read_csv(file_path)

demand_forecast_data['year_month'] = pd.to_datetime(demand_forecast_data['year_month'])


# Set up `dynamic_pricing_data` based on your existing combined_sales_data
dynamic_pricing_data = combined_sales_data.copy()  # Copy to avoid modifying the original data

# Group by 'month' and 'product_category', and sum up the 'forecast_qty' for each group
monthly_forecast_summary = demand_forecast_data.groupby(['year_month', 'product_category']).agg(
    total_forecast_qty=('forecast_qty', 'sum')  # Sum forecast quantities for each category per month
).reset_index()

# Display the summarized forecast data
monthly_forecast_summary.head()


Unnamed: 0,year_month,product_category,total_forecast_qty
0,2016-08-01,Accessories,19.0
1,2016-08-01,Apparel,1979.0
2,2016-08-01,Bags,984.0
3,2016-08-01,Drinkware,3471.0
4,2016-08-01,Electronics,1029.0


## 3. Merging Combined Sales and Demand Forecast Data

In [596]:
# Convert 'month' in combined_sales_data to "YYYY-MM" string format
combined_sales_data['month'] = pd.to_datetime(combined_sales_data['month']).dt.strftime('%Y-%m')

# Convert 'year_month' in monthly_forecast_summary to "YYYY-MM" string format
monthly_forecast_summary['year_month'] = pd.to_datetime(monthly_forecast_summary['year_month']).dt.strftime('%Y-%m')

# Rename 'year_month' to 'month' in monthly_forecast_summary for consistency
monthly_forecast_summary.rename(columns={'year_month': 'month'}, inplace=True)

# Merge the two DataFrames on 'month' and 'product_category'
merged_data = pd.merge(
    combined_sales_data,
    monthly_forecast_summary,
    on=['month', 'product_category'],
    how='left'
)

# Display the first few rows of the merged DataFrame to verify
merged_data.head()

Unnamed: 0,month,product_category,mean_price,total_qty,pct_change_qty,pct_change_price,price_elasticity,total_forecast_qty
0,2016-05,Accessories,10.89,12,-0.909091,0.291815,-3.115299,
1,2016-06,Accessories,9.29,67,4.583333,-0.146924,-31.195312,
2,2016-07,Accessories,11.29,0,-1.0,0.215285,-4.645,
3,2016-09,Accessories,4.275714,17,-0.26087,-0.034282,7.609412,18.0
4,2016-10,Accessories,6.066923,43,1.529412,0.418926,3.65079,24.0


## 4. Importing Amazon FY20-21 Sales Data

In [598]:
# Load the data with low_memory set to False
file_path = r"C:\DSA3101-Project\E-commerce-Performance-Analysis-and-Optimization\SubgroupB\price modelling\Amazon Sales FY2020-21.csv"
amazon_data = pd.read_csv(file_path, low_memory=False)

## 5. Mapping Amazon Product Categories to match Google Categories

In [600]:
# Filter for completed orders only
completed_orders = amazon_data[amazon_data['status'].str.lower() == 'complete']

# Select relevant columns and create a copy to avoid warnings
relevant_columns = completed_orders[['order_date', 'qty_ordered', 'price', 'category']].copy()

# Define the mapping from Amazon categories to Google categories
category_mapping = {
    "Men's Fashion": 'Apparel',
    'Appliances': 'Housewares',
    'Home & Living': 'Housewares',
    'Health & Sports': 'Lifestyle',
    'Beauty & Grooming': 'Lifestyle',
    'Mobiles & Tablets': 'Electronics',
    "Women's Fashion": 'Apparel',
    'Soghaat': 'Others', 
    'Kids & Baby': 'Lifestyle',
    'Superstore': 'Housewares',
    'Entertainment': 'Fun',
    'Computing': 'Electronics',
    'Others': 'Others',
    'Books': 'Office',
    'School & Education': 'Others'
}

# Apply the category mapping to create 'product_category' column based on the 'category' column
relevant_columns.loc[:, 'product_category'] = relevant_columns['category'].map(category_mapping)

# Verify the updated categories in Amazon data
print("Mapped Product Categories:", relevant_columns['product_category'].unique())

Mapped Product Categories: ['Apparel' 'Housewares' 'Lifestyle' 'Electronics' 'Others' 'Fun' 'Office']


## 6. Getting Total Quantity and Mean Price for Amazon Products for each Month

In [602]:
# Ensure 'order_date' is in datetime format
relevant_columns['order_date'] = pd.to_datetime(relevant_columns['order_date'], dayfirst=True)  # Assuming day-first format based on your previous examples

# Extract month and year for grouping
relevant_columns['year_month'] = relevant_columns['order_date'].dt.to_period('M')

# Group by 'year_month' and 'product_category', and calculate total quantity and mean price
amazon_monthly_category_summary = relevant_columns.groupby(['year_month', 'product_category']).agg(
    total_qty_ordered=('qty_ordered', 'sum'),
    mean_price=('price', 'mean')
).reset_index()

# Display the result
print(amazon_monthly_category_summary)


   year_month product_category  total_qty_ordered   mean_price
0     2020-10          Apparel               2201   116.978046
1     2020-10      Electronics               1356   527.955651
2     2020-10              Fun                139  1913.350122
3     2020-10       Housewares               1115   195.666401
4     2020-10        Lifestyle               1252   139.315241
..        ...              ...                ...          ...
72    2021-08        Lifestyle                 83    52.100000
73    2021-08           Others                101    10.050000
74    2021-09          Apparel                 21    69.466667
75    2021-09       Housewares                  6    34.900000
76    2021-09        Lifestyle                 20    24.622222

[77 rows x 4 columns]


## 7. Scaling Amazon Data to match Time Period for Google Data

In [604]:
# Ensure 'year_month' is in datetime format
amazon_monthly_category_summary['year_month'] = amazon_monthly_category_summary['year_month'].dt.to_timestamp()

# Adjust the year by subtracting 4 years to align with Google data timeframe
amazon_monthly_category_summary['adjusted_year_month'] = amazon_monthly_category_summary['year_month'].apply(lambda x: x - relativedelta(years=4))

# Rename 'adjusted_year_month' to 'month' for alignment with Google data
amazon_monthly_category_summary.rename(columns={'adjusted_year_month': 'month'}, inplace=True)

# Rename 'adjusted_year_month' to 'month' for alignment with Google data
amazon_monthly_category_summary.rename(columns={'total_qty_ordered': 'total_qty'}, inplace=True)

# Display the first few rows to verify the adjustments
print(amazon_monthly_category_summary)


   year_month product_category  total_qty   mean_price      month
0  2020-10-01          Apparel       2201   116.978046 2016-10-01
1  2020-10-01      Electronics       1356   527.955651 2016-10-01
2  2020-10-01              Fun        139  1913.350122 2016-10-01
3  2020-10-01       Housewares       1115   195.666401 2016-10-01
4  2020-10-01        Lifestyle       1252   139.315241 2016-10-01
..        ...              ...        ...          ...        ...
72 2021-08-01        Lifestyle         83    52.100000 2017-08-01
73 2021-08-01           Others        101    10.050000 2017-08-01
74 2021-09-01          Apparel         21    69.466667 2017-09-01
75 2021-09-01       Housewares          6    34.900000 2017-09-01
76 2021-09-01        Lifestyle         20    24.622222 2017-09-01

[77 rows x 5 columns]


## 8. Merging Google Data with Amazon Data

In [606]:
# Convert 'month' columns in both DataFrames to a common string format
amazon_monthly_category_summary['month'] = amazon_monthly_category_summary['month'].dt.strftime('%Y-%m')

# Now, perform the left join
final_data = merged_data.merge(
    amazon_monthly_category_summary[['month', 'product_category', 'total_qty', 'mean_price']],
    on=['month', 'product_category'],
    how='left',
    suffixes=('_google', '_amazon')
)

# Display the first few rows of the merged DataFrame to verify the join
print(final_data)

       month product_category  mean_price_google  total_qty_google  \
0    2016-05      Accessories          10.890000                12   
1    2016-06      Accessories           9.290000                67   
2    2016-07      Accessories          11.290000                 0   
3    2016-09      Accessories           4.275714                17   
4    2016-10      Accessories           6.066923                43   
..       ...              ...                ...               ...   
165  2017-03           Office           3.360756              6924   
166  2017-04           Office           3.491310              9645   
167  2017-05           Office           3.577024              5672   
168  2017-06           Office           3.214489              7960   
169  2017-07           Office           3.495696              4076   

     pct_change_qty  pct_change_price  price_elasticity  total_forecast_qty  \
0         -0.909091          0.291815         -3.115299                 NaN   
1

## Amazon products seem to be much higher in price compared to Google products so Direct comparison is unfair.

In [608]:
# Filter the DataFrame for the 'Apparel' product category
apparel_data = final_data[final_data['product_category'] == 'Apparel']

# Display the first few rows to confirm
print(apparel_data.head())


      month product_category  mean_price_google  total_qty_google  \
14  2016-05          Apparel          22.902668              1781   
15  2016-06          Apparel          22.270417              1746   
16  2016-07          Apparel          19.267974              3119   
17  2016-08          Apparel          23.618907              2366   
18  2016-09          Apparel          25.385908              1804   

    pct_change_qty  pct_change_price  price_elasticity  total_forecast_qty  \
14       -0.322040         -0.041448          7.769834                 NaN   
15       -0.019652         -0.027606          0.711871                 NaN   
16        0.786369         -0.134818         -5.832837                 NaN   
17       -0.241424          0.225812         -1.069137              1979.0   
18       -0.237532          0.074813         -3.175006              1403.0   

    total_qty_amazon  mean_price_amazon  
14               NaN                NaN  
15               NaN            

In [623]:
# Step 1: Calculate month-over-month percentage change for Google prices
apparel_data.loc[:, 'seasonal_trend_google'] = apparel_data['mean_price_google'].pct_change()

# Step 2: Calculate Amazon price trend (percentage change) and apply smoothing
apparel_data.loc[:, 'amazon_trend'] = apparel_data['mean_price_amazon'].pct_change()
apparel_data.loc[:, 'amazon_trend_smoothed'] = apparel_data['amazon_trend'].rolling(window=3, min_periods=1).mean()

# Step 3: Define adjustment factors
elasticity_factor = -0.2  # Impact of price elasticity
trend_factor = 0.3        # Impact of Google’s own seasonal trend
competitor_factor = 0.3   # Impact of Amazon's seasonal trend
demand_factor = 0.2       # Impact of demand forecasts

# Step 4: Calculate adjusted price, considering if price elasticity is too high (> 2.5)
def calculate_adjusted_price(row):
    if row['price_elasticity'] > 2.5:
        # Consumers are highly responsive to price changes; keep the price stable
        return row['mean_price_google']
    else:
        # Calculate adjusted price using the dynamic pricing formula
        return row['mean_price_google'] * (
            1 + (elasticity_factor * row['price_elasticity']) +
            (trend_factor * row['seasonal_trend_google']) +
            (competitor_factor * row['amazon_trend_smoothed']) +
            (demand_factor * (row['total_forecast_qty'] - row['total_qty_google']) / max(row['total_qty_google'], 1))  # Avoid division by zero
        )

# Step 5: Apply the calculation with .apply and store in 'adjusted_price_google'
apparel_data.loc[:, 'adjusted_price_google'] = apparel_data.apply(calculate_adjusted_price, axis=1)

# Step 6: Apply minimum price constraint to avoid negative prices
apparel_data.loc[:, 'adjusted_price_google'] = apparel_data['adjusted_price_google'].clip(lower=0)

# Display the final adjusted prices for verification
print(apparel_data[['month', 'product_category', 'mean_price_google', 'adjusted_price_google', 'seasonal_trend_google', 'amazon_trend_smoothed']])


      month product_category  mean_price_google  adjusted_price_google  \
14  2016-05          Apparel          22.902668              22.902668   
15  2016-06          Apparel          22.270417                    NaN   
16  2016-07          Apparel          19.267974                    NaN   
17  2016-08          Apparel          23.618907                    NaN   
18  2016-09          Apparel          25.385908                    NaN   
19  2016-10          Apparel          28.820980                    NaN   
20  2016-11          Apparel          27.457189              61.617933   
21  2016-12          Apparel          30.466321              30.466321   
22  2017-01          Apparel          29.480877              29.480877   
23  2017-02          Apparel          30.163279              22.729369   
24  2017-03          Apparel          27.371402              54.941884   
25  2017-04          Apparel          23.010924              40.295039   
26  2017-05          Apparel          

In [610]:
# Specify the file path for the filtered Apparel data
apparel_file_path = r"C:\DSA3101-Project\E-commerce-Performance-Analysis-and-Optimization\SubgroupB\price modelling\apparel_data.csv"

# Save the filtered data to a CSV file
apparel_data.to_csv(apparel_file_path, index=False)

# Confirm the file has been saved
print(f"Apparel data saved to {apparel_file_path}")


Apparel data saved to C:\DSA3101-Project\E-commerce-Performance-Analysis-and-Optimization\SubgroupB\price modelling\apparel_data.csv
