In [1]:
import pandas as pd
from pandas.tseries.offsets import QuarterBegin
import numpy as np

In [2]:
df = pd.read_csv('products.csv')
df.head()

Unnamed: 0,Product_ID,Year,Quarter,Product_Name,Product_Category,Product_Cost,Product_Price
0,1,2022,1,Action Figure,Toys,$9.99,$15.99
1,2,2022,1,Animal Figures,Toys,$9.99,$12.99
2,3,2022,1,Barrel O' Slime,Art & Crafts,$1.99,$3.99
3,4,2022,1,Chutes & Ladders,Games,$9.99,$12.99
4,5,2022,1,Classic Dominoes,Games,$7.99,$9.99


In [3]:
def generate_date_range(year, quarter):
    start_date = pd.Timestamp(f'{year}-01-01') + QuarterBegin(quarter - 1)
    end_date = start_date + pd.offsets.QuarterEnd()
    return pd.date_range(start=start_date, end=end_date, freq='D')


def generate_date_range(year, quarter):
    # Calculate start and end dates of the quarter
    if quarter == 1:
        start_date = pd.Timestamp(f'{year}-01-01')
        end_date = pd.Timestamp(f'{year}-03-31')
    elif quarter == 2:
        start_date = pd.Timestamp(f'{year}-04-01')
        end_date = pd.Timestamp(f'{year}-06-30')
    elif quarter == 3:
        start_date = pd.Timestamp(f'{year}-07-01')
        end_date = pd.Timestamp(f'{year}-09-30')
    elif quarter == 4:
        start_date = pd.Timestamp(f'{year}-10-01')
        end_date = pd.Timestamp(f'{year}-12-31')
    else:
        raise ValueError("Quarter must be between 1 and 4")
    
    # Generate daily date range
    return pd.date_range(start=start_date, end=end_date, freq='D')


In [4]:
from datetime import timedelta
expanded_rows = []
for _, row in df.iterrows():
    dates = generate_date_range(row['Year'], row['Quarter'])
    for date in dates:
        new_row = row.to_dict()
        new_row['Date'] = date
        new_row['Month'] = date.month
        expanded_rows.append(new_row)


# Parameters for the beta distribution to favor lower values
alpha, beta_param = 2, 5  # Alpha < Beta skews towards lower values

def quadratic_discount(min_discount, max_discount, size=1):
    # Generate uniformly distributed random values
    uniform_values = np.random.uniform(0, 1, size)
    # Square the uniform values to bias towards smaller values
    biased_values = uniform_values ** 2
    # Scale the biased values to the desired range
    return min_discount + (max_discount - min_discount) * biased_values

def is_special_day(date):
    year = date.year
    
    # Define special sale days or weeks
    black_friday = pd.Timestamp(f'{year}-11-01') + pd.offsets.Week(weekday=4, n=3)  # 4th Friday in November
    thanksgiving_week = pd.date_range(start=black_friday - timedelta(days=6), end=black_friday)
    cyber_monday = black_friday + timedelta(days=3)
    valentine_week = pd.date_range(start=f'{year}-02-07', end=f'{year}-02-14')
    easter_sunday = pd.Timestamp(f'{year}-03-22') + pd.offsets.Easter()  # Easter Sunday (varies yearly)
    easter_week = pd.date_range(start=easter_sunday - timedelta(days=3), end=easter_sunday + timedelta(days=3))
    independence_week = pd.date_range(start=f'{year}-07-01', end=f'{year}-07-07')
    labor_day = pd.Timestamp(f'{year}-09-01') + pd.offsets.Week(weekday=0)  # First Monday in September
    labor_day_weekend = pd.date_range(start=labor_day - timedelta(days=2), end=labor_day)
    halloween_week = pd.date_range(start=f'{year}-10-25', end=f'{year}-10-31')
    christmas_week = pd.date_range(start=f'{year}-12-22', end=f'{year}-12-28')
    new_year_week = pd.date_range(start=f'{year}-12-29', end=f'{year + 1}-01-04')
    
    # Combine all special sale periods
    special_dates = thanksgiving_week.union(easter_week).union(valentine_week).union(independence_week) \
                                      .union(labor_day_weekend).union(halloween_week) \
                                      .union(christmas_week).union(new_year_week).union(pd.date_range(cyber_monday, cyber_monday))
    
    return date in special_dates

            
daily_df = pd.DataFrame(expanded_rows)
np.random.seed(42)  # For reproducibility
discounts = []
for date in daily_df['Date']:
    if is_special_day(date):
        discount = np.random.uniform(0.07, 0.1)  # 7% to 10%
    else:
        discount = quadratic_discount(0.00, 0.07)[0]  # 0% to 7%
    discounts.append(discount)

daily_df

Unnamed: 0,Product_ID,Year,Quarter,Product_Name,Product_Category,Product_Cost,Product_Price,Date,Month
0,1,2022,1,Action Figure,Toys,$9.99,$15.99,2022-01-01,1
1,1,2022,1,Action Figure,Toys,$9.99,$15.99,2022-01-02,1
2,1,2022,1,Action Figure,Toys,$9.99,$15.99,2022-01-03,1
3,1,2022,1,Action Figure,Toys,$9.99,$15.99,2022-01-04,1
4,1,2022,1,Action Figure,Toys,$9.99,$15.99,2022-01-05,1
...,...,...,...,...,...,...,...,...,...
22325,35,2023,3,Uno Card Game,Games,$3.99,$8.49,2023-09-26,9
22326,35,2023,3,Uno Card Game,Games,$3.99,$8.49,2023-09-27,9
22327,35,2023,3,Uno Card Game,Games,$3.99,$8.49,2023-09-28,9
22328,35,2023,3,Uno Card Game,Games,$3.99,$8.49,2023-09-29,9


In [5]:
np.random.seed(42)  # For reproducibility
daily_df['Actual_Product_Price'] = daily_df['Product_Price'].str[1:].astype(float) * (1 - np.array(discounts))

# Round for presentation purposes
daily_df['Actual_Product_Price'] = daily_df['Actual_Product_Price'].round(2)

In [6]:
daily_df['Product_Cost'] = daily_df['Product_Cost'].str[1:].astype(float)
daily_df['Product_Price'] = daily_df['Product_Price'].str[1:].astype(float)

In [7]:
daily_df.to_csv('ItemPriceCost.csv',index=False)

In [8]:
print(daily_df.shape)
daily_df.head()

(22330, 10)


Unnamed: 0,Product_ID,Year,Quarter,Product_Name,Product_Category,Product_Cost,Product_Price,Date,Month,Actual_Product_Price
0,1,2022,1,Action Figure,Toys,9.99,15.99,2022-01-01,1,15.83
1,1,2022,1,Action Figure,Toys,9.99,15.99,2022-01-02,1,14.98
2,1,2022,1,Action Figure,Toys,9.99,15.99,2022-01-03,1,15.39
3,1,2022,1,Action Figure,Toys,9.99,15.99,2022-01-04,1,15.59
4,1,2022,1,Action Figure,Toys,9.99,15.99,2022-01-05,1,15.96
