## Import

In [None]:
# Mount Google Drive at /content/drive (Colab-only module); the CSV paths read
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Hyperparameter Setting

In [None]:
# Run-wide settings, looked up by key elsewhere in the notebook.
CFG = dict(
    TRAIN_WINDOW_SIZE=90,  # train on 90 days of history
    PREDICT_SIZE=21,       # forecast 21 days
    EPOCHS=20,
    LEARNING_RATE=1e-4,
    BATCH_SIZE=4096,
    SEED=41,
)


In [None]:
def seed_everything(seed):
    """Seed every random number generator used here and make cuDNN deterministic.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    and exports ``PYTHONHASHSEED`` for child processes.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (safe no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels between
    # runs, which defeats the determinism requested above; it must be off.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed from the shared config

### 데이터 불러오기

In [None]:
# Load the input tables from the mounted Drive folder. Each frame gets pandas'
# default RangeIndex because no index column is specified.
train_data = pd.read_csv('/content/drive/MyDrive/LG_AIMERS/train.csv')#.drop(columns=['ID', '제품'])
# Keyword-count table; merged on the brand column further down.
keyword = pd.read_csv('/content/drive/MyDrive/LG_AIMERS/brand_keyword_cnt.csv')
# Product-info table; merged on the product column further down.
product = pd.read_csv('/content/drive/MyDrive/LG_AIMERS/product_info.csv')
# Divided element-wise by the train table further down to derive a per-day price.
sales = pd.read_csv('/content/drive/MyDrive/LG_AIMERS/sales.csv')
# Template whose rows/columns are reused for every partial submission frame.
submission = pd.read_csv('/content/drive/MyDrive/LG_AIMERS/sample_submission.csv')


In [None]:
# Restrict both tables to the single top-level category analysed in this notebook.
train = train_data.loc[train_data['대분류'].eq('B002-C001-0005')]
sales = sales.loc[sales['대분류'].eq('B002-C001-0005')]

In [None]:
train

### 대분류 5번의 일별 전체 판매량 그래프

In [None]:
# Column-wise total over all rows for every column from position 6 onward
# (positions 0-5 are treated as metadata throughout this notebook).
data = train.iloc[:, 6:].sum(axis='index')

In [None]:
import matplotlib.pyplot as plt

# Line plot of the per-column totals held in `data`.
x = data.index
y = data
plt.figure(figsize=(15, 6))
plt.plot(x, y, marker='o')

plt.xlabel('Date')
plt.ylabel('Sales')
plt.grid(True)
# Label roughly 30 evenly spaced ticks. The step is clamped to >= 1 because
# range() raises ValueError on a zero step when there are fewer than 30 points.
tick_step = max(1, len(x) // 30)
x_ticks_indices = range(0, len(x), tick_step)  # tick positions
x_ticks_labels = [x[pos] for pos in x_ticks_indices]  # labels at those positions
plt.xticks(x_ticks_indices, x_ticks_labels, rotation=45)
plt.show()

In [None]:
# Join the keyword table onto the first six (metadata) columns of `train`,
# matching on the brand column with the default inner join.
keyword = pd.merge(train.iloc[:, :6], keyword, on='브랜드')

In [None]:
# Drop the six leading metadata columns, then treat missing counts as zero.
keyword = keyword.iloc[:, 6:]
keyword = keyword.fillna(0)

In [None]:
keyword

In [None]:
import matplotlib.pyplot as plt
# Line plot of the per-column totals of the keyword table.
data = keyword.sum(axis=0)
x = data.index
y = data
plt.figure(figsize=(15, 6))
plt.plot(x, y, marker='o')

plt.xlabel('Date')
# This figure shows keyword counts, not sales; the old label was copied from the sales plot.
plt.ylabel('Keyword count')
plt.grid(True)
# Label roughly 15 evenly spaced ticks. The step is clamped to >= 1 because
# range() raises ValueError on a zero step when there are fewer than 15 points.
tick_step = max(1, len(x) // 15)
x_ticks_indices = range(0, len(x), tick_step)  # tick positions
x_ticks_labels = [x[pos] for pos in x_ticks_indices]  # labels at those positions
plt.xticks(x_ticks_indices, x_ticks_labels, rotation=45)
plt.show()

In [None]:
# Join the product-info table onto the first six (metadata) columns of `train`,
# matching on the product column with the default inner join.
product = pd.merge(train.iloc[:, :6], product, on='제품')

In [None]:
product['제품특성'].iloc[50:100]

## 가격비교

In [None]:
train

In [None]:
# Element-wise ratio of the `sales` table to the `train` table over the columns
# from position 6 onward (presumably amount / units = unit price -- verify).
# 0/0 yields NaN and x/0 yields +/-inf; both are mapped to 0 ("no price that
# day") so that an inf can never become the row maximum used further down.
price_data = (
    (sales.iloc[:, 6:] / train.iloc[:, 6:])
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

In [None]:
# Highest value in each row of the price table.
high_price = price_data.max(axis='columns')

In [None]:
price_data.head(5)

In [None]:
# Replace every zero entry with the maximum of its own row. This is one
# vectorized mask instead of a Python-level iterrows() loop that rewrote each
# row through .loc; `other` is aligned to the row index via axis=0.
row_max = price_data.max(axis=1)
price_data = price_data.mask(price_data == 0, row_max, axis=0)

In [None]:
price_data.iloc[10,:]

In [None]:
# Draw one figure per row for the first (up to) 10 rows of the price table.
# The count is capped at the number of rows so a short table cannot raise IndexError.
for i in range(min(10, len(price_data))):
  row = price_data.iloc[i, :]
  x = row.index
  y = row.values
  plt.figure(figsize=(15, 6))
  plt.plot(x, y, marker='o')

  plt.xlabel('Date')
  plt.ylabel('price')
  plt.grid(True)
  # Roughly 15 evenly spaced ticks; clamp the step because range() rejects 0.
  tick_step = max(1, len(x) // 15)
  x_ticks_indices = range(0, len(x), tick_step)  # tick positions
  # `pos` rather than `i`, so the comprehension does not shadow the row counter.
  x_ticks_labels = [x[pos] for pos in x_ticks_indices]  # labels at those positions
  plt.xticks(x_ticks_indices, x_ticks_labels, rotation=45)
  plt.show()

## 전체 판매량이 0인 제품들


In [None]:
train

In [None]:
# Rows whose mean over the value columns is exactly zero. The slice starts at
# position 6, like every other value slice in this notebook: position 5 is the
# last of the six metadata columns, and including a non-numeric column in a
# row-wise mean raises TypeError on pandas >= 2.
train[train.iloc[:,6:].mean(axis=1)==0]

In [None]:
# Index labels of the rows whose mean over the value columns is exactly zero.
# The slice starts at position 6 (positions 0-5 are metadata); including the
# non-numeric column at position 5 raises TypeError on pandas >= 2.
zero_index = train[train.iloc[:,6:].mean(axis=1)==0].index

## 전체 판매량이 0인 제품들의 판매 데이터 -> submission1

In [None]:
# Template rows for the all-zero series. `zero_index` holds index *labels* taken
# from `train`, so label-based .loc is the matching accessor (the same way
# submission3 is selected); it returns the same rows as .iloc while both frames
# keep their default RangeIndex.
submission1 = submission.loc[zero_index]

In [None]:
#!pip install prophet
from prophet import Prophet

In [None]:
# Everything except the all-zero rows. drop() keeps the original row order,
# whereas indexing with list(set(...)) leaves the order unspecified.
train1 = train.drop(index=zero_index)

In [None]:
train1.head(3)

# 2022년 판매량이 많이 저조한 경우 -> 신상품이거나 원래 잘 안 팔리는 제품이라고 판단 가능 -> 2022년 데이터가 필요 없다고 판단 -> 2023년 데이터만 활용

In [None]:
# Keep rows whose total over the value columns before the last 94 is below 90
# (presumably the last 94 columns are the 2023 dates -- verify against the data).
train_2023 = train1.loc[train1.iloc[:, 6:-94].sum(axis=1).lt(90)]

In [None]:
# Keep only the last 94 columns; this also drops the six metadata columns.
train_2023 = train_2023.iloc[:,-94:]

# 위에서 필터링한 데이터 중 2023년 판매량이 0인 경우 -> submission2로 빼 줌과 동시에 데이터셋에서 삭제

In [None]:
# Rows whose mean over the retained 94 columns is exactly zero.
train_2023_zero = train_2023.loc[train_2023.mean(axis=1).eq(0)]

In [None]:
# Template rows for the series that are all zero in the retained window. The
# index holds labels carried over from `train`, so label-based .loc is the
# matching accessor; it returns the same rows as .iloc while the template keeps
# its default RangeIndex.
submission2 = submission.loc[train_2023_zero.index]

In [None]:
submission2.head(5)

In [None]:
# Remaining rows after removing the all-zero ones. drop() keeps the original
# row order, whereas indexing with list(set(...)) leaves the order unspecified.
train_2023_notzero = train_2023.drop(index=train_2023_zero.index)

In [None]:
train_2023_notzero.std(axis=1).describe()

In [None]:
# Put one series per column, remember the original row labels, and renumber
# the columns 0..n-1 so the forecasting loop can address them by position.
data = train_2023_notzero.transpose()
ID_list = data.columns
datanew_columns = [*range(data.shape[1])]
data.columns = datanew_columns

# Prophet 을 활용하여 단기간 예측

In [None]:
forecast_list = []

# Forecast horizon in days, taken from the shared config instead of repeating
# the literal 21 in two places.
horizon = CFG['PREDICT_SIZE']
# Window and value for the 'extra_weight' regressor.
# NOTE(review): with recent_weight == 1.0 the column is constant (1.0
# everywhere), so it cannot emphasise recent days. The original comment spoke
# of "the last week" and "a higher weight", but the code used 28 days and 1.0.
# Both values are kept as-is to leave the forecasts unchanged.
recent_days = 28
recent_weight = 1.0

# Fit one Prophet model per column of `data` and keep the last `horizon` predictions.
for i in data.columns:
  # Prophet expects a frame with a datetime 'ds' column and a 'y' column.
  train_1 = pd.DataFrame(data[i])
  train_1.reset_index(inplace=True)
  train_1.columns = ['ds','y']
  train_1['ds'] = pd.to_datetime(train_1['ds'])

  train_1['extra_weight'] = 1.0  # default value
  recent_mask = train_1['ds'] >= train_1['ds'].max() - pd.Timedelta(days=recent_days)
  train_1.loc[recent_mask, 'extra_weight'] = recent_weight

  m = Prophet(
    changepoint_prior_scale=0.3,
    weekly_seasonality=5,
    daily_seasonality = True,
    seasonality_mode='multiplicative'
    )
  m.add_regressor('extra_weight')
  m.add_seasonality(name='monthly', period=30.5, fourier_order=3)
  m.fit(train_1)
  future = m.make_future_dataframe(periods=horizon)
  future['extra_weight'] = 1.0  # future dates use the default value
  forecast = m.predict(future)
  # The last `horizon` rows are the dates beyond the history.
  forecast_list.append(forecast['yhat'].tail(horizon))
  fig1 = m.plot(forecast)


In [None]:
# Turn each forecast Series into a one-row frame tagged with its original row
# label. The rows are collected first and concatenated once: concatenating
# inside the loop copies the growing frame every iteration, and starting from
# an empty DataFrame triggers pandas' empty-entry concat deprecation.
rows = []
for i, yhat in enumerate(forecast_list):
  a = pd.DataFrame(yhat)
  k = a.T
  k.reset_index(inplace=True)
  k['index'] = ID_list[i]
  rows.append(k)

# Keep the original result (an empty frame) when there is nothing to forecast.
t = pd.concat(rows, axis=0) if rows else pd.DataFrame()

# Negative predictions are clipped to zero.
t[t<0] = 0

In [None]:
# Look up the template rows for the forecasted labels and reuse their column
# names for the forecast frame.
IDlist = t.loc[:, 'index']
submission3 = submission.loc[IDlist, :]
t.columns = submission3.columns

In [None]:
# Renumber the rows 0..n-1 and round every value to the nearest integer.
t = np.round(t.reset_index(drop=True))

In [None]:
# The rounded forecast frame becomes the third partial submission (this
# replaces the template rows previously bound to the same name).
submission3 = t

In [None]:
len(submission2) + len(submission3)

# 2022년부터 전체 기간을 사용할 데이터

In [None]:
# Rows of train1 that were not routed to the 2023-only branch. drop() keeps the
# original row order, whereas indexing with list(set(...)) leaves the order unspecified.
train_2022 = train1.drop(index=train_2023.index)

In [None]:
len(train) - len(train_2022) - len(train_2023) - len(submission1)

In [None]:
len(train_2022)

# 2022년부터 데이터가 있는 경우, 최근 한달 데이터가 모두 0인 경우 -> submission4 로 0이라고 예측

In [None]:
# Rows whose mean over the last 30 columns is exactly zero.
train_2022_zero = train_2022.loc[train_2022.iloc[:, -30:].mean(axis=1).eq(0)]

In [None]:
# Template rows for the series with no activity in the last 30 columns. The
# index holds labels carried over from `train`, so label-based .loc is the
# matching accessor; it returns the same rows as .iloc while the template keeps
# its default RangeIndex.
submission4 = submission.loc[train_2022_zero.index]

In [None]:
# Remaining rows after removing the recently-all-zero ones. drop() keeps the
# original row order, whereas indexing with list(set(...)) leaves the order unspecified.
train_2022_notzero = train_2022.drop(index=train_2022_zero.index)

# 최근 일주일간 평균이 최근 90일간 평균보다 높고, 최근 90일간 평균이 최근 일주일을 제외한 나머지 83일간의 평균보다 클 경우

In [None]:
# Row-wise means over three trailing windows: last 7 columns, last 90 columns,
# and the 83 columns of the last 90 that precede the final 7.
last_week_mean = train_2022_notzero.iloc[:, -7:].mean(axis=1)
last_90_mean = train_2022_notzero.iloc[:, -90:].mean(axis=1)
before_week_mean = train_2022_notzero.iloc[:, -90:-7].mean(axis=1)

# NOTE(review): the 90-column mean is a weighted average of the other two, so
# condition2 is algebraically implied by condition1 (up to float rounding).
condition1 = last_week_mean > last_90_mean
condition2 = before_week_mean < last_90_mean

train_2022_stat = train_2022_notzero[condition1 & condition2]

In [None]:
# Per-row mean of the last 7 columns, ignoring zero entries: zeros are masked
# to NaN and the row-wise mean then skips them.
selected_data = train_2022_stat.iloc[:, -7:]
row_means = selected_data.where(selected_data.ne(0)).mean(axis=1)


In [None]:
row_means

In [None]:
# Template rows for the selected series. The index holds labels carried over
# from `train`, so label-based .loc is the matching accessor; it returns the
# same rows as .iloc while the template keeps its default RangeIndex.
submission5 = submission.loc[train_2022_stat.index]

In [None]:
# Every column after the first gets the same values: the per-row means,
# carried with their own index.
data_filled = pd.DataFrame({column: row_means for column in submission5.columns[1:]})


In [None]:
# Put the first template column back in front of the filled value columns.
submission5 = pd.concat(
    (submission5.iloc[:, 0], data_filled),
    axis='columns',
)

In [None]:
# Rows that passed the zero filter but not the trend conditions.
# train_2022_notzero holds the same rows as train_2022 for these labels, so
# dropping from it selects the same data. drop() keeps the original row order,
# whereas indexing with list(set(...)) leaves the order unspecified.
train_2022_notstat = train_2022_notzero.drop(index=train_2022_stat.index)

In [None]:
train_2022_notstat

In [None]:
# One series per column; the first six rows of the transpose are the metadata
# fields and are discarded. The original row labels are remembered and the
# columns renumbered 0..n-1 for positional access in the forecasting loop.
data = train_2022_notstat.transpose().iloc[6:, :]
ID_list = data.columns
datanew_columns = [*range(data.shape[1])]
data.columns = datanew_columns

In [None]:
forecast_list = []

# Forecast horizon in days, taken from the shared config instead of repeating
# the literal 21 in two places.
horizon = CFG['PREDICT_SIZE']
# Window and value for the 'extra_weight' regressor.
# NOTE(review): with recent_weight == 1.0 the column is constant (1.0
# everywhere), so it cannot emphasise recent days; kept as-is so the fitted
# models do not change.
recent_days = 7
recent_weight = 1.0

# The dates to predict are the `horizon` days after the last date of the full
# table. They must NOT be derived from each filtered history: zero days are
# removed before fitting, so a series ending in zeros has a history that stops
# early, and make_future_dataframe() would then "forecast" days that lie inside
# the observed period instead of the required ones.
last_observed = pd.to_datetime(data.index).max()
forecast_dates = pd.date_range(last_observed + pd.Timedelta(days=1), periods=horizon, freq='D')

# Fit one Prophet model per column of `data` on its non-zero days only.
for i in data.columns:
  series = data[i]
  train_1 = pd.DataFrame(series[series != 0])
  train_1.reset_index(inplace=True)
  train_1.columns = ['ds','y']
  train_1['ds'] = pd.to_datetime(train_1['ds'])

  train_1['extra_weight'] = 1.0  # default value
  recent_mask = train_1['ds'] >= train_1['ds'].max() - pd.Timedelta(days=recent_days)
  train_1.loc[recent_mask, 'extra_weight'] = recent_weight

  m = Prophet(
    changepoint_prior_scale=0.3,
    weekly_seasonality=7,
    daily_seasonality = True,
    seasonality_mode='multiplicative'
    )
  m.add_regressor('extra_weight')
  m.add_seasonality(name='monthly', period=30.5, fourier_order=12)
  m.fit(train_1)
  # History dates (so the plot still shows the fit) followed by the fixed
  # target dates; identical to make_future_dataframe() when the series does
  # not end with zero days.
  future = pd.concat(
    [train_1[['ds']], pd.DataFrame({'ds': forecast_dates})],
    ignore_index=True,
  )
  future['extra_weight'] = 1.0  # future dates use the default value
  forecast = m.predict(future)
  # predict() returns rows ordered by date, so the tail is the target window.
  forecast_list.append(forecast['yhat'].tail(horizon))
  fig1 = m.plot(forecast)

In [None]:
# Turn each forecast Series into a one-row frame shaped like the submission
# template and tagged with its original row label. The rows are collected first
# and concatenated once: concatenating inside the loop copies the growing frame
# every iteration, and starting from an empty DataFrame triggers pandas'
# empty-entry concat deprecation.
rows = []
for i, yhat in enumerate(forecast_list):
  a = pd.DataFrame(yhat)
  k = a.T
  k.reset_index(inplace=True)
  k.columns = submission.columns
  k['ID'] = ID_list[i]
  rows.append(k)

# Keep the original result (an empty frame) when there is nothing to forecast.
t2 = pd.concat(rows, axis=0) if rows else pd.DataFrame()

# Negative predictions are clipped to zero.
t2[t2<0] = 0

In [None]:
t2

In [None]:
# Renumber the rows 0..n-1 and round every value to the nearest integer.
t2 = np.round(t2.reset_index(drop=True))

In [None]:
t2

In [None]:
# The rounded forecast frame becomes the sixth partial submission.
submission6 = t2

In [None]:
len(submission1) + len(submission2) + len(submission3) + len(submission4) + len(submission5) + len(submission6)

In [None]:
# Stack the six partial submissions row-wise into one frame.
final_data = pd.concat(
    (
        submission1,
        submission2,
        submission3,
        submission4,
        submission5,
        submission6,
    ),
    axis='index',
)

In [None]:
# Round every column after the first and store it as integers. Assigning by
# column label replaces the columns, so the int dtype is kept; the previous
# full-slice .iloc assignment writes into the existing float columns in place
# on pandas >= 2, which silently turned the integers back into floats.
forecast_cols = final_data.columns[1:]
final_data[forecast_cols] = np.round(final_data[forecast_cols], 0).astype(int)

In [None]:
# Order the combined rows by the ID column.
final_submission = final_data.sort_values(by=['ID'])

In [None]:
final_submission

In [None]:
# Write the sorted result to Drive as CSV, without the DataFrame index column.
final_submission.to_csv('/content/drive/MyDrive/LG_AIMERS/20230901_대분류5번.csv', index=False)