In [2]:
!pip install prophet



## Import

In [3]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
import prophet

import warnings
warnings.filterwarnings(action='ignore')

## Hyperparameter Setting

In [4]:
CFG = {
    'SEED':41
}

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(CFG['SEED']) # Seed 고정

## 데이터 불러오기

In [None]:
train_data = pd.read_csv('./data/train.csv').drop(columns=['ID', '제품'])

## 데이터 전처리

In [None]:
trans_data = train_data.drop(columns=['대분류', '중분류', '소분류', '브랜드']).transpose()
trans_data.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15880,15881,15882,15883,15884,15885,15886,15887,15888,15889
2022-01-01,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-02,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-03,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-04,0,0,0,0,0,0,7,0,15,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-05,0,0,0,0,0,0,21,0,16,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-06,0,0,0,0,0,0,16,0,23,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-07,0,0,0,0,0,0,18,0,12,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-08,0,0,0,0,0,0,19,0,9,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-09,0,0,0,0,0,0,21,0,2,0,...,0,0,0,0,0,0,0,0,0,0
2022-01-10,0,0,0,0,0,0,18,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
pred_df = pd.DataFrame({
    'date': pd.date_range('2023-04-05', periods=21)
})

Unnamed: 0,date
0,2023-04-05
1,2023-04-06
2,2023-04-07
3,2023-04-08
4,2023-04-09
5,2023-04-10
6,2023-04-11
7,2023-04-12
8,2023-04-13
9,2023-04-14


## Create training and testing data

In [None]:
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
plt.style.use(['seaborn-white'])

train_start_dt = '2022-01-01'
test_start_dt = '2023-01-01'
err_list = []

for i in tqdm(range(15890)):
    ID = i

    sample = pd.DataFrame({
        'date': pd.to_datetime(trans_data.index),
        'sales': trans_data.iloc[:,ID].values
    })

    train_data = sample[(sample['date'] >= train_start_dt) & (sample['date'] < test_start_dt)]
    test_data = sample[sample['date'] >= test_start_dt]

    train_X = train_data['date']
    train_y = train_data['sales']

    test_X = pd.date_range('2023-04-05', periods=21)


    model = Pipeline([
        ("scaler", MinMaxScaler()),
        ("RBF_SVM", SVR(kernel="rbf", C=10, epsilon=0.05))
    ])

    model.fit(train_X.values.reshape(-1, 1), train_y)

    pred = model.predict(test_X.values.reshape(-1, 1))
    pred[pred < 0] = 0
    pred = np.round(pred)

    pred_df[ID] = pred

np.mean(err_list)


  0%|          | 0/15890 [00:00<?, ?it/s]

nan

In [None]:
submit = pd.read_csv('./data/sample_submission.csv')
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
submit_pred = pred_df.drop(columns=['date']).transpose().astype('int')
submit_pred

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,4,4,4,4,4,4,4,3,3,3,...,3,3,3,3,3,3,3,3,3,3
1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
3,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15885,1,1,1,1,1,1,1,1,1,1,...,1,1,2,2,2,2,2,2,2,2
15886,3,3,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
15887,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,1,1
15888,1,1,1,1,1,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2


In [None]:
submit.iloc[:,1:] = submit_pred
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,4,4,4,4,4,4,4,3,3,...,3,3,3,3,3,3,3,3,3,3
1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,2,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
3,3,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
submit.to_csv('./svm_submit.csv', index=False)