In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 데이터 불러오기
```
1. train.csv : 학습 데이터
id : 샘플 아이디
Store : 쇼핑몰 지점
Date : 주 단위(Weekly) 날짜
Temperature : 해당 쇼핑몰 주변 기온
Fuel_Price : 해당 쇼핑몰 주변 연료 가격
Promotion 1~5 : 해당 쇼핑몰의 비식별화된 프로모션 정보
Unemployment : 해당 쇼핑몰 지역의 실업률
IsHoliday : 해당 기간의 공휴일 포함 여부
Weekly_Sales : 주간 매출액 (목표 예측값)
```

In [3]:
#data불러오기
train = pd.read_csv('/content/drive/MyDrive/2022/쇼핑몰지점/dataset/train.csv')
test = pd.read_csv('/content/drive/MyDrive/2022/쇼핑몰지점/dataset/test.csv')
submission = pd.read_csv('/content/drive/MyDrive/2022/쇼핑몰지점/dataset/sample_submission.csv')

In [4]:
# 잘불러와졌나 확인
train

Unnamed: 0,id,Store,Date,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,IsHoliday,Weekly_Sales
0,1,1,05/02/2010,42.31,2.572,,,,,,8.106,False,1643690.90
1,2,1,12/02/2010,38.51,2.548,,,,,,8.106,True,1641957.44
2,3,1,19/02/2010,39.93,2.514,,,,,,8.106,False,1611968.17
3,4,1,26/02/2010,46.63,2.561,,,,,,8.106,False,1409727.59
4,5,1,05/03/2010,46.50,2.625,,,,,,8.106,False,1554806.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6250,6251,45,31/08/2012,75.09,3.867,23641.30,6.00,92.93,6988.31,3992.13,8.684,False,734297.87
6251,6252,45,07/09/2012,75.70,3.911,11024.45,12.80,52.63,1854.77,2055.70,8.684,True,766512.66
6252,6253,45,14/09/2012,67.87,3.948,11407.95,,4.30,3421.72,5268.92,8.684,False,702238.27
6253,6254,45,21/09/2012,65.32,4.038,8452.20,92.28,63.24,2376.38,8670.40,8.684,False,723086.20


In [5]:
print(train.shape, test.shape)

(6255, 13) (180, 12)


# 데이터 전처리

In [6]:
train.info() # date와 Isholiday를 제외한 모든 변수는 숫자형이다.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6255 entries, 0 to 6254
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            6255 non-null   int64  
 1   Store         6255 non-null   int64  
 2   Date          6255 non-null   object 
 3   Temperature   6255 non-null   float64
 4   Fuel_Price    6255 non-null   float64
 5   Promotion1    2102 non-null   float64
 6   Promotion2    1592 non-null   float64
 7   Promotion3    1885 non-null   float64
 8   Promotion4    1819 non-null   float64
 9   Promotion5    2115 non-null   float64
 10  Unemployment  6255 non-null   float64
 11  IsHoliday     6255 non-null   bool   
 12  Weekly_Sales  6255 non-null   float64
dtypes: bool(1), float64(9), int64(2), object(1)
memory usage: 592.6+ KB


In [7]:
###train결측값 확인
train.isnull().sum()
# promotion 변수에서 결측치가 존재하는 것을 확인

id                 0
Store              0
Date               0
Temperature        0
Fuel_Price         0
Promotion1      4153
Promotion2      4663
Promotion3      4370
Promotion4      4436
Promotion5      4140
Unemployment       0
IsHoliday          0
Weekly_Sales       0
dtype: int64

In [8]:
# promotion의 결측치가 너무 많기 때문에 0으로 대체해준다.
train = train.fillna(0)
train
# 0으로 잘 바뀐 것을 확인할 수 있다.

Unnamed: 0,id,Store,Date,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,IsHoliday,Weekly_Sales
0,1,1,05/02/2010,42.31,2.572,0.00,0.00,0.00,0.00,0.00,8.106,False,1643690.90
1,2,1,12/02/2010,38.51,2.548,0.00,0.00,0.00,0.00,0.00,8.106,True,1641957.44
2,3,1,19/02/2010,39.93,2.514,0.00,0.00,0.00,0.00,0.00,8.106,False,1611968.17
3,4,1,26/02/2010,46.63,2.561,0.00,0.00,0.00,0.00,0.00,8.106,False,1409727.59
4,5,1,05/03/2010,46.50,2.625,0.00,0.00,0.00,0.00,0.00,8.106,False,1554806.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6250,6251,45,31/08/2012,75.09,3.867,23641.30,6.00,92.93,6988.31,3992.13,8.684,False,734297.87
6251,6252,45,07/09/2012,75.70,3.911,11024.45,12.80,52.63,1854.77,2055.70,8.684,True,766512.66
6252,6253,45,14/09/2012,67.87,3.948,11407.95,0.00,4.30,3421.72,5268.92,8.684,False,702238.27
6253,6254,45,21/09/2012,65.32,4.038,8452.20,92.28,63.24,2376.38,8670.40,8.684,False,723086.20


In [9]:
# date변수를 날짜형식으로 바꿔준 뒤, 연/월/일 로 구분해준다.
import datetime as dt
train['Date'] = train['Date'] = pd.to_datetime(train.Date, format='%d/%m/%Y')

train['Year'] = train['Date'].dt.strftime('%Y')
train['Month'] = train['Date'].dt.strftime('%m')
train['Day'] = train['Date'].dt.strftime('%d')

# 사용한 Date 변수는 더이상 필요없으므로 제거해준다.
del train['Date']

In [10]:
# 잘 바뀐 것을 확인한다.
train

Unnamed: 0,id,Store,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,IsHoliday,Weekly_Sales,Year,Month,Day
0,1,1,42.31,2.572,0.00,0.00,0.00,0.00,0.00,8.106,False,1643690.90,2010,02,05
1,2,1,38.51,2.548,0.00,0.00,0.00,0.00,0.00,8.106,True,1641957.44,2010,02,12
2,3,1,39.93,2.514,0.00,0.00,0.00,0.00,0.00,8.106,False,1611968.17,2010,02,19
3,4,1,46.63,2.561,0.00,0.00,0.00,0.00,0.00,8.106,False,1409727.59,2010,02,26
4,5,1,46.50,2.625,0.00,0.00,0.00,0.00,0.00,8.106,False,1554806.68,2010,03,05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6250,6251,45,75.09,3.867,23641.30,6.00,92.93,6988.31,3992.13,8.684,False,734297.87,2012,08,31
6251,6252,45,75.70,3.911,11024.45,12.80,52.63,1854.77,2055.70,8.684,True,766512.66,2012,09,07
6252,6253,45,67.87,3.948,11407.95,0.00,4.30,3421.72,5268.92,8.684,False,702238.27,2012,09,14
6253,6254,45,65.32,4.038,8452.20,92.28,63.24,2376.38,8670.40,8.684,False,723086.20,2012,09,21


In [11]:
# 분석 전 범주형 변수인 IsHoliday 변수를 숫자형으로 바꿔준다.
def holiday_to_number(isholiday):
    if isholiday == True:
        number = 1
    else:
        number = 0
    return number

# 이 함수를 IsHoliday 칼럼에 적용한 NumberHoliday 칼럼을 만들어준다..
train['NumberHoliday'] = train['IsHoliday'].apply(holiday_to_number)

del train['IsHoliday']
# 결과를 확인한다.
train

Unnamed: 0,id,Store,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,Weekly_Sales,Year,Month,Day,NumberHoliday
0,1,1,42.31,2.572,0.00,0.00,0.00,0.00,0.00,8.106,1643690.90,2010,02,05,0
1,2,1,38.51,2.548,0.00,0.00,0.00,0.00,0.00,8.106,1641957.44,2010,02,12,1
2,3,1,39.93,2.514,0.00,0.00,0.00,0.00,0.00,8.106,1611968.17,2010,02,19,0
3,4,1,46.63,2.561,0.00,0.00,0.00,0.00,0.00,8.106,1409727.59,2010,02,26,0
4,5,1,46.50,2.625,0.00,0.00,0.00,0.00,0.00,8.106,1554806.68,2010,03,05,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6250,6251,45,75.09,3.867,23641.30,6.00,92.93,6988.31,3992.13,8.684,734297.87,2012,08,31,0
6251,6252,45,75.70,3.911,11024.45,12.80,52.63,1854.77,2055.70,8.684,766512.66,2012,09,07,1
6252,6253,45,67.87,3.948,11407.95,0.00,4.30,3421.72,5268.92,8.684,702238.27,2012,09,14,0
6253,6254,45,65.32,4.038,8452.20,92.28,63.24,2376.38,8670.40,8.684,723086.20,2012,09,21,0


In [12]:
# 전처리 내용을 test데이터에도 적용해준다.
# 결측치 처리
test = test.fillna(0)

# Date 전처리
test['Date'] = test['Date'] = pd.to_datetime(test.Date, format='%d/%m/%Y')

test['Year'] = test['Date'].dt.strftime('%Y')
test['Month'] = test['Date'].dt.strftime('%m')
test['Day'] = test['Date'].dt.strftime('%d')

# 사용한 Date 변수는 더이상 필요없으므로 제거해준다.
del test['Date']

# IsHoliday 전처리
test['NumberHoliday'] = test['IsHoliday'].apply(holiday_to_number)

del test['IsHoliday']

In [13]:
test

Unnamed: 0,id,Store,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,Year,Month,Day,NumberHoliday
0,1,1,68.55,3.617,8077.89,0.00,18.22,3617.43,3626.14,6.573,2012,10,05,0
1,2,1,62.99,3.601,2086.18,0.00,8.11,602.36,5926.45,6.573,2012,10,12,0
2,3,1,67.97,3.594,950.33,0.00,4.93,80.25,2312.85,6.573,2012,10,19,0
3,4,1,69.16,3.506,2585.85,31.75,6.00,1057.16,1305.01,6.573,2012,10,26,0
4,5,2,70.27,3.617,6037.76,0.00,10.04,3027.37,3853.40,6.170,2012,10,05,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,176,44,46.97,3.755,38.65,2.61,0.98,0.00,457.74,5.217,2012,10,26,0
176,177,45,64.89,3.985,5046.74,0.00,18.82,2253.43,2340.01,8.667,2012,10,05,0
177,178,45,54.47,4.000,1956.28,0.00,7.89,599.32,3990.54,8.667,2012,10,12,0
178,179,45,56.47,3.969,2004.02,0.00,3.18,437.73,1537.49,8.667,2012,10,19,0


In [47]:
!pip install pycaret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycaret
  Downloading pycaret-2.3.10-py3-none-any.whl (320 kB)
[K     |████████████████████████████████| 320 kB 5.1 MB/s 
[?25hCollecting spacy<2.4.0
  Downloading spacy-2.3.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.4 MB)
[K     |████████████████████████████████| 10.4 MB 5.3 MB/s 
Collecting kmodes>=0.10.1
  Downloading kmodes-0.12.1-py2.py3-none-any.whl (20 kB)
Collecting mlxtend>=0.17.0
  Downloading mlxtend-0.20.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 43.2 MB/s 
Collecting mlflow
  Downloading mlflow-1.27.0-py3-none-any.whl (17.9 MB)
[K     |████████████████████████████████| 17.9 MB 14.7 MB/s 
Collecting pyLDAvis
  Downloading pyLDAvis-3.3.1.tar.gz (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 37.8 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build w

In [14]:
!pip install markupsafe==2.0.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting markupsafe==2.0.1
  Downloading MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (31 kB)
Installing collected packages: markupsafe
  Attempting uninstall: markupsafe
    Found existing installation: MarkupSafe 2.1.1
    Uninstalling MarkupSafe-2.1.1:
      Successfully uninstalled MarkupSafe-2.1.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandas-profiling 3.2.0 requires markupsafe~=2.1.1, but you have markupsafe 2.0.1 which is incompatible.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.[0m
Successfully installed markupsafe-2.0.1


In [16]:
train.head()

Unnamed: 0,id,Store,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,Weekly_Sales,Year,Month,Day,NumberHoliday
0,1,1,42.31,2.572,0.0,0.0,0.0,0.0,0.0,8.106,1643690.9,2010,2,5,0
1,2,1,38.51,2.548,0.0,0.0,0.0,0.0,0.0,8.106,1641957.44,2010,2,12,1
2,3,1,39.93,2.514,0.0,0.0,0.0,0.0,0.0,8.106,1611968.17,2010,2,19,0
3,4,1,46.63,2.561,0.0,0.0,0.0,0.0,0.0,8.106,1409727.59,2010,2,26,0
4,5,1,46.5,2.625,0.0,0.0,0.0,0.0,0.0,8.106,1554806.68,2010,3,5,0


In [17]:
# 예측에 필요없는 변수들은 제거해준다.
del train['id']
del test['id']

In [18]:
# 모델을 선정하기위한 pycaret이라는 라이브러리를 사용하였다.
from pycaret.regression import *
reg = setup(train, target='Weekly_Sales')

Unnamed: 0,Description,Value
0,session_id,7072
1,Target,Weekly_Sales
2,Original Data,"(6255, 14)"
3,Missing Values,False
4,Numeric Features,10
5,Categorical Features,3
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(4378, 26)"


INFO:logs:create_model_container: 0
INFO:logs:master_model_container: 0
INFO:logs:display_container: 1
INFO:logs:Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=True, features_todrop=[],
                                      id_columns=[], ml_usecase='regression',
                                      numerical_features=[],
                                      target='Weekly_Sales',
                                      time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='not_available',
                                fill_value_categorical=None,
                                fill_value_numerical=None,
                                numeric_str...
                ('scaling', 'passthrough'), ('P_transform', 'passthrough'),
                ('binn', 'passthrough'), ('rem_outliers', 'passthrough'),
                ('cluste

In [23]:
# RMSE를 기준으로 좋은 모델 3개를 선정한다.
top2 = compare_models(sort = 'RMSE',n_select = 2)
# top3 = lightgbm, rf, et

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lightgbm,Light Gradient Boosting Machine,64696.91,11814210000.0,108230.9,0.9633,0.1107,0.0732,0.186
rf,Random Forest Regressor,68281.3,19237060000.0,137668.8,0.9408,0.1023,0.0633,3.233
et,Extra Trees Regressor,75165.01,21671040000.0,146221.2,0.9328,0.1241,0.0767,2.116
dt,Decision Tree Regressor,84703.02,31352450000.0,175907.6,0.9033,0.1299,0.0762,0.049
gbr,Gradient Boosting Regressor,162647.8,45405690000.0,212709.6,0.8593,0.252,0.2146,0.638
ada,AdaBoost Regressor,414911.8,236270100000.0,485931.5,0.2656,0.5593,0.6233,0.342
knn,K Neighbors Regressor,383312.2,241661200000.0,491140.5,0.2509,0.5132,0.4993,0.082
ridge,Ridge Regression,416839.9,260277800000.0,509969.5,0.1923,0.5534,0.5849,0.034
llar,Lasso Least Angle Regression,416865.9,260285300000.0,509977.4,0.1922,0.5535,0.585,0.021
lasso,Lasso Regression,416860.0,260286600000.0,509978.4,0.1922,0.5535,0.5849,0.076


INFO:logs:create_model_container: 39
INFO:logs:master_model_container: 39
INFO:logs:display_container: 6
INFO:logs:[LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.1, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
              random_state=7072, reg_alpha=0.0, reg_lambda=0.0, silent='warn',
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0), RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=-1, oob_score=False,
   

In [24]:
# tune_model()을 사용하여 pycaret라이브러리의 'RMSE'기준으로 상위 3개의 모델을 튜닝해준다.
tuned_top2 = [tune_model(i) for i in top2]

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,100548.3487,26541240000.0,162914.8188,0.9177,0.1547,0.1119
1,113300.1265,41083420000.0,202690.4518,0.8866,0.1688,0.1177
2,100423.1341,29983850000.0,173158.4415,0.9106,0.1399,0.1004
3,98397.9479,24853460000.0,157649.8132,0.9207,0.1527,0.1068
4,92467.8199,25759170000.0,160496.6402,0.9211,0.1322,0.0953
5,100494.5432,25394180000.0,159355.5295,0.9163,0.1459,0.1057
6,101151.3442,31843760000.0,178448.1908,0.9034,0.1435,0.1014
7,93806.6612,21042340000.0,145059.7666,0.9318,0.1445,0.1039
8,85589.8273,19960510000.0,141281.68,0.933,0.1461,0.0967
9,103352.9345,34856140000.0,186697.9846,0.8921,0.1554,0.106


INFO:logs:create_model_container: 41
INFO:logs:master_model_container: 41
INFO:logs:display_container: 8
INFO:logs:RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=9, max_features=1.0, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.1,
                      min_impurity_split=None, min_samples_leaf=3,
                      min_samples_split=7, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=-1, oob_score=False,
                      random_state=7072, verbose=0, warm_start=False)
INFO:logs:tune_model() succesfully completed......................................


In [25]:
# 튜닝된 모델을 blend_model()을 활용하여 블렌딩해준다.
blender_top2 = blend_models(estimator_list=tuned_top2)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,72943.3627,14491700000.0,120381.461,0.955,0.1181,0.0819
1,80865.6161,22244440000.0,149145.6938,0.9386,0.1154,0.0795
2,73530.5508,17094820000.0,130747.1698,0.9491,0.1051,0.0732
3,68369.9984,10913260000.0,104466.5379,0.9652,0.1041,0.076
4,68009.7156,13977450000.0,118226.2827,0.9572,0.0996,0.0717
5,73835.7087,13672140000.0,116927.9271,0.9549,0.1076,0.0776
6,74315.4753,15845380000.0,125878.4176,0.952,0.1065,0.0775
7,66318.9643,9767354000.0,98829.9255,0.9683,0.0966,0.0724
8,63857.232,10813520000.0,103988.1003,0.9637,0.1158,0.076
9,75755.6849,19798380000.0,140706.705,0.9387,0.1173,0.078


INFO:logs:create_model_container: 42
INFO:logs:master_model_container: 42
INFO:logs:display_container: 9
INFO:logs:VotingRegressor(estimators=[('lightgbm',
                             LGBMRegressor(bagging_fraction=1.0, bagging_freq=1,
                                           boosting_type='gbdt',
                                           class_weight=None,
                                           colsample_bytree=1.0,
                                           feature_fraction=0.8,
                                           importance_type='split',
                                           learning_rate=0.1, max_depth=-1,
                                           min_child_samples=51,
                                           min_child_weight=0.001,
                                           min_split_gain=0.9, n_estimators=180,
                                           n_jobs=-1, num_leaves=70,
                                           objective=None, rando...
            

In [26]:
# 그리고 finalize_model()함수를 통해 전체 데이터로 마지막 학습을 진행한다. 이후에 predict_mdodel() 함수의 파라미터로 학습된 모델 변수, 테스트할 데이터를 각각 입력해주었다.
final_model = finalize_model(blender_top2)
prediction = predict_model(final_model, data=test)

INFO:logs:Initializing finalize_model()
INFO:logs:finalize_model(estimator=VotingRegressor(estimators=[('lightgbm',
                             LGBMRegressor(bagging_fraction=1.0, bagging_freq=1,
                                           boosting_type='gbdt',
                                           class_weight=None,
                                           colsample_bytree=1.0,
                                           feature_fraction=0.8,
                                           importance_type='split',
                                           learning_rate=0.1, max_depth=-1,
                                           min_child_samples=51,
                                           min_child_weight=0.001,
                                           min_split_gain=0.9, n_estimators=180,
                                           n_jobs=-1, num_leaves=70,
                                           objective=None, rando...
                             RandomForestRegressor(b

In [27]:
prediction

Unnamed: 0,Store,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,Year,Month,Day,NumberHoliday,Label
0,1,68.55,3.617,8077.89,0.00,18.22,3617.43,3626.14,6.573,2012,10,05,0,1.670531e+06
1,1,62.99,3.601,2086.18,0.00,8.11,602.36,5926.45,6.573,2012,10,12,0,1.542293e+06
2,1,67.97,3.594,950.33,0.00,4.93,80.25,2312.85,6.573,2012,10,19,0,1.432123e+06
3,1,69.16,3.506,2585.85,31.75,6.00,1057.16,1305.01,6.573,2012,10,26,0,1.477862e+06
4,2,70.27,3.617,6037.76,0.00,10.04,3027.37,3853.40,6.170,2012,10,05,0,1.888256e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,44,46.97,3.755,38.65,2.61,0.98,0.00,457.74,5.217,2012,10,26,0,3.799010e+05
176,45,64.89,3.985,5046.74,0.00,18.82,2253.43,2340.01,8.667,2012,10,05,0,7.477059e+05
177,45,54.47,4.000,1956.28,0.00,7.89,599.32,3990.54,8.667,2012,10,12,0,6.899020e+05
178,45,56.47,3.969,2004.02,0.00,3.18,437.73,1537.49,8.667,2012,10,19,0,6.884657e+05


In [29]:
# submission 파일 제출을 위해 변수명을 변경해준다.
prediction.rename(columns = {"Label": "target"}, inplace = True)

### 제출

In [30]:
# submission 파일에 모델링을 진행한 target값을 넣어준다.
submission['Weekly_Sales'] = prediction['target']

In [31]:
# submission파일을 내보내준다.
submission.to_csv('pycaret.csv', index = False)