# お弁当の需要予想

In [1]:
import pandas as pd
import numpy as np


### データセットの読み込み

In [2]:
train = pd.read_csv("./train.csv")
test = pd.read_csv("./test.csv")
sample = pd.read_csv("./sample.csv",header=None)
print("Data Shapes")
print("Train:",train.shape, "Test:",test.shape, "Sample:",sample.shape)

Data Shapes
Train: (207, 12) Test: (40, 11) Sample: (40, 2)


### データフレームで中身を検証

In [3]:
train.index = pd.to_datetime(train["datetime"])
train.isnull().sum()

datetime           0
y                  0
week               0
soldout            0
name               0
kcal              41
remarks          186
event            193
payday           197
weather            0
precipitation      0
temperature        0
dtype: int64

In [4]:
train.head()

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,,,,,快晴,--,19.8
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,,,,,快晴,--,17.0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,,,,,快晴,--,15.5
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,,,,,快晴,--,15.2
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,,,,,快晴,--,16.1


### Nanを0に置換

In [5]:
train = train.fillna(0)
test = test.fillna(0)
train.head()

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,--,19.8
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,--,17.0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,--,15.5
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,--,15.2
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,--,16.1


### 降水量の操作

In [6]:
train.loc[train["precipitation"] == "--","precipitation"] = 0
test.loc[test["precipitation"] == "--","precipitation"] = 0
train["precipitation"] = train['precipitation'].astype(np.float64)
test["precipitation"] = test['precipitation'].astype(np.float64)
train.head()

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1


### 天気のデータを数値に置換

In [7]:
u = train["weather"].unique()
print(u)
print(type(u))
u = test["weather"].unique()
print(u)
print(type(u))

['快晴' '曇' '晴れ' '薄曇' '雨' '雪' '雷電']
<class 'numpy.ndarray'>
['雨' '曇' '晴れ' '薄曇' '快晴']
<class 'numpy.ndarray'>


In [8]:
train["weather_number"] = 0
test["weather_number"] = 0
train.head()


Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,weather_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,0
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,0
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,0
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,0


In [9]:
train.loc[train["weather"] == "快晴","weather_number"] = 1
train.loc[train["weather"] == "曇","weather_number"] = 2
train.loc[train["weather"] == "晴れ","weather_number"] = 3
train.loc[train["weather"] == "薄曇","weather_number"] = 4
train.loc[train["weather"] == "雨","weather_number"] = 5
train.loc[train["weather"] == "雪","weather_number"] = 6
train.loc[train["weather"] == "雷電","weather_number"] = 7

test.loc[test["weather"] == "快晴","weather_number"] = 1
test.loc[test["weather"] == "曇","weather_number"] = 2
test.loc[test["weather"] == "晴れ","weather_number"] = 3
test.loc[test["weather"] == "薄曇","weather_number"] = 4
test.loc[test["weather"] == "雨","weather_number"] = 5

train.head()

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,weather_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,1
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,1
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,1
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,1
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,1


### 曜日データを数値に置換

In [10]:
u = train["week"].unique()
print(u)
print(type(u))

['月' '火' '水' '木' '金']
<class 'numpy.ndarray'>


In [11]:
train["week_number"] = 0
test["week_number"] = 0

In [12]:
train.loc[train["week"] == "月","week_number"] = 1
train.loc[train["week"] == "火","week_number"] = 2
train.loc[train["week"] == "水","week_number"] = 3
train.loc[train["week"] == "木","week_number"] = 4
train.loc[train["week"] == "金","week_number"] = 5

test.loc[test["week"] == "月","week_number"] = 1
test.loc[test["week"] == "火","week_number"] = 2
test.loc[test["week"] == "水","week_number"] = 3
test.loc[test["week"] == "木","week_number"] = 4
test.loc[test["week"] == "金","week_number"] = 5

train.head()

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,weather_number,week_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,1,1
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,1,2
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,1,3
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,1,4
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,1,5


### 不要データの削除

In [13]:
train = train.drop(columns=["datetime","week","name","kcal","remarks","event","weather"])
test = test.drop(columns=["datetime","week","name","kcal","remarks","event","weather"])
train.head()

Unnamed: 0_level_0,y,soldout,payday,precipitation,temperature,weather_number,week_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-11-18,90,0,0.0,0.0,19.8,1,1
2013-11-19,101,1,0.0,0.0,17.0,1,2
2013-11-20,118,0,0.0,0.0,15.5,1,3
2013-11-21,120,1,0.0,0.0,15.2,1,4
2013-11-22,130,1,0.0,0.0,16.1,1,5


In [14]:
test.head()

Unnamed: 0,soldout,payday,precipitation,temperature,weather_number,week_number
0,1,0.0,0.0,20.2,5,3
1,0,0.0,0.0,23.9,2,4
2,0,0.0,0.0,28.7,3,5
3,1,0.0,0.5,21.5,5,1
4,0,0.0,0.0,22.1,3,2


### 販売数の取り出し

In [15]:
y = train["y"]
print(y)

datetime
2013-11-18     90
2013-11-19    101
2013-11-20    118
2013-11-21    120
2013-11-22    130
             ... 
2014-09-24     59
2014-09-25     50
2014-09-26     45
2014-09-29     56
2014-09-30     40
Name: y, Length: 207, dtype: int64


### トレーニングデータから販売数を削除

In [16]:
train = train.drop(columns=["y"])
train.head()

Unnamed: 0_level_0,soldout,payday,precipitation,temperature,weather_number,week_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-11-18,0,0.0,0.0,19.8,1,1
2013-11-19,1,0.0,0.0,17.0,1,2
2013-11-20,0,0.0,0.0,15.5,1,3
2013-11-21,1,0.0,0.0,15.2,1,4
2013-11-22,1,0.0,0.0,16.1,1,5


### 配列に変換

In [17]:
train_data = train.values
train_labels = y.values
test_data = test.values


### 特徴を正規化する（特徴の平均を減算して標準偏差で除算）

In [18]:
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)
train_data = (train_data - mean) / std
test_data = (test_data - mean) / std


### モデルを作成する

In [22]:
#from keras.models import Sequential
#from keras.layers import Dense

#model = Sequential()
#model.add(Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
#model.add(Dense(64, activation='relu'))
#model.add(Dense(1))

#model.compile(optimizer='adam', 
#              loss='mse', 
#              metrics=['mae'])

from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization, Dropout
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam

model = Sequential()

# layers
model.add(Dense(units = 128, kernel_initializer = 'uniform', activation = 'relu', input_dim = train_data.shape[1]))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(units = 64, kernel_initializer = 'uniform', activation = 'relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(units = 32, kernel_initializer = 'uniform', activation = 'relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(units = 16, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dropout(0.3))
model.add(Dense(units = 1))

model.compile(optimizer='adam', 
              loss='mse', 
              metrics=['mae'])


ERROR! Session/line number was not unique in database. History logging moved to new session 195


In [23]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 128)               896       
_________________________________________________________________
batch_normalization_4 (Batch (None, 128)               512       
_________________________________________________________________
dropout_5 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
_________________________________________________________________
batch_normalization_5 (Batch (None, 64)                256       
_________________________________________________________________
dropout_6 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 32)               

### 学習する

In [24]:
#history = model.fit(train_data, 
#                    train_labels,
#                    batch_size=1,
#                    epochs=50,
#                    verbose=1)

history = model.fit(train_data, train_labels, batch_size = 1, epochs = 50, verbose=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### 誤差を調査する

In [25]:
score = model.evaluate(train_data, train_labels, verbose=0)
print()
print("Testing set Mean Abs Error: {:7.2f}".format( score[1]))


Testing set Mean Abs Error:   78.98


In [26]:
train_labels

array([ 90, 101, 118, 120, 130, 135, 145, 140, 151, 116, 151, 153, 151,
       171, 134, 165, 155, 157, 109, 111, 160, 145, 145, 151, 134, 122,
       121,  80, 131, 128, 129,  87, 129, 134, 107,  85, 126, 129, 126,
       107,  92, 126, 120, 121, 105,  92, 139, 121, 126, 111,  91, 137,
       111,  84,  86, 137,  99, 113,  85, 113, 129, 104, 104,  90,  69,
       131, 100, 110,  77,  73, 123,  89,  68,  80,  70, 129,  90,  72,
       100, 102,  77,  51,  55, 106,  84, 125,  99, 100,  90, 107,  96,
        88,  87, 128,  92,  82,  57,  57,  56, 109,  78,  63,  56,  80,
        54,  97,  64,  47,  88,  59,  58,  73,  70,  64,  58, 126,  70,
        71, 104,  54,  65,  77,  79,  74,  64, 119,  74,  59,  47,  64,
        93,  74,  56,  50,  72, 121,  76,  63,  66,  56,  57,  72,  61,
        64,  49,  74,  65,  54,  50,  53,  63,  82,  56,  54,  48, 124,
        65,  51,  53,  53,  55,  53,  56,  62,  83,  65,  52,  58,  60,
        38,  75,  54,  58,  63, 129,  45,  40,  56,  53,  56,  5

In [27]:
train_predictions = model.predict(train_data).flatten() 
myArray = np.array(train_predictions)
myArray = myArray.astype(int)
myArray

array([ 7,  5,  6,  5,  5,  4,  4,  7,  9,  2,  6,  4,  7,  4,  1,  4, 11,
        6,  5,  4, 12,  8,  5, 10,  5, 10,  7,  6, 12,  6,  5, 10,  4,  9,
        7,  4,  3,  6,  7,  5,  4,  8,  9,  5,  5,  4,  5,  5,  8,  5,  5,
       12,  8,  6,  5,  5,  7, 11,  5,  2,  5, 10,  7,  6,  8, 13,  4,  6,
        7,  4, 12, 11,  6,  7,  5, 11,  7,  6,  8,  8,  3,  6, 11,  2,  6,
        6,  9,  7,  8, 11,  5,  7, 14,  6,  6,  3,  8,  4,  5,  8,  6,  4,
        8,  8,  4,  5, 10, 10,  7,  9, 20,  6,  4,  9,  5,  4,  5,  7,  0,
        4,  8,  6,  4,  9,  7, 10,  6,  7,  7,  6, -4,  3, 12,  4,  8, 11,
        7,  7,  6, 11, 10,  3,  4,  7,  8,  9,  6,  5,  6, 10,  8,  6,  6,
        7, 19, 11,  7,  6, 12, 12, 13,  6, 10, 14, 11,  8,  6, 10, 10, 10,
        8,  7, 11, 10, 19,  5,  9,  4,  9,  9, 11, 12,  3,  7,  6, 10,  8,
        5,  8,  7,  9, 11,  5,  5, 15,  6, 11,  6,  6,  7,  9,  3,  6,  8,
       11,  5,  5])

### 検査データで予測する

In [33]:
test_predictions = model.predict(test_data).flatten() 
myArray = np.array(test_predictions)
myArray = myArray.astype(int)
myArray

array([ 71,  61,  73,  86,  65,  58,  64,  72,  50,  83,  72,  84,  78,
        79,  87,  78,  90,  81,  92,  76,  69,  81,  77,  94,  78,  73,
        86,  95,  89,  84,  98, 117, 105,  93,  99, 100, 108, 100,  94,
        90])

### サンプルデータから出力データを作成

In [34]:
sample.shape

(40, 2)

In [35]:
myArray = np.array(test_predictions)
myArray = myArray.astype(int)
sample['y'] = myArray
s = sample.drop(1, axis=1)
s

Unnamed: 0,0,y
0,2014-10-1,71
1,2014-10-2,61
2,2014-10-3,73
3,2014-10-6,86
4,2014-10-7,65
5,2014-10-8,58
6,2014-10-9,64
7,2014-10-10,72
8,2014-10-14,50
9,2014-10-15,83


### CSVファイル出力

In [302]:
import csv
f = open('out.csv','w',newline='')
writer = csv.writer(f)
writer.writerows(s.values)
f.close()