# お弁当の需要予想

In [45]:
import pandas as pd
import numpy as np


### データセットの読み込み

In [46]:
# Read the training, test and sample-submission tables from the working directory.
train = pd.read_csv("./train.csv")
test = pd.read_csv("./test.csv")
# The sample file is read with header=None, so its columns are labelled 0 and 1.
sample = pd.read_csv("./sample.csv", header=None)

# Report the (rows, columns) shape of each table on a single line.
print("Data Shapes")
print("Train: {} Test: {} Sample: {}".format(train.shape, test.shape, sample.shape))

Data Shapes
Train: (207, 12) Test: (40, 11) Sample: (40, 2)


### データフレームで中身を検証

In [47]:
# Index the training rows by the parsed "datetime" column (the column itself is kept).
train.index = pd.to_datetime(train["datetime"])
# Number of missing values in each column.
train.isna().sum()

datetime           0
y                  0
week               0
soldout            0
name               0
kcal              41
remarks          186
event            193
payday           197
weather            0
precipitation      0
temperature        0
dtype: int64

In [48]:
# Preview the first five training rows.
train.head(5)

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,,,,,快晴,--,19.8
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,,,,,快晴,--,17.0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,,,,,快晴,--,15.5
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,,,,,快晴,--,15.2
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,,,,,快晴,--,16.1


### NaNを0に置換

In [49]:
# Replace every missing value in both tables with 0 (fillna returns new frames).
train, test = train.fillna(value=0), test.fillna(value=0)
train.head(5)

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,--,19.8
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,--,17.0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,--,15.5
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,--,15.2
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,--,16.1


In [50]:
def _add_menu_flags(frame):
    """Add the 0/1 indicator columns "fun" and "curry" to ``frame`` in place.

    "fun" is 1 where ``remarks`` equals "お楽しみメニュー", otherwise 0.
    "curry" is 1 where ``name`` contains "カレー", otherwise 0.
    """
    frame["fun"] = (frame["remarks"] == "お楽しみメニュー").astype("int64")
    frame["curry"] = frame["name"].map(lambda dish: 1 if "カレー" in dish else 0)


# The same two flags are derived for the training and the test table.
_add_menu_flags(train)
_add_menu_flags(test)

# Spot-check rows 30-39 of each table.
print(train[30:40])
print(test[30:40])


             datetime    y week  soldout          name   kcal remarks event  \
datetime                                                                      
2014-01-09   2014-1-9  129    木        1        鶏チリソース  435.0       0     0   
2014-01-10  2014-1-10   87    金        0      手作りロースカツ  440.0       0     0   
2014-01-14  2014-1-14  129    火        1   鶏の照り焼きマスタード  376.0       0     0   
2014-01-15  2014-1-15  134    水        0        さんま辛味焼  450.0       0     0   
2014-01-16  2014-1-16  107    木        0  カレイ唐揚げ野菜あんかけ  415.0       0     0   
2014-01-17  2014-1-17   85    金        1           回鍋肉  430.0       0  ママの会   
2014-01-20  2014-1-20  126    月        1    ジューシーメンチカツ  375.0       0     0   
2014-01-21  2014-1-21  129    火        1       サバ焼味噌掛け  447.0       0     0   
2014-01-22  2014-1-22  126    水        1   手作りひれかつとカレー  426.0       0     0   
2014-01-23  2014-1-23  107    木        0            酢豚  400.0       0     0   

            payday weather precipitation  temperatu

### 降水量の操作

In [51]:
def _precipitation_to_float(column):
    """Return ``column`` as float64, with the "--" placeholder replaced by 0."""
    return column.map(lambda value: 0 if value == "--" else value).astype(np.float64)


# Convert the precipitation column of both tables to numeric values.
train["precipitation"] = _precipitation_to_float(train["precipitation"])
test["precipitation"] = _precipitation_to_float(test["precipitation"])
train.head(5)

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,fun,curry
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,0,0
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,0,0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,0,0
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,0,0
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,0,0


### 天気のデータを数値に置換

In [52]:
# For each table, show the distinct weather labels and the type returned by unique().
for frame in (train, test):
    u = frame["weather"].unique()
    print(u, type(u), sep="\n")

['快晴' '曇' '晴れ' '薄曇' '雨' '雪' '雷電']
<class 'numpy.ndarray'>
['雨' '曇' '晴れ' '薄曇' '快晴']
<class 'numpy.ndarray'>


In [53]:
# Add a "weather_number" column initialised to 0 to both tables.
train = train.assign(weather_number=0)
test = test.assign(weather_number=0)
train.head(5)


Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,fun,curry,weather_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,0,0,0
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,0,0,0
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,0,0,0
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,0,0,0
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,0,0,0


In [54]:
# Integer code for each weather label.  One shared table is applied to both
# frames so that a label can never be encoded differently in train and test
# (previously the test frame only handled five of the seven labels, leaving
# "雪" / "雷電" rows at 0).  Labels missing from the table get code 0.
# NOTE(review): the codes are arbitrary integers, not an ordering of the
# weather conditions — consider one-hot encoding if that matters for the model.
weather_codes = {
    "快晴": 1,
    "曇": 2,
    "晴れ": 3,
    "薄曇": 4,
    "雨": 5,
    "雪": 6,
    "雷電": 7,
}

# map() yields NaN for unknown labels; fillna(0) turns those into the 0 code
# and astype restores an integer column.
train["weather_number"] = train["weather"].map(weather_codes).fillna(0).astype("int64")
test["weather_number"] = test["weather"].map(weather_codes).fillna(0).astype("int64")

train.head()

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,fun,curry,weather_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,0,0,1
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,0,0,1
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,0,0,1
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,0,0,1
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,0,0,1


### 曜日データを数値に置換

In [55]:
# Distinct weekday labels in the training table, followed by the type returned by unique().
u = train["week"].unique()
print(u, type(u), sep="\n")

['月' '火' '水' '木' '金']
<class 'numpy.ndarray'>


In [56]:
# Add a "week_number" column initialised to 0 to both tables.
train = train.assign(week_number=0)
test = test.assign(week_number=0)

In [57]:
# Weekday label -> integer code (Monday = 1 ... Friday = 5).
weekday_codes = {"月": 1, "火": 2, "水": 3, "木": 4, "金": 5}

# Labels missing from the table come back from map() as NaN and are set to 0.
train["week_number"] = train["week"].map(weekday_codes).fillna(0).astype("int64")
test["week_number"] = test["week"].map(weekday_codes).fillna(0).astype("int64")

train.head(5)

Unnamed: 0_level_0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature,fun,curry,weather_number,week_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2013-11-18,2013-11-18,90,月,0,厚切りイカフライ,0.0,0,0,0.0,快晴,0.0,19.8,0,0,1,1
2013-11-19,2013-11-19,101,火,1,手作りヒレカツ,0.0,0,0,0.0,快晴,0.0,17.0,0,0,1,2
2013-11-20,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,0.0,0,0,0.0,快晴,0.0,15.5,0,0,1,3
2013-11-21,2013-11-21,120,木,1,若鶏ピリ辛焼,0.0,0,0,0.0,快晴,0.0,15.2,0,0,1,4
2013-11-22,2013-11-22,130,金,1,ビッグメンチカツ,0.0,0,0,0.0,快晴,0.0,16.1,0,0,1,5


### 不要データの削除

In [58]:
# Columns removed from both tables before modelling.
unused_columns = [
    "datetime",
    "week",
    "name",
    "kcal",
    "remarks",
    "event",
    "weather",
    "week_number",
]
train = train.drop(unused_columns, axis=1)
test = test.drop(unused_columns, axis=1)
train.head(5)

Unnamed: 0_level_0,y,soldout,payday,precipitation,temperature,fun,curry,weather_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-11-18,90,0,0.0,0.0,19.8,0,0,1
2013-11-19,101,1,0.0,0.0,17.0,0,0,1
2013-11-20,118,0,0.0,0.0,15.5,0,0,1
2013-11-21,120,1,0.0,0.0,15.2,0,0,1
2013-11-22,130,1,0.0,0.0,16.1,0,0,1


In [59]:
# Preview the first five test rows after the column drop.
test.head(5)

Unnamed: 0,soldout,payday,precipitation,temperature,fun,curry,weather_number
0,1,0.0,0.0,20.2,0,0,5
1,0,0.0,0.0,23.9,0,0,2
2,0,0.0,0.0,28.7,0,0,3
3,1,0.0,0.5,21.5,0,0,5
4,0,0.0,0.0,22.1,0,0,3


### 販売数の取り出し

In [60]:
# Pull the target column "y" out of the training table and show it.
y = train.loc[:, "y"]
print(y)

datetime
2013-11-18     90
2013-11-19    101
2013-11-20    118
2013-11-21    120
2013-11-22    130
             ... 
2014-09-24     59
2014-09-25     50
2014-09-26     45
2014-09-29     56
2014-09-30     40
Name: y, Length: 207, dtype: int64


### トレーニングデータから販売数を削除

In [61]:
# Remove the target from the feature table so only input columns remain.
train = train.drop("y", axis=1)
train.head(5)

Unnamed: 0_level_0,soldout,payday,precipitation,temperature,fun,curry,weather_number
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-11-18,0,0.0,0.0,19.8,0,0,1
2013-11-19,1,0.0,0.0,17.0,0,0,1
2013-11-20,0,0.0,0.0,15.5,0,0,1
2013-11-21,1,0.0,0.0,15.2,0,0,1
2013-11-22,1,0.0,0.0,16.1,0,0,1


### 配列に変換

In [62]:
# Convert the feature tables and the target series to plain NumPy arrays.
train_data = train.to_numpy()
train_labels = y.to_numpy()
test_data = test.to_numpy()


### 特徴を正規化する（特徴の平均を減算して標準偏差で除算）

In [63]:
# Per-feature statistics are computed on the training data only and reused
# for the test data, so both are scaled identically.
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)

# A feature that is constant in the training set has std == 0; dividing by it
# would fill that column with NaN/inf.  Use a divisor of 1 for such features
# (the centred training column is then all zeros).
scale = np.where(std == 0, 1.0, std)

train_data = (train_data - mean) / scale
test_data = (test_data - mean) / scale


### モデルを作成する

In [64]:
from keras.models import Sequential
from keras.layers import Dense

# Fully connected regression network: two hidden ReLU layers of 64 units and a
# single linear output unit.  The input width is the number of feature columns.
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_data.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1),
])

# Train on mean squared error and also track mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [65]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the model.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                512       
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 65        
Total params: 4,737
Trainable params: 4,737
Non-trainable params: 0
_________________________________________________________________


### 学習する

In [66]:
# Fit on the full training set: 50 epochs, one sample per gradient step.
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=50,
    batch_size=1,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### 誤差を調査する

In [67]:
# Evaluate on the data the model was trained on.  evaluate() returns
# [loss, mae] in the order given to compile(), so score[1] is the MAE.
# This is a training-set error, not a held-out estimate, and is labelled as such.
score = model.evaluate(train_data, train_labels, verbose=0)
print()
print("Training set Mean Abs Error: {:7.2f}".format(score[1]))


Testing set Mean Abs Error:   13.73


In [68]:
# Display the target values of the training set for comparison with the predictions.
train_labels

array([ 90, 101, 118, 120, 130, 135, 145, 140, 151, 116, 151, 153, 151,
       171, 134, 165, 155, 157, 109, 111, 160, 145, 145, 151, 134, 122,
       121,  80, 131, 128, 129,  87, 129, 134, 107,  85, 126, 129, 126,
       107,  92, 126, 120, 121, 105,  92, 139, 121, 126, 111,  91, 137,
       111,  84,  86, 137,  99, 113,  85, 113, 129, 104, 104,  90,  69,
       131, 100, 110,  77,  73, 123,  89,  68,  80,  70, 129,  90,  72,
       100, 102,  77,  51,  55, 106,  84, 125,  99, 100,  90, 107,  96,
        88,  87, 128,  92,  82,  57,  57,  56, 109,  78,  63,  56,  80,
        54,  97,  64,  47,  88,  59,  58,  73,  70,  64,  58, 126,  70,
        71, 104,  54,  65,  77,  79,  74,  64, 119,  74,  59,  47,  64,
        93,  74,  56,  50,  72, 121,  76,  63,  66,  56,  57,  72,  61,
        64,  49,  74,  65,  54,  50,  53,  63,  82,  56,  54,  48, 124,
        65,  51,  53,  53,  55,  53,  56,  62,  83,  65,  52,  58,  60,
        38,  75,  54,  58,  63, 129,  45,  40,  56,  53,  56,  5

In [69]:
# Predict on the training inputs and flatten the result to a 1-D array.
train_predictions = model.predict(train_data).flatten()
# astype(int) drops the fractional part (truncation, not rounding) and returns a new array.
myArray = train_predictions.astype(int)
myArray

array([ 88, 109, 111, 117, 113, 112,  98, 109,  91, 119, 120, 120, 111,
       116, 113, 120, 129, 112, 119, 120, 120, 113, 122, 125, 121, 120,
       126, 119, 122, 115, 117, 127, 121, 129, 128, 126, 120, 124, 127,
       125, 118, 130, 118, 124, 110, 120, 113, 118, 133, 126, 121, 125,
       121, 121, 112, 114, 118, 120, 119, 118, 123, 122, 110, 111,  89,
       118, 118, 100, 128, 127, 126, 125,  95,  89, 117, 116,  92, 115,
       100, 101,  92,  85,  76, 118,  90, 102,  88,  90,  97, 116,  95,
        91,  81, 130,  96,  88,  70,  86, 117,  97,  94,  90,  88,  83,
        63,  83,  58,  60,  92,  73,  63,  65,  87,  69,  92, 118,  77,
        62,  98,  81,  80,  72,  60,  69,  71, 120,  59,  59,  54,  76,
        89,  69,  59,  64,  69, 120,  67,  59,  75,  54,  59,  53,  60,
        69,  55,  80,  59,  52,  68,  59,  66,  63,  54,  70,  49, 118,
        65,  54,  64,  65,  67,  54,  64,  63,  84,  65,  54,  54,  55,
        59,  63,  62,  66,  55, 128,  55,  64,  53,  63,  55,  5

### 検査データで予測する

In [70]:
# Predict on the test inputs and flatten the result to a 1-D array.
test_predictions = model.predict(test_data).flatten()
# astype(int) drops the fractional part (truncation, not rounding) and returns a new array.
myArray = test_predictions.astype(int)
myArray

array([ 80,  59,  54,  68,  65,  77,  65, 135,  59,  87,  84,  71,  72,
        77,  99,  90,  87,  75,  99,  89,  79,  84,  82, 107,  92, 121,
        76, 105, 106,  98, 114, 107, 113, 113, 118,  91,  93,  96, 112,
       103])

### サンプルデータから出力データを作成

In [71]:
# Show the (rows, columns) shape of the sample submission table.
sample.shape

(40, 2)

In [72]:
# Integer-valued test predictions (fractional part dropped).
myArray = test_predictions.astype(int)

# Attach the predictions to the sample table, then drop its original column 1
# so that only column 0 and the new "y" column remain.
sample["y"] = myArray
s = sample.drop(columns=[1])
s

Unnamed: 0,0,y
0,2014-10-1,80
1,2014-10-2,59
2,2014-10-3,54
3,2014-10-6,68
4,2014-10-7,65
5,2014-10-8,77
6,2014-10-9,65
7,2014-10-10,135
8,2014-10-14,59
9,2014-10-15,87


### CSVファイル出力

In [73]:
import csv

# Write the submission rows to out.csv without a header line.  The context
# manager closes the file even if writing raises; newline='' lets the csv
# module control line endings itself.
with open('out.csv', 'w', newline='') as f:
    csv.writer(f).writerows(s.values)