In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split

In [3]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sample = pd.read_csv('sample.csv', header=None)

In [4]:
train.head()

Unnamed: 0,datetime,y,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
0,2013-11-18,90,月,0,厚切りイカフライ,,,,,快晴,--,19.8
1,2013-11-19,101,火,1,手作りヒレカツ,,,,,快晴,--,17.0
2,2013-11-20,118,水,0,白身魚唐揚げ野菜あん,,,,,快晴,--,15.5
3,2013-11-21,120,木,1,若鶏ピリ辛焼,,,,,快晴,--,15.2
4,2013-11-22,130,金,1,ビッグメンチカツ,,,,,快晴,--,16.1


In [5]:
sample.head()

Unnamed: 0,0,1
0,2014-10-1,24
1,2014-10-2,71
2,2014-10-3,25
3,2014-10-6,32
4,2014-10-7,60


In [6]:
test.head()

Unnamed: 0,datetime,week,soldout,name,kcal,remarks,event,payday,weather,precipitation,temperature
0,2014-10-1,水,1,メンチカツ,420.0,,,,雨,0,20.2
1,2014-10-2,木,0,バーベキューチキン,415.0,,,,曇,--,23.9
2,2014-10-3,金,0,豚肉のマスタード焼き,405.0,,,,晴れ,--,28.7
3,2014-10-6,月,1,麻婆春雨,400.0,,,,雨,0.5,21.5
4,2014-10-7,火,0,厚揚げ肉みそ炒め,430.0,,,,晴れ,--,22.1


In [7]:
## Trainデータを作る

In [8]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   datetime       207 non-null    object 
 1   y              207 non-null    int64  
 2   week           207 non-null    object 
 3   soldout        207 non-null    int64  
 4   name           207 non-null    object 
 5   kcal           166 non-null    float64
 6   remarks        21 non-null     object 
 7   event          14 non-null     object 
 8   payday         10 non-null     float64
 9   weather        207 non-null    object 
 10  precipitation  207 non-null    object 
 11  temperature    207 non-null    float64
dtypes: float64(3), int64(2), object(7)
memory usage: 19.5+ KB


In [9]:
X = train.drop('y', axis=1)
y = train['y']
print(X.shape)
print(y.shape)

(207, 11)
(207,)


In [10]:
X['year'] = X['datetime'].apply(lambda x : x.split('-')[0])
X['month'] = X['datetime'].apply(lambda x : x.split('-')[1])
X['day'] = X['datetime'].apply(lambda x : x.split('-')[2])

In [11]:
X['week'].value_counts()

木    43
水    43
火    41
金    41
月    39
Name: week, dtype: int64

In [12]:
X['remarks'].value_counts()

お楽しみメニュー                    12
料理長のこだわりメニュー                 5
手作りの味                        1
酢豚（28食）、カレー（85食）             1
スペシャルメニュー（800円）              1
鶏のレモンペッパー焼（50食）、カレー（42食）     1
Name: remarks, dtype: int64

In [13]:
X['event'].value_counts()

ママの会             9
キャリアアップ支援セミナー    5
Name: event, dtype: int64

In [14]:
X['weather'].value_counts()

曇     53
快晴    53
晴れ    50
薄曇    25
雨     24
雷電     1
雪      1
Name: weather, dtype: int64

In [15]:
dummies = pd.get_dummies(X[['weather', 'week', 'event']])

In [16]:
dummies

Unnamed: 0,weather_快晴,weather_晴れ,weather_曇,weather_薄曇,weather_雨,weather_雪,weather_雷電,week_月,week_木,week_水,week_火,week_金,event_キャリアアップ支援セミナー,event_ママの会
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,1,0,0,0
2,1,0,0,0,0,0,0,0,0,1,0,0,0,0
3,1,0,0,0,0,0,0,0,1,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202,0,0,1,0,0,0,0,0,0,1,0,0,0,0
203,0,0,1,0,0,0,0,0,1,0,0,0,0,0
204,0,1,0,0,0,0,0,0,0,0,0,1,1,0
205,1,0,0,0,0,0,0,1,0,0,0,0,0,0


In [17]:
dummies = dummies.drop(['weather_快晴', 'week_月'], axis=1)

In [18]:
dummies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   weather_晴れ           207 non-null    uint8
 1   weather_曇            207 non-null    uint8
 2   weather_薄曇           207 non-null    uint8
 3   weather_雨            207 non-null    uint8
 4   weather_雪            207 non-null    uint8
 5   weather_雷電           207 non-null    uint8
 6   week_木               207 non-null    uint8
 7   week_水               207 non-null    uint8
 8   week_火               207 non-null    uint8
 9   week_金               207 non-null    uint8
 10  event_キャリアアップ支援セミナー  207 non-null    uint8
 11  event_ママの会           207 non-null    uint8
dtypes: uint8(12)
memory usage: 2.6 KB


In [19]:
X = pd.merge(X[['soldout', 'temperature', 'year', 'month', 'day']], dummies, left_index=True, right_index=True)
X.head()

Unnamed: 0,soldout,temperature,year,month,day,weather_晴れ,weather_曇,weather_薄曇,weather_雨,weather_雪,weather_雷電,week_木,week_水,week_火,week_金,event_キャリアアップ支援セミナー,event_ママの会
0,0,19.8,2013,11,18,0,0,0,0,0,0,0,0,0,0,0,0
1,1,17.0,2013,11,19,0,0,0,0,0,0,0,0,1,0,0,0
2,0,15.5,2013,11,20,0,0,0,0,0,0,0,1,0,0,0,0
3,1,15.2,2013,11,21,0,0,0,0,0,0,1,0,0,0,0,0
4,1,16.1,2013,11,22,0,0,0,0,0,0,0,0,0,1,0,0


In [20]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   soldout              207 non-null    int64  
 1   temperature          207 non-null    float64
 2   year                 207 non-null    object 
 3   month                207 non-null    object 
 4   day                  207 non-null    object 
 5   weather_晴れ           207 non-null    uint8  
 6   weather_曇            207 non-null    uint8  
 7   weather_薄曇           207 non-null    uint8  
 8   weather_雨            207 non-null    uint8  
 9   weather_雪            207 non-null    uint8  
 10  weather_雷電           207 non-null    uint8  
 11  week_木               207 non-null    uint8  
 12  week_水               207 non-null    uint8  
 13  week_火               207 non-null    uint8  
 14  week_金               207 non-null    uint8  
 15  event_キャリアアップ支援セミナー  207 non-null    uin

In [21]:
train_X, test_X, train_Y, test_Y = train_test_split(X, y)

In [22]:
train_X = torch.from_numpy(np.array(train_X, dtype='float32')).float()
train_Y = torch.from_numpy(np.array(train_Y, dtype='long')).long()
test_X = torch.from_numpy(np.array(test_X, dtype='float32')).float()
test_Y = torch.from_numpy(np.array(test_Y, dtype='long')).long()



In [23]:
print(train_X.shape)
print(test_X.shape)

torch.Size([155, 17])
torch.Size([52, 17])


In [24]:
print(train_Y.shape)
print(test_Y.shape)

torch.Size([155])
torch.Size([52])


In [25]:
train = TensorDataset(train_X, train_Y)

In [26]:
train_loader = DataLoader(train, batch_size=15, shuffle=True)

In [27]:
# モデルを定義する

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(17, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 128)
        self.fc4 = nn.Linear(128, 17)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

model = Net()

In [29]:
criterion = nn.MSELoss() # 平均二乗和誤差
optimizer = optim.Adam(model.parameters(), lr=0.001) # 勾配降下法 SGDやAdaGradなどの改良版

for epoch in range(160):
    total_loss =  0
    
    for train_x, train_y in train_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        optimizer.zero_grad()
        output = model(train_x)
        loss = criterion(output, train_x)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    if (epoch+1) % 10 == 0:
        print(epoch+1, total_loss)

10 186.1560983657837
20 117.88030338287354
30 111.14351654052734
40 175.810715675354
50 127.40288066864014
60 128.6400489807129
70 131.0964593887329
80 120.05488300323486
90 139.6602029800415
100 122.21088457107544
110 150.1782054901123
120 149.09845781326294
130 153.75276279449463
140 132.79768562316895
150 134.5181713104248
160 111.54436254501343


In [None]:
test_x, test_y = Variable(torch.from_numpy(np.array(test_X, dtype='float32'))), Variable(torch.from_numpy(np.array(test_Y, dtype='float32')))
result = torch.max(model(test_x).data, 1)[1]
accuracy = sum(test_y.data.numpy() == result.numpy()) / len(test_y.data.numpy())

In [None]:
accuracy

In [None]:
result