In [2]:
import pandas as pd
import os

def load_data(FileName):
    xls_path = os.path.join(FileName)
    return pd.ExcelFile(xls_path)

data = pd.read_excel(load_data("test.xlsx"), 'Sheet1')

In [3]:
from sklearn.externals.joblib import load

#load label encoder and encode device name
#label_encoder = load('label_encoder.bin')
#data["Device"] = label_encoder.fit_transform(data["Device"])

#splitting the feature and label
features = data.loc[:,'Device ID':'Duration']
label = data.loc[:,'Label']

In [6]:
#load scaler and scale the data

scaler=load('std_scaler.bin')
test_data = scaler.transform(features)
test_data_label = label

In [5]:
class Environment1:
    
    def __init__(self, data, label):
        self.data = data
        self.label = label
        self.reset()
        
    def reset(self):
        self.t = 0
        self.done = False

        return self.data[self.t, :]
    
    def step(self, act):
        reward = 0
        
        # act = 0: unoccupied, 1: occupied
        if act == self.label.iloc[self.t]:
            reward +=1 
        else:
            reward -=1
  
        # set next time
        self.t += 1
        
        return self.data[self.t, :], reward, self.done # obs, reward, done

In [5]:
import chainer
import chainer.links as L
import chainer.functions as F
from chainer import serializers

class loaded_Q_Network(chainer.Chain):

        def __init__(self, input_size, hidden_size, output_size):
            super(loaded_Q_Network, self).__init__(
                fc1 = L.Linear(input_size, hidden_size),
                fc2 = L.Linear(hidden_size, hidden_size),
                fc3 = L.Linear(hidden_size, output_size)
            )

        def __call__(self, x):
            h = F.relu(self.fc1(x))
            h = F.relu(self.fc2(h))
            y = self.fc3(h)
            return y

        def reset(self):
            self.zerograds()


loaded_Q = loaded_Q_Network(input_size=4, hidden_size=100, output_size=2)

serializers.load_npz('Q.model', loaded_Q)

In [6]:
import numpy as np
test_env = Environment1(test_data,test_data_label)

feature = test_env.reset()

test_acts = []
test_rewards = []

for _ in range(len(test_env.data)-1):

    act = loaded_Q(np.array(feature, dtype=np.float32).reshape(1, -1))
    act = np.argmax(act.data)
    test_acts.append(act)

    obs, reward, done = test_env.step(act)
    test_rewards.append(reward)

    feature = obs
    
print(test_rewards.count(1)/len(test_env.data))  
#print(test_acts.count(0))

0.5910224438902744


In [7]:
print(test_acts)

[1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 