In [10]:
import torch
import torch.nn.functional as F
from sklearn.cluster import KMeans
import numpy as np

import time

from device import dev
# Prefix prepended to '<DATASET>_model.pth' when the trained model is loaded;
# with the empty string the checkpoint is looked up relative to the working directory.
# NOTE(review): this name is the same as pathlib's Path class — avoid importing that alongside it.
Path = ""
# Not referenced in the visible cells — presumably a log output location; TODO confirm.
Log_Path = ""
from dataset import dataset
from model import RNN
from preprocess import get_transitions

from util import get_matrices
from util import blank_filling, identical_filling, empirical_filling
from util import none_regularization, linear_regularization

from evaluation import evaluation

In [11]:
# Dataset name: passed to dataset(...) for both splits and used as the prefix
# of the '<DATASET>_model.pth' checkpoint file.
DATASET = 'news'

# Number of clusters handed to get_transitions; the workflow works with
# CLUSTER + 1 states.
CLUSTER = 40

# Completion and regularization tactics, both forwarded to get_matrices.
COMPLETION = empirical_filling # options: blank_filling, identical_filling, empirical_filling
REGULARIZATION = linear_regularization # options: none_regularization, linear_regularization

In [12]:
# Stage 1: load both dataset splits and the trained model.
# start_time also feeds the final "Workflow done" timing message.
start_time = time.time()

# The boolean selects the split — presumably True = train, False = test; TODO confirm.
train_dataset = dataset(DATASET, True)
test_dataset = dataset(DATASET, False)

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model = torch.load(''.join((Path, DATASET, '_model.pth')))
model.eval()

# One more state than there are clusters.
state_num = CLUSTER + 1
vocab_num = len(train_dataset.vocab)

print(f'vocab: {vocab_num}')
print(f'data number: {len(train_dataset.int_data)}')
print(f'Model and dataset ready. Use time:{time.time()-start_time:.1f}')

current_time = time.time()
# Collect the RNN's own prediction for every sequence in the test set.
rnn_prediction_container = []
# Inference only: no_grad() stops autograd from recording a graph for each
# forward pass (the outputs were detached and the graph discarded anyway).
with torch.no_grad():
    for data in test_dataset.int_data:
        # Strip trailing 0 entries (presumably padding), always keeping at
        # least one element.
        while len(data) > 1 and data[-1] == 0:
            data = data[0:len(data)-1]
        data = data.reshape(-1, 1)

        # Presumably clears the outputs recorded by the previous forward
        # pass before this sequence is run — TODO confirm in model.RNN.
        model.clear_output_sequence()
        _ = model(data)

        # Only the final step's output decides the prediction, so softmax is
        # applied to that step alone instead of to every step.
        step_outputs = list(model.runtime_predict())
        final_scores = F.softmax(step_outputs[-1].flatten().detach(), dim=0)
        rnn_prediction_container.append(torch.argmax(final_scores))

transition_count, kmeans, state_weightes = get_transitions(model, train_dataset, CLUSTER)
# The timer was started before the prediction loop, so this figure covers both
# the test-set predictions and get_transitions.
print(f'Transitions ready. Use time:{time.time()-current_time:.1f}')

# Restart the stage timer; it is read again when the transition matrices are reported.
current_time = time.time()

# Pairwise squared Euclidean distance between the state weight vectors:
# state_distance[i, j] = sum((w_i - w_j)^2).
state_distance = torch.zeros((state_num, state_num), device=dev())
for row in range(state_num):
    row_weight = state_weightes[row]
    for col in range(state_num):
        delta = row_weight - state_weightes[col]
        state_distance[row, col] = (delta * delta).sum()
#print(state_distance)


FileNotFoundError: [Errno 2] No such file or directory: 'FinalCode/Data/news/train_data_pytorch.csv'

In [None]:
# Pass the transition counts, the pairwise state distances and the two
# configured tactics (COMPLETION, REGULARIZATION) to get_matrices.
transition_matrices = get_matrices(transition_count, state_distance, COMPLETION, REGULARIZATION)
print(f'Transition matrices ready. Use time:{time.time()-current_time:.1f}')
current_time = time.time()  # restart the per-stage timer for the evaluation step

# evaluation(...) receives the test set, the transition matrices, the state
# weights and the RNN's own predictions; its result is reported below as a
# percentage (value * 100).
correct_rate = evaluation(test_dataset, transition_matrices, state_weightes, rnn_prediction_container)
print(f'correct rate: {correct_rate*100:.2f}%')
print(f'Evaluation done. Use time:{time.time()-current_time:.1f}')


# Total wall-clock time since start_time was set when loading began.
print(f'Workflow done. Use time:{time.time()-start_time:.1f}')

