In [1]:
import datetime
import numpy as np
from sklearn import metrics
import sys
from scipy import stats
import math
import os
import time
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse

# Model selector. The evaluation loop recognises 'cnnavg' and 'cnnmax';
# the value 'cnnavg_concat' is additionally checked here and in load_model.
MODEL_TYPE = 'cnnmax'

# Statistics used by rescale() to map network outputs and targets back
# to their original scale.
MEAN = 61.9
STD = 10.9

BATCH_SIZE = 32
PADDING = 'same'      # 'valid' selects unpadded convolutions in the model classes
LOG_INTERVAL = 5      # spacing (in batches) between the checkpoints that are evaluated
EPOCHS = 3

# Batches per epoch; 5000 is presumably the training-set size -- TODO confirm.
batches = 5000 // BATCH_SIZE

# load_model reads N_HIDDEN + 1 weight files and N_HIDDEN + 1 bias files.
N_HIDDEN = 7 if MODEL_TYPE == 'cnnavg_concat' else 5

# Run identifier; it names both the checkpoint folder and the result files.
model_description = '128int_max_adam_max30_lr01'
CACHE_FOLDER = 'cache_{}'.format(model_description)

In [2]:
# define functions
def rescale(arr, m, s):
    """Undo a standardisation: return ``arr * s + m``.

    Args:
        arr: value(s) to rescale; anything supporting ``*`` and ``+``
            (scalar, NumPy array, torch tensor). The input is not modified.
        m: offset added after scaling.
        s: scale factor applied first.

    Returns:
        The rescaled value(s), of the type produced by ``arr * s + m``.
    """
    return arr * s + m


In [3]:
# define models

class CNNAVG(nn.Module):
    """1-D CNN regressor using average pooling.

    Three stages of Conv1d -> ReLU -> AvgPool1d(2, 2) are followed by three
    fully connected layers (-> 16 -> 64 -> 1). The input is expected as
    ``(batch, 3, input_length)``.

    Args:
        padding: ``'valid'`` builds unpadded convolutions; any other value pads
            every convolution by ``kernel_size // 2`` so the length is kept.
            When omitted, the module-level ``PADDING`` setting is used.
        input_length: number of samples per input channel; only used to size
            ``fc1``. The default of 500 reproduces the original layer sizes
            (342 inputs for 'valid', 372 otherwise).
    """

    def __init__(self, padding=None, input_length=500):
        super().__init__()
        if padding is None:
            padding = PADDING

        self.kernel_size = 7
        self.padding_size = 0
        self.channel_size = 6

        self.avgpool1 = nn.AvgPool1d(kernel_size=2, stride=2)
        self.avgpool2 = nn.AvgPool1d(kernel_size=2, stride=2)
        self.avgpool3 = nn.AvgPool1d(kernel_size=2, stride=2)

        conv_padding = self.padding_size if padding == 'valid' else self.kernel_size // 2
        self.conv1 = nn.Conv1d(3, self.channel_size, kernel_size=self.kernel_size,
                               padding=conv_padding)
        self.conv2 = nn.Conv1d(self.channel_size, self.channel_size, kernel_size=self.kernel_size,
                               padding=conv_padding)
        self.conv3 = nn.Conv1d(self.channel_size, self.channel_size, kernel_size=self.kernel_size,
                               padding=conv_padding)

        # Each stage is a stride-1 convolution followed by a pool that halves
        # the length (rounding down); track the length to size the first
        # fully connected layer instead of hard-coding it.
        length = input_length
        for _ in range(3):
            length = (length + 2 * conv_padding - self.kernel_size + 1) // 2

        self.fc1 = nn.Linear(self.channel_size * length, 16)
        self.fc2 = nn.Linear(16, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """Return one prediction per sample, shape ``(batch, 1)``."""
        x = self.avgpool1(F.relu(self.conv1(x)))
        x = self.avgpool2(F.relu(self.conv2(x)))
        x = self.avgpool3(F.relu(self.conv3(x)))

        # Flatten channels and time into a single feature axis per sample.
        y = x.view(x.shape[0], -1)

        y = F.relu(self.fc1(y))
        y = F.relu(self.fc2(y))
        y = self.fc3(y)
        return y


class CNNMAX(nn.Module):
    """1-D CNN regressor using max pooling.

    Three stages of Conv1d -> ReLU -> MaxPool1d(2, 2) are followed by three
    fully connected layers (-> 16 -> 64 -> 1). The input is expected as
    ``(batch, 3, input_length)``.

    Args:
        padding: ``'valid'`` builds unpadded convolutions; any other value pads
            every convolution by ``kernel_size // 2`` so the length is kept.
            When omitted, the module-level ``PADDING`` setting is used.
        input_length: number of samples per input channel; only used to size
            ``fc1``. The default of 500 reproduces the original layer sizes
            (342 inputs for 'valid', 372 otherwise).
    """

    def __init__(self, padding=None, input_length=500):
        super().__init__()
        if padding is None:
            padding = PADDING

        self.kernel_size = 7
        self.padding_size = 0
        self.channel_size = 6

        self.maxpool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.maxpool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.maxpool3 = nn.MaxPool1d(kernel_size=2, stride=2)

        conv_padding = self.padding_size if padding == 'valid' else self.kernel_size // 2
        self.conv1 = nn.Conv1d(3, self.channel_size, kernel_size=self.kernel_size,
                               padding=conv_padding)
        self.conv2 = nn.Conv1d(self.channel_size, self.channel_size, kernel_size=self.kernel_size,
                               padding=conv_padding)
        self.conv3 = nn.Conv1d(self.channel_size, self.channel_size, kernel_size=self.kernel_size,
                               padding=conv_padding)

        # Each stage is a stride-1 convolution followed by a pool that halves
        # the length (rounding down); track the length to size the first
        # fully connected layer instead of hard-coding it.
        length = input_length
        for _ in range(3):
            length = (length + 2 * conv_padding - self.kernel_size + 1) // 2

        self.fc1 = nn.Linear(self.channel_size * length, 16)
        self.fc2 = nn.Linear(16, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """Return one prediction per sample, shape ``(batch, 1)``."""
        x = self.maxpool1(F.relu(self.conv1(x)))
        x = self.maxpool2(F.relu(self.conv2(x)))
        x = self.maxpool3(F.relu(self.conv3(x)))

        # Flatten channels and time into a single feature axis per sample.
        y = x.view(x.shape[0], -1)

        y = F.relu(self.fc1(y))
        y = F.relu(self.fc2(y))
        y = self.fc3(y)
        return y

In [4]:
def _set_layer_params(layer, weight, bias):
    """Replace ``layer.weight`` and ``layer.bias`` with parameters built from NumPy arrays."""
    layer.weight = torch.nn.Parameter(torch.from_numpy(weight))
    # np.loadtxt returns a 0-d array for a file holding a single value;
    # keep the bias 1-D so it matches the layer's declared bias shape.
    layer.bias = torch.nn.Parameter(torch.from_numpy(np.atleast_1d(bias)))


def load_model(model, epoch, batch, cache_folder):
    """Load text-format checkpoint weights into ``model`` and switch it to eval mode.

    Checkpoint files are read from
    ``../data/results/<cache_folder>/ecg_P1_<epoch>_<batch>_{W,b}<layer>.bin``
    for ``layer`` in ``0 .. N_HIDDEN``. If the first weight file is missing,
    the function polls once a minute and gives up after ten waits.

    Args:
        model: network whose ``conv*`` / ``fc*`` layers receive the weights.
            With ``MODEL_TYPE == 'cnnavg_concat'`` it must also expose
            ``conv4`` and ``conv5``.
        epoch: epoch index used in the checkpoint file names.
        batch: batch index used in the checkpoint file names. A value of 5 is
            redirected to 10 (reason not visible in this notebook -- presumably
            no checkpoint is written for batch 5; TODO confirm).
        cache_folder: sub-folder of ``../data/results`` holding the checkpoint.

    Returns:
        The same ``model`` instance, with parameters replaced, in eval mode.

    Raises:
        FileNotFoundError: if the checkpoint does not appear within the wait
            budget.
    """
    if batch == 5:
        batch = 10

    prefix = '../data/results/{}/ecg_P1_{}_{}'.format(cache_folder, epoch, batch)
    file_name_check = '{}_W0.bin'.format(prefix)

    wait_count = 0
    while not os.path.exists(file_name_check):
        wait_count = wait_count + 1
        if wait_count > 10:
            print('exit process')
            # Raise instead of calling exit(0): exit() is an interactive helper,
            # reports success, and is not guaranteed to stop this loop when
            # running inside a notebook kernel.
            raise FileNotFoundError(
                'Checkpoint file was not generated in time: {}'.format(file_name_check))
        print('Waiting 60s for the file to be generated : ', file_name_check)
        time.sleep(60)

    # Weights and biases are both read from disk, one text file per layer.
    W = [np.loadtxt('{}_W{}.bin'.format(prefix, l)) for l in range(N_HIDDEN + 1)]
    b = [np.loadtxt('{}_b{}.bin'.format(prefix, l)) for l in range(N_HIDDEN + 1)]

    # Target (out_channels, in_channels, kernel) shape of each convolution,
    # in layer order; the stored matrices are transposed before reshaping.
    if MODEL_TYPE == 'cnnavg_concat':
        conv_shapes = [(6, 3, 7), (6, 6, 7), (6, 6, 7), (6, 12, 7), (4, 18, 7)]
    else:
        conv_shapes = [(6, 3, 7), (6, 6, 7), (6, 6, 7)]

    for idx, shape in enumerate(conv_shapes):
        conv_layer = getattr(model, 'conv{}'.format(idx + 1))
        _set_layer_params(conv_layer, np.transpose(W[idx]).reshape(shape), b[idx])

    # The three fully connected layers follow the convolutions in the file
    # numbering. fc1/fc2 weights are stored transposed; the fc3 weight is
    # reshaped to a single output row instead.
    fc_start = len(conv_shapes)
    _set_layer_params(model.fc1, np.transpose(W[fc_start]), b[fc_start])
    _set_layer_params(model.fc2, np.transpose(W[fc_start + 1]), b[fc_start + 1])
    _set_layer_params(model.fc3, W[fc_start + 2].reshape(1, 64), b[fc_start + 2])

    model.eval()

    return model

In [5]:

def report_scores(X, y, trained_model):
    """Run ``trained_model`` on ``X`` and compare against ``y`` on the original scale.

    Args:
        X: 2-D NumPy array, one sample per row. Each row is reinterpreted as
            3 channels x 500 samples; the inputs are deliberately NOT
            normalised here.
        y: NumPy array of standardised targets, one per row of ``X``.
        trained_model: callable network returning one value per sample.

    Returns:
        ``(y_true, y_pred, mse_loss)`` where ``y_true`` and ``y_pred`` are
        lists of floats rescaled with ``MEAN`` / ``STD`` and ``mse_loss`` is
        the mean squared error between them.
    """
    reshaped_X = X.reshape(X.shape[0], 3, 500)

    with torch.no_grad():
        scores = trained_model(torch.from_numpy(reshaped_X))

    # Convert the (N, 1) tensor to a flat NumPy vector so that predictions
    # and targets have the same type and shape for every downstream consumer.
    scores = rescale(scores.detach().cpu().numpy().reshape(-1), MEAN, STD)
    y = rescale(y, MEAN, STD)

    mse_loss = metrics.mean_squared_error(y, scores)

    y_true = list(y)
    y_pred = list(scores)

    return y_true, y_pred, mse_loss

In [6]:
def r_mse(y_true, y_pred, mse):
    """Summarise a set of predictions as text.

    Args:
        y_true: 1-D sequence of reference values.
        y_pred: 1-D sequence of predicted values, same length as ``y_true``.
        mse: mean squared error computed by the caller; only formatted here.

    Returns:
        ``(message, r)``: a one-line summary containing the Pearson
        correlation, the given MSE and the standard deviations of both
        inputs, plus the Pearson correlation itself.
    """
    pearson_r = stats.pearsonr(y_true, y_pred)[0]
    spread_true = np.std(y_true)
    spread_pred = np.std(y_pred)

    summary = 'r:{:.3f}, mse:{:.3f}, std:{:.3f},{:.3f}'.format(pearson_r, mse, spread_true, spread_pred)
    return summary, pearson_r

In [7]:

# Read the comma-separated train/test features and targets as float arrays.
X_train, y_train, X_test, y_test = [
    np.genfromtxt(data_file, delimiter=',', dtype='float')
    for data_file in (
        '../data/ecg/text_demo_5500/Xtrain',
        '../data/ecg/text_demo_5500/ytrain',
        '../data/ecg/text_demo_5500/Xtest',
        '../data/ecg/text_demo_5500/ytest',
    )
]

In [8]:
# Root folder for everything scatter_plot writes.
result_path = 'result_{}'.format(CACHE_FOLDER)

def scatter_plot(y_true, y_pred, message, epoch, batch):
    """Save the predictions as CSV and as a predicted-vs-actual scatter plot.

    Args:
        y_true: 1-D sequence of reference values.
        y_pred: 1-D sequence of predictions, same length as ``y_true``.
        message: text used as the figure title.
        epoch: epoch index; names the CSV file and is part of the PNG name.
        batch: batch index; part of the PNG name only, so a second call for
            the same epoch overwrites that epoch's CSV.

    Side effects:
        Creates ``<result_path>/csv`` and ``<result_path>/scatter`` if needed
        and writes ``csv/<epoch>.csv`` and ``scatter/<epoch>_<batch>.png``.
    """
    import matplotlib.lines as mlines

    os.makedirs('{}/{}'.format(result_path, 'csv'), exist_ok=True)
    os.makedirs('{}/{}'.format(result_path, 'scatter'), exist_ok=True)

    result = np.column_stack((y_true, y_pred))
    pd.DataFrame(result).to_csv("{}/csv/{}.csv".format(result_path, epoch), index=False)

    fig, ax = plt.subplots()
    ax.scatter(y_pred, y_true, s=3)

    # Diagonal drawn in axes coordinates; because both axes share the same
    # limits below, it coincides with the "prediction == actual" line.
    line = mlines.Line2D([0, 1], [0, 1], color='red')
    line.set_transform(ax.transAxes)
    ax.add_line(line)

    fig.suptitle(message)
    ax.set_xlabel('Predictions')
    ax.set_ylabel('Actual')
    # set axes range
    ax.set_xlim(30, 110)
    ax.set_ylim(30, 110)

    fig.savefig("{}/scatter/{}_{}.png".format(result_path, epoch, batch), dpi=600)
    # Close (not just clear) the figure so it is released and the notebook
    # does not render an empty figure for every call.
    plt.close(fig)

In [9]:
# Evaluate every logged checkpoint of every epoch on the train and test sets.
# One dict per checkpoint is appended to result_array.
result_array = []
# The four lists below are not filled in this cell; they are kept so that the
# names stay defined for any other cell that may reference them.
r2_train = []
r2_test = []
mse_train = []
mse_test = []

log_batches = int(batches // LOG_INTERVAL)
step = 0

for e in range(EPOCHS):

    # Index 0 is skipped: the first checkpoint evaluated in an epoch is the
    # one written after LOG_INTERVAL batches.
    for i in range(1, log_batches):
        step = (log_batches * e + i)

        if step > batches:
            continue

        # Any MODEL_TYPE other than 'cnnavg' is evaluated with the max-pool model.
        model = CNNAVG() if MODEL_TYPE == 'cnnavg' else CNNMAX()

        model = load_model(model, e, i * LOG_INTERVAL, CACHE_FOLDER)

        y_true_train, y_pred_train, train_mse_loss = report_scores(X_train, y_train, model)

        print('batch : {}'.format(i))

        y_true, y_pred, test_mse_loss = report_scores(X_test, y_test, model)

        # Bring both splits to flat float vectors before computing Pearson r.
        # The builtin float is used because the np.float alias no longer
        # exists in current NumPy releases.
        y_true_train = np.array(y_true_train, dtype=float).flatten()
        y_pred_train = np.array(y_pred_train, dtype=float).flatten()
        _, train_r = r_mse(y_true_train, y_pred_train, train_mse_loss)

        y_true = np.array(y_true, dtype=float).flatten()
        y_pred = np.array(y_pred, dtype=float).flatten()
        rm, test_r = r_mse(y_true, y_pred, test_mse_loss)

        result = dict()
        result['mse_train'] = train_mse_loss
        result['mse_test'] = test_mse_loss
        result['r_train'] = train_r
        result['r_test'] = test_r
        result_array.append(result)

        # Plot only checkpoint index 30 of each epoch (the last index visited
        # with the current BATCH_SIZE / LOG_INTERVAL settings).
        if i == 30:
            scatter_plot(y_true, y_pred, rm, e, i * LOG_INTERVAL)
print('FINISH')

batch : 1
batch : 2
batch : 3
batch : 4
batch : 5
batch : 6
batch : 7
batch : 8
batch : 9
batch : 10
batch : 11
batch : 12
batch : 13
batch : 14
batch : 15
batch : 16
batch : 17
batch : 18
batch : 19
batch : 20
batch : 21
batch : 22
batch : 23
batch : 24
batch : 25
batch : 26
batch : 27
batch : 28
batch : 29
batch : 30
batch : 1
batch : 2
batch : 3
batch : 4
batch : 5
batch : 6
batch : 7
batch : 8
batch : 9
batch : 10
batch : 11
batch : 12
batch : 13
batch : 14
batch : 15
batch : 16
batch : 17
batch : 18
batch : 19
batch : 20
batch : 21
batch : 22
batch : 23
batch : 24
batch : 25
batch : 26
batch : 27
batch : 28
batch : 29
batch : 30
batch : 1
batch : 2
batch : 3
batch : 4
batch : 5
batch : 6
batch : 7
batch : 8
batch : 9
batch : 10
batch : 11
batch : 12
batch : 13
batch : 14
batch : 15
batch : 16
batch : 17
batch : 18
batch : 19
batch : 20
batch : 21
batch : 22
batch : 23
batch : 24
batch : 25
batch : 26
batch : 27
batch : 28
batch : 29
batch : 30
FINISH


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

In [10]:
import csv
# Write one row per evaluated checkpoint (see result_array) to a CSV file
# named after the run.
csv_file = "result_{}.csv".format(model_description)
csv_columns = ['mse_train', 'mse_test', 'r_train', 'r_test']
try:
    # newline='' is required by the csv module so that it controls the line
    # endings itself; without it, extra blank rows can appear on platforms
    # that translate '\n' on write.
    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(result_array)
except IOError as err:
    # Best-effort export: report the failure (with its cause) without raising.
    print("I/O error: {}".format(err))
