In [46]:
import numpy as np 
import pandas as pd 
import json
from pandas.io.json import json_normalize
import os
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import torch
import random
from torch.nn import Linear, LayerNorm, ReLU, Dropout
# from torch_geometric.nn import ChebConv, NNConv, DeepGCNLayer
# from torch_geometric.data import Data, DataLoader

import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
from time import perf_counter
import optuna
from sklearn.preprocessing import MinMaxScaler

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
print(device)

cpu


In [4]:

train = pd.read_json('../train.json', lines=True)
test = pd.read_json('../test.json', lines=True)

In [5]:
train=train.set_index("index")
test=test.set_index("index")

In [6]:
train=train[train["signal_to_noise"]>=1]

In [7]:
train=train.assign(pair_prob=[np.load('../bpps/'+row['id']+'.npy') for index,row in train.iterrows()]) 

In [11]:
test=test.assign(pair_prob=[np.load('../bpps/'+row['id']+'.npy') for index,row in test.iterrows()]) 

In [8]:
bpps_nb_mean = 0.077522 # mean of bpps_nb across all training data
bpps_nb_std = 0.08914  
error_mean_limit = 0.5

In [9]:
target_col = ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']

rna_dict    = {x:i for i, x in enumerate('ACGU')} #4
struct_dict = {x:i for i, x in enumerate('().')}  #3
loop_dict   = {x:i for i, x in enumerate('BEHIMSX')}#7

In [12]:
pair_threshold = 0.15
pair_prob = train["pair_prob"].values
def create_adj(index):
    mat = np.array(pair_prob[index])
    shape = mat.shape
    for i in range(shape[0]):
        for j in range(shape[1]):
            if(i==j):
                mat[i][j] = 1
                continue
            if(mat[i][j]>pair_threshold):
                mat[i][j]=1
            else:
                mat[i][j]=0
    return mat

test_pair_prob = test["pair_prob"].values
def test_create_adj(index):
    mat = np.array(test_pair_prob[index])
    shape = mat.shape
    for i in range(shape[0]):
        for j in range(shape[1]):
            if(i==j):
                mat[i][j] = 1
                continue
            if(mat[i][j]>pair_threshold):
                mat[i][j]=1
            else:
                mat[i][j]=0
    return mat
def calc_error_mean(row):
    reactivity_error = row['reactivity_error']
    deg_error_Mg_pH10 = row['deg_error_Mg_pH10']
    deg_error_Mg_50C = row['deg_error_Mg_50C']

    return np.mean(np.abs(reactivity_error) +
                   np.abs(deg_error_Mg_pH10) + \
                   np.abs(deg_error_Mg_50C)) / 3

def calc_sample_weight(row):

    error_mean = calc_error_mean(row)
    if error_mean >= error_mean_limit:
        return 0.

    return 1. - error_mean / error_mean_limit
def weighted_mse_loss(prds, tgts, weight):
    return torch.mean(weight * (prds - tgts)**2)

def criterion(prds, tgts):
#     print(prds)
#     print(tgts)
    return (torch.sqrt(torch.nn.MSELoss()(prds, tgts)))

def add_edges(edge_index, edge_features, node1, node2, feature1, feature2):
    edge_index.append([node1, node2])
    edge_features.append(feature1)
    edge_index.append([node2, node1])
    edge_features.append(feature2)

def add_edges_between_base_nodes(edge_index, edge_features, node1, node2):
    edge_feature1 = [
        0, # is edge for paired nodes
        0, # is edge between codon node and base node
        0, # is edge between coden nodes
        1, # forward edge: 1, backward edge: -1
        1, # bpps if edge is for paired nodes
    ]
    edge_feature2 = [
        0, # is edge for paired nodes
        0, # is edge between codon node and base node
        0, # is edge between coden nodes
        -1, # forward edge: 1, backward edge: -1
        1, # bpps if edge is for paired nodes
    ]
    add_edges(edge_index, edge_features, node1, node2,
              edge_feature1, edge_feature2)

def add_edges_between_paired_nodes(edge_index, edge_features, node1, node2,
                                   bpps_value):
    edge_feature1 = [
        1, # is edge for paired nodes
        0, # is edge between codon node and base node
        0, # is edge between coden nodes
        0, # forward edge: 1, backward edge: -1
        bpps_value, # bpps if edge is for paired nodes
    ]
    edge_feature2 = [
        1, # is edge for paired nodes
        0, # is edge between codon node and base node
        0, # is edge between coden nodes
        0, # forward edge: 1, backward edge: -1
        bpps_value, # bpps if edge is for paired nodes
    ]
    add_edges(edge_index, edge_features, node1, node2,
              edge_feature1, edge_feature2)

def add_edges_between_codon_nodes(edge_index, edge_features, node1, node2):
    edge_feature1 = [
        0, # is edge for paired nodes
        0, # is edge between codon node and base node
        1, # is edge between coden nodes
        1, # forward edge: 1, backward edge: -1
        0, # bpps if edge is for paired nodes
    ]
    edge_feature2 = [
        0, # is edge for paired nodes
        0, # is edge between codon node and base node
        1, # is edge between coden nodes
        -1, # forward edge: 1, backward edge: -1
        0, # bpps if edge is for paired nodes
    ]
    add_edges(edge_index, edge_features, node1, node2,
              edge_feature1, edge_feature2)

def add_edges_between_codon_and_base_node(edge_index, edge_features,
                                          node1, node2):
    edge_feature1 = [
        0, # is edge for paired nodes
        1, # is edge between codon node and base node
        0, # is edge between coden nodes
        0, # forward edge: 1, backward edge: -1
        0, # bpps if edge is for paired nodes
    ]
    edge_feature2 = [
        0, # is edge for paired nodes
        1, # is edge between codon node and base node
        0, # is edge between coden nodes
        0, # forward edge: 1, backward edge: -1
        0, # bpps if edge is for paired nodes
    ]
    add_edges(edge_index, edge_features, node1, node2,
              edge_feature1, edge_feature2)

def add_node(node_features, feature):
    node_features.append(feature)

def add_base_node(node_features, sequence, predicted_loop_type,
                  bpps_sum, bpps_nb):
    feature = [
        0, # is codon node
        sequence == 'A',
        sequence == 'C',
        sequence == 'G',
        sequence == 'U',
        predicted_loop_type == 'S',
        predicted_loop_type == 'M',
        predicted_loop_type == 'I',
        predicted_loop_type == 'B',
        predicted_loop_type == 'H',
        predicted_loop_type == 'E',
        predicted_loop_type == 'X',
        bpps_sum,
        bpps_nb,
    ]
    add_node(node_features, feature)

def add_codon_node(node_features):
    feature = [
        1, # is codon node
        0, # sequence == 'A',
        0, # sequence == 'C',
        0, # sequence == 'G',
        0, # sequence == 'U',
        0, # predicted_loop_type == 'S',
        0, # predicted_loop_type == 'M',
        0, # predicted_loop_type == 'I',
        0, # predicted_loop_type == 'B',
        0, # predicted_loop_type == 'H',
        0, # predicted_loop_type == 'E',
        0, # predicted_loop_type == 'X',
        0, # bpps_sum
        0, # bpps_nb
    ]
    add_node(node_features, feature)

    
def build_data(df, is_train):
    data = []
    for i in range(len(df)):
        targets = []
        node_features = []
        edge_features = []
        edge_index = []
        train_mask = []
        test_mask = []
        weights = []

        id = df.loc[i, 'id']
        path = os.path.join('../bpps', id + '.npy')
        bpps = np.load(path)
        bpps_sum = bpps.sum(axis=0)
        sequence = df.loc[i, 'sequence']
        structure = df.loc[i, 'structure']
        pair_info = match_pair(structure)
        predicted_loop_type = df.loc[i, 'predicted_loop_type']
        seq_length = df.loc[i, 'seq_length']
        seq_scored = df.loc[i, 'seq_scored']
        bpps_nb = (bpps > 0).sum(axis=0) / seq_length
        bpps_nb = (bpps_nb - bpps_nb_mean) / bpps_nb_std
        if is_train:
            sample_weight = calc_sample_weight(df.loc[i])

            reactivity = df.loc[i, 'reactivity']
            deg_Mg_pH10 = df.loc[i, 'deg_Mg_pH10']
            deg_Mg_50C = df.loc[i, 'deg_Mg_50C']

            for j in range(seq_length):
                if j < seq_scored:
                    targets.append([
                        reactivity[j],
                        deg_Mg_pH10[j],
                        deg_Mg_50C[j],
                        ])
                else:
                    targets.append([0, 0, 0])

        paired_nodes = {}
        for j in range(seq_length):
            add_base_node(node_features, sequence[j], predicted_loop_type[j],
                          bpps_sum[j], bpps_nb[j])

            if j + 1 < seq_length: # edge between current node and next node
                add_edges_between_base_nodes(edge_index, edge_features,
                                             j, j + 1)

            if pair_info[j] != -1:
                if pair_info[j] not in paired_nodes:
                    paired_nodes[pair_info[j]] = [j]
                else:
                    paired_nodes[pair_info[j]].append(j)

            train_mask.append(j < seq_scored)
            test_mask.append(True)
            if is_train:
                weights.append(sample_weight)

       
        for pair in paired_nodes.values():
            bpps_value = bpps[pair[0], pair[1]]
            add_edges_between_paired_nodes(edge_index, edge_features,
                                           pair[0], pair[1], bpps_value)

       
        codon_node_idx = seq_length - 1
        for j in range(seq_length):
            if j % 3 == 0:
                # add codon node
                add_codon_node(node_features)
                codon_node_idx += 1
                train_mask.append(False)
                test_mask.append(False)
                if is_train:
                    weights.append(0)
                    targets.append([0, 0, 0])

                if codon_node_idx > seq_length:
                    # add edges between adjacent codon nodes
                    add_edges_between_codon_nodes(edge_index, edge_features,
                                                  codon_node_idx - 1,
                                                  codon_node_idx)

            # add edges between codon node and base node
            add_edges_between_codon_and_base_node(edge_index, edge_features,
                                                  j, codon_node_idx)

        node_features = torch.tensor(node_features, dtype=torch.float)
        edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
        edge_features = torch.tensor(edge_features, dtype=torch.float)

        if is_train:
            data.append(MyData(x=node_features, edge_index=edge_index,
                               edge_attr=edge_features,
                               train_mask=torch.tensor(train_mask),
                               weight=torch.tensor(weights, dtype=torch.float),
                               y=torch.tensor(targets, dtype=torch.float)))
        else:
            data.append(MyData(x=node_features, edge_index=edge_index,
                               edge_attr=edge_features,
                               test_mask=torch.tensor(test_mask)))

    return data

def np_onehot(x, max=54):
    return np.eye(max)[x]

In [13]:
class SwishFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        y = x * torch.sigmoid(x)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_variables[0]
        sigmoid = torch.sigmoid(x)
        return grad_output * (sigmoid * (1 + x * (1 - sigmoid)))
F_swish = SwishFunction.apply
class Swish(nn.Module):
    def forward(self, x):
        return F_swish(x)
class ConvBn1d(nn.Module):
    def __init__(self, in_dim, out_dim, kernel_size, padding=0, dilation=1):
        super(ConvBn1d, self).__init__()
        self.conv = nn.Conv1d(in_dim, out_dim, kernel_size=kernel_size, padding=padding, dilation=dilation, bias=False)
        self.bn = nn.BatchNorm1d(out_dim)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = F_swish(x)
        x = F.dropout(x,0.2,training=self.training)
        return x
class PositionEncode(nn.Module):
    def __init__(self, dim, length=130):
        super(PositionEncode, self).__init__()
        position = torch.zeros(length,dim)
        p = torch.arange(0, length, dtype=torch.float).unsqueeze(1)
        div = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        position[:,0::2] = torch.sin(p * div)
        position[:,1::2] = torch.cos(p * div)
        position = position.transpose(0, 1).reshape(1,dim,length) #.contiguous()
        self.register_buffer('position', position)

        #self.position = nn.Parameter( torch.randn(1, dim, length) ) #random

    def forward(self, x):
        batch_size, length, dim = x.shape

        position = self.position.repeat(batch_size, 1, 1)
        position = position[:, :, :length].contiguous()
        return position

# d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        num_target=5
        self.position = PositionEncode(64)
        self.cnn = nn.ModuleList([
            #ConvBn1d( 14,  32, kernel_size=1,  padding=0),
            ConvBn1d( 16,  64, kernel_size=5,  padding=2),
            ConvBn1d( 16, 128, kernel_size=11, padding=5),
            ConvBn1d( 16, 128, kernel_size=21, padding=10),
        ])

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(384, 64, 1024, dropout=0.1, activation='relu'),
            2
        )
        # self.rnn = nn.GRU(256, 256, num_layers=2, batch_first=True, dropout=0, bidirectional=True)
        self.predict = nn.Linear(384,num_target)

    #https://discuss.pytorch.org/t/clarification-regarding-the-return-of-nn-gru/47363/2
    def forward(self, sequence):
        batch_size, length, dim = sequence.shape

        pos  = self.position(sequence)
        sequence = sequence.permute(0,2,1).contiguous()
        seq = [cnn(sequence) for cnn in self.cnn]
        x = torch.cat(seq+[pos], 1)
        x = x.permute(-1, 0, 1) #torch.Size([107, 8, 512])
        x = F.dropout(x,0.1,training=self.training)

        x = self.transformer(x) #torch.Size([107, 8, 512])
        x = x.permute(1, 0, 2).contiguous() #torch.Size([8, 512, 107])

        x = F.dropout(x,0.5,training=self.training)
        predict = self.predict(x)
        return predict



def mse_loss(predict,target):
    batch_size,length, num_target = target.shape
    predict = predict[:,:length]
    loss = F.mse_loss(predict,target)
    return loss


# https://www.kaggle.com/c/stanford-covid-vaccine/discussion/183211
def mcrmse_loss(predict,target):
    batch_size,length, num_target = target.shape
    predict = predict[:,:length]
    predict = predict.reshape(-1,num_target)
    target  = target.reshape(-1,num_target)

    l = (predict-target)**2
    l = l.mean(0)
    l = l**0.5
    loss = l.mean()
    return loss

In [79]:
class CNN2D(nn.Module):
    def __init__(self,layer_size,channel,drop1,drop2):
        super(CNN2D, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((34, 2))
        self.conv1 = nn.Conv2d(2, channel, (3,2))
        self.conv2 = nn.Conv2d(channel, channel, (2,1))
        self.fc1 = nn.Linear(channel * 31, layer_size)
        self.fc2 = nn.Linear(layer_size, layer_size)
        self.fc3 = nn.Linear(layer_size, layer_size)
        self.fc4 = nn.Linear(layer_size, 68)
        self.drop1 = drop1
        self.drop2 = drop2

    def forward(self, x,channel):
#         x = self.avgpool(x)
        x = F.tanh(self.conv1(x))
        x = F.tanh(self.conv2(x))
        x = x.view(-1, channel * 31)
        x = F.tanh(self.fc1(x))
        x = F.dropout(x,self.drop1)
        x = F.tanh(self.fc2(x))
        x = F.dropout(x,self.drop2)
        x = F.tanh(self.fc3(x))
        x = self.fc4(x)
        return x

In [15]:
if __name__ == '__main__':
    seed = 777
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    kf = StratifiedKFold(5, shuffle=True, random_state=seed)
    

In [16]:
sequence=train['sequence'].map(lambda seq: [rna_dict[x] for x in seq]).tolist()
struct = train['structure'].map(lambda seq: [struct_dict[x] for x in seq]).tolist()
loop   = train['predicted_loop_type'].map(lambda seq: [loop_dict[x] for x in seq]).tolist()
test_sequence=test['sequence'].map(lambda seq: [rna_dict[x] for x in seq]).tolist()
test_struct = test['structure'].map(lambda seq: [struct_dict[x] for x in seq]).tolist()
test_loop   = test['predicted_loop_type'].map(lambda seq: [loop_dict[x] for x in seq]).tolist()

In [17]:
scaler = MinMaxScaler()
scaler.fit(np.array(sequence))
sequence=scaler.transform(np.array(sequence))
scaler = MinMaxScaler()
scaler.fit(np.array(loop))
loop=scaler.transform(np.array(loop))



In [34]:
def minmax(arr):
    for x in range(len(arr)):
        maxi = max(arr[x])
        mini = min(arr[x])
        for y in  range(len(arr[x])):
            arr[x][y]=(arr[x][y]-mini)/(maxi-mini)
            
minmax(test_sequence)
minmax(test_loop)

In [36]:
def get_train_item(index):
    data = []
    data.append(np.array(np_onehot(sequence[index],4)))
    data.append(np_onehot(loop[index],7))
    data.append(create_adj(index))
    return (data)

def get_test_item(index):
    data = []
    data.append(np.array(np_onehot(test_sequence[index],4)))
    data.append(np_onehot(test_loop[index],7))
    data.append(test_create_adj(index))
    return (data)

In [41]:
def get_folded_seq(index):
    final = []
    seq = sequence[index]
    if(len(seq)==107):
        seq = seq[0:68]
        final.append(seq[0:34])
        final.append(list(reversed(seq[34:68])))
    else:
        seq = seq[0:92]
        final.append(seq[0:46])
        final.append(list(reversed(seq[46:92])))
    return final

def get_folded_loop(index):
    final = []
    seq = loop[index]
    if(len(seq)==107):
        seq = seq[0:68]
        final.append(seq[0:34])
        final.append(list(reversed(seq[34:68])))
    else:
        seq = seq[0:92]
        final.append(seq[0:46])
        final.append(list(reversed(seq[46:92])))
    return final

def test_get_folded_seq(index):
    final = []
    seq = test_sequence[index]
    if(len(seq)==107):
        seq = seq[0:68]
        final.append(seq[0:34])
        final.append(list(reversed(seq[34:68])))
    else:
        seq = seq[0:92]
        final.append(seq[0:46])
        final.append(list(reversed(seq[46:92])))
    return final

def test_get_folded_loop(index):
    final = []
    seq = test_loop[index]
    if(len(seq)==107):
        seq = seq[0:68]
        final.append(seq[0:34])
        final.append(list(reversed(seq[34:68])))
    else:
        seq = seq[0:92]
        final.append(seq[0:46])
        final.append(list(reversed(seq[46:92])))
    return final

def build_channel(index):    
    final = []
    for x,y in zip(get_folded_seq(index),get_folded_loop(index)):
        tmp = []
        for a,b in zip(x,y):
            tmp.append([a,b])
        final.append(tmp)
    return final

def test_build_channel(index):    
    final = []
    for x,y in zip(test_get_folded_seq(index),test_get_folded_loop(index)):
        tmp = []
        for a,b in zip(x,y):
            tmp.append([a,b])
        final.append(tmp)
    return final

def get_one_hot(index,base,adj):
#     start = perf_counter()
#     end = perf_counter()
#     print(end-start)
#     print("num of paired: ",sum(adj[base]))
    seq = np.zeros((4,5))
    loops = np.zeros((4,8))
    j = 0
#     print(train.iloc[index]["pair_prob"][base])
    x = adj[base]
    for i in range(len(x)):
        if(j==3):
            break
        if(x[i]==1):
#             print(np_onehot(sequence[index][base]+1,5))
            seq[j]=(np_onehot(sequence[index][base]+1,5))
            loops[j]=(np_onehot(loop[index][base]+1,8))
            j+=1
#     print(j)
    return (seq,loops)
            

In [58]:
xs = train
ys = train[['reactivity','deg_Mg_pH10','deg_Mg_50C']]
x_train,x_test,y_train,y_test = train_test_split(xs,ys,test_size=0.2,shuffle=True)


(array([   0,    1,    2, ..., 2094, 2095, 2096]), array([420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432,
       433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445,
       446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458,
       459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471,
       472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484,
       485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497,
       498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510,
       511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523,
       524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536,
       537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549,
       550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562,
       563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575,
       576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588,
       589, 5

In [80]:
def train_cnn(layer_size,alpha,batch_size,channel,epochs,drop1,drop2):
    net = CNN2D(layer_size,channel,drop1,drop2)
    optimizer = torch.optim.Adam(net.parameters(), lr=alpha)
    losses = []
    avgpool = nn.AdaptiveAvgPool2d((34, 2))
    for epoch in range(epochs):
        batch = []
        cnn_input = []
        results = []
        ys = []
        net.train()
        for x in range(1500):

            input_t=build_channel(x)
            batch.append(input_t)
            ys.append(train.iloc[x]["reactivity"])
            if(len(batch)%batch_size==batch_size-1):
#                 print(torch.Tensor(batch).shape)
                batch_input = avgpool(torch.Tensor(batch).to(device))
                result = net(batch_input,channel)
                loss = criterion(result, torch.Tensor(ys).to(device))
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
                optimizer.zero_grad()
                batch = []
                ys = []
    return sum(losses)/len(losses),net

def test_cnn(cnn,batch_size,channel,epochs):
    net = cnn
#     optimizer = torch.optim.Adam(net.parameters(), lr=alpha)
    losses = []
    avgpool = nn.AdaptiveAvgPool2d((34, 2))
    for epoch in range(epochs):
        batch = []
        cnn_input = []
        results = []
        ys = []
        net.eval()
        for x in range(1500,len(train)):

            input_t=build_channel(x)
            batch.append(input_t)
            ys.append(train.iloc[x]["reactivity"])
            if(len(batch)%batch_size==batch_size-1):
#                 print(torch.Tensor(batch).shape)
                batch_input = avgpool(torch.Tensor(batch).to(device))
                result = net(batch_input,channel)
                loss = criterion(result, torch.Tensor(ys).to(device))
#                 loss.backward()
#                 optimizer.step()
                losses.append(loss.item())
#                 optimizer.zero_grad()
                batch = []
                ys = []
    return sum(losses)/len(losses)

In [81]:
def objective(trial):
    layer_size = 100
    alpha = 0.005
    batch_size = 80
    test_batch_size = trial.suggest_int('test_batch_size', 10, 100)
    epochs = trial.suggest_int('epochs', 10, 100)
    drop1 = trial.suggest_uniform('dropout_rate', 0.0, 1.0)
    drop2 = trial.suggest_uniform('dropout_rate2', 0.0, 1.0)
    channels = 5
    results,net = train_cnn(layer_size,alpha,batch_size,channels,epochs,drop1,drop2)
    test_results = test_cnn(net,test_batch_size,channels,1)
    return test_results
study = optuna.create_study()
study.optimize(objective, n_trials=50)

[I 2020-10-02 22:06:27,374] A new study created in memory with name: no-name-d9b2848f-d7e2-439b-9220-149cd3a4d352
[I 2020-10-02 22:07:58,723] Trial 0 finished with value: 0.36479505045073374 and parameters: {'test_batch_size': 82, 'epochs': 76, 'dropout_rate': 0.6266424260925496, 'dropout_rate2': 0.1925256072483852}. Best is trial 0 with value: 0.36479505045073374.
[I 2020-10-02 22:08:47,990] Trial 1 finished with value: 0.37408627657329335 and parameters: {'test_batch_size': 36, 'epochs': 41, 'dropout_rate': 0.32559093668713524, 'dropout_rate2': 0.7838413687762356}. Best is trial 0 with value: 0.36479505045073374.
[I 2020-10-02 22:09:47,282] Trial 2 finished with value: 0.3656082192192907 and parameters: {'test_batch_size': 26, 'epochs': 46, 'dropout_rate': 0.7015473193011258, 'dropout_rate2': 0.013918710380957156}. Best is trial 0 with value: 0.36479505045073374.
[I 2020-10-02 22:10:23,195] Trial 3 finished with value: 0.3552745034297307 and parameters: {'test_batch_size': 96, 'epoch

[I 2020-10-02 22:41:59,296] Trial 31 finished with value: 0.34233276546001434 and parameters: {'test_batch_size': 98, 'epochs': 62, 'dropout_rate': 0.06822048778273514, 'dropout_rate2': 0.3676579561959485}. Best is trial 15 with value: 0.33007446442331584.
[I 2020-10-02 22:42:58,553] Trial 32 finished with value: 0.3503526863124635 and parameters: {'test_batch_size': 34, 'epochs': 47, 'dropout_rate': 0.14097251427996169, 'dropout_rate2': 0.35122068622752395}. Best is trial 15 with value: 0.33007446442331584.
[I 2020-10-02 22:44:04,016] Trial 33 finished with value: 0.34898888568083447 and parameters: {'test_batch_size': 91, 'epochs': 58, 'dropout_rate': 0.2999884280316211, 'dropout_rate2': 0.4678815695237462}. Best is trial 15 with value: 0.33007446442331584.
[I 2020-10-02 22:45:12,945] Trial 34 finished with value: 0.3493573580469404 and parameters: {'test_batch_size': 84, 'epochs': 60, 'dropout_rate': 0.01281695390132842, 'dropout_rate2': 0.0893311730311346}. Best is trial 15 with va

In [None]:
sum(losses)/len(losses)

In [None]:
plt.plot(losses)

In [None]:
class DNN(nn.Module):
    def __init__(self,layer_size):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(52, layer_size)
        self.fc2 = nn.Linear(layer_size, layer_size)
        self.fc3 = nn.Linear(layer_size, layer_size)
        self.fc4 = nn.Linear(layer_size, layer_size)
        self.fc5 = nn.Linear(layer_size, layer_size)
        self.fc6 = nn.Linear(layer_size, layer_size)
        self.fc7 = nn.Linear(layer_size,1)
        
    def forward(self, x):
        x = self.fc1(x)
        x = torch.tanh(x)
#         x = F.dropout(x,self.drop1)
#         x = self.fc2(x)
#         x = torch.tanh(x)
#         x = F.dropout(x,self.drop2)
#         x = self.fc3(x)
#         x = torch.tanh(x)
#         x = F.dropout(x,self.drop3)
#         x = self.fc4(x)
#         x = torch.tanh(x)
#         x = F.dropout(x,self.drop3)
#         x = self.fc5(x)
#         x = torch.tanh(x)
#         x = F.dropout(x,self.drop3)
        x = self.fc7(x)
        output =  x = torch.relu(x)
        
        return output

In [None]:
def train_dnn(layer_size,alpha,batch_size):    
    dnn = DNN(layer_size)
    epochs = 1
    optimizer = torch.optim.SGD(net.parameters(), lr=alpha)
    losses = []
    input_t =[]
    for epoch in range(epochs):
        batch = []
        cnn_input = []
        results = []
        ys = []
        dnn.train()
        react = train["reactivity"].values
        for x in tqdm(range(len(train))):
            adj = create_adj(x)
            for y in range(train.iloc[x]["seq_scored"]):
    #             start = perf_counter()
                one_hot = get_one_hot(x,y,adj)

                for s in (one_hot[0]):
    #                 print(s)
                    input_t.extend(s)
                for l in (one_hot[1]):
    #                 print(l)
                    input_t.extend(l)
    #             end = perf_counter()
    #             print(end-start)
    #             print(input_t)
    #             break
    #             start=perf_counter()
                optimizer.zero_grad()
                batch.append(input_t)
                input_t = []
                ground = [react[x][y]]
                ys.append(ground)
    #             end = perf_counter()
    #             print(end-start)
    #             ys = np.asarray(ys).reshape((1,49))
                if(len(batch)%batch_size==batch_size-1):
                    result = dnn(torch.Tensor(batch).to(device))
                    loss = criterion(result, torch.Tensor(ys).to(device))
                    loss.backward()
                    optimizer.step()
                    losses.append(loss.item())
                    batch = []
                    ys = []
    return (sum(losses)/len(losses))

In [None]:
def objective(trial):
    layer_size = trial.suggest_int('layer_size', 10, 50)
    alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-3)
    batch_size = trial.suggest_int('batch_size', 500, 2000)
    results = train_dnn(layer_size,alpha,batch_size)
    return results

In [None]:
study = optuna.create_study()
study.optimize(objective, n_trials=10)

In [None]:
print(sum(losses)/len(losses))
plt.plot(losses)