### Loading libraries

In [114]:
import torch
import torch.nn as nn
import os

### Setup

In [115]:
# Pick the first CUDA GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

In [116]:
# One-letter residue codes; 20 characters in total.
aminoacids = "ACDEFGHIKLMNPQRSTVWY"
# Output label alphabet: the character at position k is the label printed for
# predicted class index k, so the order must not change.
# NOTE(review): presumably h=helix, s=strand, t=turn, '-'=other — confirm
# against the labels used when the checkpoint was trained.
struct_types = "hst-"

### Model loading (the model file must be in the same path as the notebook)

In [125]:
class RecurrentModel(nn.Module):
    """Multi-layer ``nn.RNN`` followed by a linear projection of every timestep.

    Args:
        in_size: number of features per input timestep.
        out_size: number of scores produced per timestep.
        hidden_size: width of each recurrent layer.
        n_layers: number of stacked recurrent layers.
    """

    def __init__(self, in_size, out_size, hidden_size, n_layers):
        super().__init__()
        # The attribute names below become the prefixes of the state_dict keys,
        # so they must stay unchanged for saved checkpoints to keep loading.
        # Alternative left by the author: nn.GRU with the same three arguments.
        self.rnn_model = nn.RNN(
            input_size=in_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
        )
        self.final_layer = nn.Linear(
            in_features=hidden_size,
            out_features=out_size,
        )

    def forward(self, x):
        """Run the recurrent stack and project each timestep.

        Args:
            x: input handed straight to ``nn.RNN``; the RNN is built with its
                default layout (``batch_first`` is not set), i.e.
                ``(seq_len, batch, in_size)``.

        Returns:
            A ``(scores, hidden)`` tuple: ``scores`` is the linear projection of
            the RNN output for every timestep, with no activation applied, and
            ``hidden`` is the second value returned by ``nn.RNN``.
        """
        rnn_out, last_hidden = self.rnn_model(x)
        scores = self.final_layer(rnn_out)
        return scores, last_hidden

In [126]:
def get_model(in_size, out_size, hidden_size, n_layers):
    """Build a ``RecurrentModel``, print its layout and return it.

    Args:
        in_size: number of features per input timestep.
        out_size: number of scores produced per timestep.
        hidden_size: width of the recurrent layers; ``None`` means
            "use ``in_size``".
        n_layers: number of stacked recurrent layers.

    Returns:
        The newly constructed ``RecurrentModel``.
    """
    width = in_size if hidden_size is None else hidden_size
    net = RecurrentModel(in_size, out_size, width, n_layers)
    print(net)
    return net

In [127]:
# 20 input features: one per character of the 20-letter residue alphabet.
input_dim = 20
# 4 output scores: one per character of the "hst-" label alphabet.
output_dim = 4
# These sizes must match the checkpoint that is loaded into the model below.
rnn = get_model(
    in_size=input_dim,
    out_size=output_dim,
    hidden_size=20,
    n_layers=3,
)

RecurrentModel(
  (rnn_model): RNN(20, 20, num_layers=3)
  (final_layer): Linear(in_features=20, out_features=4, bias=True)
)


In [128]:
# Read the saved parameters; the relative path is resolved against the
# current working directory.
# map_location=device remaps the stored tensors onto `device` while loading.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints that come from a trusted source.
state = torch.load('struc_classifier_SD2_0.7907', map_location=device)

In [129]:
# Copy the loaded parameters into the model; the displayed return value
# reports whether every key in the checkpoint matched the model.
rnn.load_state_dict(state)

<All keys matched successfully>

### Show model architecture

In [134]:
# Bare expression: the notebook renders the module's layer summary.
rnn

RecurrentModel(
  (rnn_model): RNN(20, 20, num_layers=3)
  (final_layer): Linear(in_features=20, out_features=4, bias=True)
)

In [135]:
def sequenceToTensor(sequence):
    """One-hot encode an amino-acid sequence.

    Args:
        sequence: a sized iterable of one-letter residue codes (typically a
            ``str``), each taken from the alphabet ``ACDEFGHIKLMNPQRSTVWY``.

    Returns:
        A tensor of shape ``(len(sequence), 20)`` filled with zeros except for
        a single ``1.`` per row, in the column of that row's residue.

    Raises:
        ValueError: if an element is not exactly one of the 20 residue codes.
            The message names the offending element and its position.
    """
    aminoacids = "ACDEFGHIKLMNPQRSTVWY"
    # Exact per-residue lookup. str.index would do substring matching, which
    # silently accepts multi-character tokens ("AC" -> column 0) and empty
    # strings, and reports unknown residues only as "substring not found".
    aa_to_idx = {aa: idx for idx, aa in enumerate(aminoacids)}
    seqTensor = torch.zeros((len(sequence), len(aminoacids)))
    for j, aa in enumerate(sequence):
        aa_idx = aa_to_idx.get(aa)
        if aa_idx is None:
            raise ValueError(
                f"unknown residue {aa!r} at position {j}; "
                f"expected one of {aminoacids}"
            )
        seqTensor[j, aa_idx] = 1.
    return seqTensor

### Usage: enter an amino acid sequence

In [136]:
# Read the sequence interactively. Surrounding whitespace (common when the
# sequence is pasted) is stripped, because any character outside the residue
# alphabet makes the one-hot encoding step fail.
aminoacid_sequence = input().strip()

AYWTVTTTTTLMLMLMLMLMLLM


In [137]:
# Echo the sequence that was entered so it is visible in the saved notebook.
print(aminoacid_sequence)

AYWTVTTTTTLMLMLMLMLMLLM


### Data preprocessing step 

In [138]:
# One-hot encode the sequence: one row per residue, one column per letter of
# the residue alphabet.
tensor_seq = sequenceToTensor(aminoacid_sequence)

### Prediction step

In [139]:
# Alias only: `classifier` and `rnn` refer to the same module object.
classifier = rnn

In [140]:
# Switch the module to evaluation mode, then run the forward pass without
# recording gradients.
classifier.eval()
with torch.no_grad():
    # unsqueeze(1) inserts a size-1 axis at dim 1; the RNN is built without
    # batch_first, so dim 1 is the batch axis.
    # The second return value (the hidden state) is not needed here.
    out, _ = classifier(tensor_seq.unsqueeze(1))

In [141]:
# Remove only the size-1 axis at dim 1 (added with unsqueeze(1) before the
# forward pass). A bare squeeze() would also drop the sequence axis for a
# one-residue input, leaving a 1-D tensor on which argmax over dim 1 fails.
# argmax over the last remaining axis picks the top-scoring class per residue.
out_idxs = out.squeeze(1).argmax(dim=1)
out_idxs

tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Encoding the prediction in a readable format

In [142]:
# Translate each predicted class index into its label character, then print
# the residues and their predicted labels as two aligned lists.
predicted = [struct_types[class_idx] for class_idx in out_idxs.tolist()]
print("aminoacids : ", list(aminoacid_sequence))
print("sec. struct: ", predicted)

aminoacids :  ['A', 'Y', 'W', 'T', 'V', 'T', 'T', 'T', 'T', 'T', 'L', 'M', 'L', 'M', 'L', 'M', 'L', 'M', 'L', 'M', 'L', 'L', 'M']
sec. struct:  ['-', '-', '-', '-', '-', '-', '-', '-', '-', '-', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h']
