In [3]:
import torch

In [49]:
import torch.nn as nn
import torch.nn.functional as F 
class BLSTM(nn.Module):
    def __init__(self, config) -> None:
        super().__init__()
        self.input_dim=config['input_dim']
        self.hidden_dim=config['hidden_dim']
        self.num_layers=config['num_layers']
        self.use_blstm=config['use_blstm']
        self.dropout_rate=config['dropout_rate']
        self.max_text_len=config['max_text_len']
        self.attention=config['attention']
        self.num_directions=2 if self.use_blstm else 1
        if self.attention:
            self.attention_layer=nn.Sequential(
                nn.Linear(self.hidden_dim,self.hidden_dim),
                nn.ReLU(inplace=True)                
            )
        self.lstm=nn.LSTM(
            input_size=self.input_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            dropout=self.dropout_rate,
            batch_first=True,
            bidirectional=self.use_blstm
        )
        self.batch_norm=nn.BatchNorm1d(self.num_directions*self.hidden_dim)
        self.num_start_dim=self.num_directions*self.input_dim*self.max_text_len
    
    def attention_net_with_w(self,lstm_out):
        # lstm_out_1,lstm_out_2=lstm_out.chunk(2,dim=2)
        lstm_out_1,lstm_out_2=torch.chunk(lstm_out, 2, -1)
        h=lstm_out_1+lstm_out_2 
        atten_w=self.attention_layer(h)
        m=nn.Tanh()(h)
        atten_context=torch.bmm(m, atten_w.transpose(1,2))
        softmax_w=F.softmax(atten_context,dim=-1)
        context=torch.bmm(h.transpose(1,2),softmax_w)
        result=torch.sum(context,dim=-1)
        result=nn.Dropout(self.dropout_rate)(result)
        return result
    
    def forward(self,input):
        batch_size=input.shape[0]
        hidden_state=torch.randn(self.num_layers*self.num_directions,batch_size,self.hidden_dim)
        cell_sate=torch.randn(self.num_layers*self.num_directions,batch_size,self.hidden_dim)
        self.lstm.flatten_parameters()
        outputs,(_,_)=self.lstm(input.float(),(hidden_state,cell_sate))
        if self.attention:
            outputs=self.attention_net_with_w(outputs)
            outputs=self.batch_norm(outputs)
        else:
            outputs=outputs.transpose(1,2)
            outputs=self.batch_norm(outputs)
            outputs=outputs.transpose(1,2)
        return outputs

In [53]:
import torch.nn.functional as F 
lstm_out=torch.randn(64,70,64)
lstm_out_1,lstm_out_2=lstm_out.chunk(2,dim=2)
h=lstm_out_1+lstm_out_2 
attention_layer=nn.Sequential(
                nn.Linear(256,256),
                nn.ReLU(inplace=True)                
            )
atten_w=attention_layer(h)
m=nn.Tanh()(h)
atten_context=torch.bmm(m,atten_w.transpose(1,2))
softmax_w=F.softmax(atten_context,dim=-1)
context=torch.bmm(h.transpose(1,2),softmax_w)
# context = torch.bmm(h, softmax_w)
context.shape

RuntimeError: mat1 and mat2 shapes cannot be multiplied (4480x32 and 256x256)

In [47]:
def attention_net_with_w(lstm_out):
        # lstm_out_1,lstm_out_2=lstm_out.chunk(2,dim=2)
        lstm_out_1,lstm_out_2=torch.chunk(lstm_out, 2, -1)
        h=lstm_out_1+lstm_out_2 
        atten_w=attention_layer(h)
        m=nn.Tanh()(h)
        atten_context=torch.bmm(m,atten_w.transpose(1,2))
        softmax_w=F.softmax(atten_context,dim=-1)
        context=torch.bmm(h.transpose(1,2),softmax_w)
        result=torch.sum(context,dim=-1)
        result=nn.Dropout(0.5)(result)
        return result

In [28]:
x=torch.Tensor([
    [1,2,3],
    [2,3,4]
])
x=torch.sum(x,dim=-1)
nn.Dropout(0.5)(x)

tensor([ 0., 18.])

In [2]:
config = {
    'input_dim': 100,
    'hidden_dim': 256,
    'num_layers': 2,
    'use_blstm': True,
    'dropout_rate': 0.5,
    'max_text_len': 50,
    'attention': True
    # 'embeddings': torch.randn(10000, 100)  # 假设的嵌入矩阵
}

model = BLSTM(config)

In [54]:
#仅仅用于测试
import numpy as np
if __name__ == '__main__':
    # The code below is used for test!

    # Training
    batch_size = 128
    nb_epoch = 100

    # Embedding
    # 暂时考虑amino acid用26个字母表示+1个为空
    vocab_size = 27
    embedding_size = 128

    # Convolution
    # 第一个CNN Module：filter_length = 3
    filter_length1 = 3
    pool_length = 2
    feature_size = 32

    # LSTM
    lstm_output_size = 64

    x = np.random.randn(64, 70, 64)
    x = torch.tensor(x, dtype=torch.float32)

    lstm_config = {'dropout_rate': 0.2, 'input_dim': 64,
                   'hidden_dim':64, 'output_dim': 2, 'num_layers': 2, 'max_text_len': 70, 'classifier': True,
                   'use_norm': True, 'use_blstm': True,'attention':False}
    #original: hidden_dim = 64

    model = BLSTM(lstm_config)
    print(x.dtype)
    y = model(x)
    sum = torch.sum(y)

    grads = torch.autograd.grad(sum, model.parameters())

torch.float32
