In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
import torch
import torch.nn as nn
from transformers import RobertaModel, RobertaTokenizer, get_linear_schedule_with_warmup
from torch.optim import AdamW,Adam
from torch.utils.data import Dataset, DataLoader,random_split
from tqdm import tqdm
import random
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import wandb

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (via ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``), then sets
    cuDNN to deterministic mode with benchmark mode switched off.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Host-side generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU and every CUDA device).
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN flags: deterministic algorithms, benchmark mode off.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Seed all RNGs once, with a fixed value, right after the helper is defined.
set_seed(42)


def count_parameters_per_layer(model):
    """Map every submodule name to its number of trainable parameters.

    Names come from ``model.named_modules()``, so the root module is stored
    under the empty-string key and a parent's total includes the parameters
    of its children. Parameters with ``requires_grad=False`` are skipped.

    Args:
        model: Module whose submodules are inspected.

    Returns:
        dict: ``{module_name: trainable_parameter_count}``.
    """
    def _trainable_numel(module):
        # Total element count over the parameters that will receive gradients.
        return sum(p.numel() for p in module.parameters() if p.requires_grad)

    return {
        layer_name: _trainable_numel(layer)
        for layer_name, layer in model.named_modules()
    }

# RMSE loss function
def rmse_loss(y_pred, y_true):
    """Return the root-mean-squared error between ``y_pred`` and ``y_true``.

    The squared error is averaged over all elements (mean reduction) and the
    square root of that mean is returned as a 0-dim tensor.
    """
    return F.mse_loss(y_pred, y_true).sqrt()

# Use the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "CUDA is available. Using GPU!"
    if device.type == "cuda"
    else "CUDA is not available. Using CPU!"
)
    

CUDA is available. Using GPU!


In [6]:
class ECFPDataset(Dataset):
    """Map-style dataset yielding one fingerprint row with its two targets.

    Args:
        ecfp: Fingerprint rows; any object exposing ``.tolist()``.
        hlm: Values returned as the ``hlm`` target; must expose ``.tolist()``.
        mlm: Values returned as the ``mlm`` target; must expose ``.tolist()``.

    Note:
        The constructor takes ``(ecfp, hlm, mlm)`` while ``__getitem__``
        returns ``(ecfp, mlm, hlm)``. The order is kept as-is so existing
        callers that unpack the tuple continue to work.
    """

    def __init__(self,ecfp,hlm,mlm):
        self.ecfp = ecfp.tolist()
        self.hlm = hlm.tolist()
        self.mlm = mlm.tolist()

    def __len__(self):
        """Return the number of samples.

        Needed by ``DataLoader`` samplers and ``random_split``, which call
        ``len(dataset)``; without it they raise ``TypeError``.
        """
        return len(self.ecfp)

    def __getitem__(self, index):
        """Return ``(ecfp, mlm, hlm)`` for ``index`` as tensors.

        ``ecfp`` is an int64 tensor; ``mlm`` and ``hlm`` are float32 tensors.
        """
        mlm = torch.tensor(self.mlm[index],dtype=torch.float)
        hlm = torch.tensor(self.hlm[index],dtype=torch.float)
        # torch.long is what the builtin `int` dtype resolves to; spelled out for clarity.
        ecfp = torch.tensor(self.ecfp[index],dtype=torch.long)
        return ecfp, mlm, hlm

In [None]:
class AttentionBasedRegressor(nn.Module):
    """RoBERTa encoder with attention-weighted pooling and two regression heads.

    The last encoder layer's attention is used to weight the token embeddings
    into a single vector, which goes through ``fc1 -> ReLU -> dropout`` and is
    then fed to two independent linear heads (MLM and HLM).

    Args:
        pretrained_model_name: Name or path passed to
            ``RobertaModel.from_pretrained``.

    Note:
        ``regressor`` is the MLM head (original attribute name kept) and
        ``hlm_regressor`` is the HLM head. Previously both predictions came
        from the same ``regressor`` layer and were therefore always identical.
        Checkpoints saved before this change do not contain ``hlm_regressor.*``
        and need ``load_state_dict(..., strict=False)``.
    """

    def __init__(self, pretrained_model_name):
        super(AttentionBasedRegressor, self).__init__()
        self.encoder = RobertaModel.from_pretrained(pretrained_model_name, output_attentions=True)

        hidden_size = self.encoder.config.hidden_size
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()

        # Linear layers for the final predictions: one head per target.
        self.regressor = nn.Linear(hidden_size, 1)
        self.hlm_regressor = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask=None):
        """Return ``(mlm_prediction, hlm_prediction)``, one value per sample each.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Optional mask forwarded to the encoder.
        """
        outputs = self.encoder(input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state

        # Last layer's attention; per the Hugging Face docs this is post-softmax
        # with shape (batch, heads, query, key).
        last_layer_attention = outputs.attentions[-1]
        head_averaged = last_layer_attention.mean(dim=1)  # (batch, query, key)

        # Average over the QUERY axis to get how much attention each token
        # receives. Averaging over the key axis (as before) is degenerate:
        # every softmax row sums to 1, so each token would get the constant
        # weight 1/seq_len and this would be plain mean pooling.
        token_weights = head_averaged.mean(dim=1)  # (batch, key)
        weighted_avg = torch.sum(sequence_output * token_weights.unsqueeze(-1), dim=1)

        x = self.fc1(weighted_avg)
        x = self.relu(x)
        x = self.dropout(x)

        mlm_prediction = self.regressor(x)
        hlm_prediction = self.hlm_regressor(x)
        return mlm_prediction, hlm_prediction


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphAttentionLayer(nn.Module):
    """Single-head graph attention layer over a dense adjacency matrix.

    Args:
        in_features: Size of each input node feature vector.
        out_features: Size of each output node feature vector.
        dropout: Dropout probability applied to the attention matrix.
        alpha: Negative slope of the LeakyReLU applied to raw scores.
        concat: If True the output goes through ELU (intended for hidden
            heads whose outputs are concatenated); otherwise it is returned
            without an activation.
    """

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        # Weight matrix projecting node features to the output space.
        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)

        # Attention vector; rows [0, out_features) score the source node and
        # rows [out_features, 2*out_features) score the destination node.
        self.a = nn.Parameter(torch.zeros(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj):
        """Compute attention-aggregated node features.

        Args:
            input: Node feature matrix of shape ``(N, in_features)``.
            adj: Adjacency; entries ``> 0`` mark pairs allowed to attend.
                Expected to broadcast against the ``(N, N)`` score matrix.

        Returns:
            Tensor with ``out_features`` columns per node.
        """
        h = torch.mm(input, self.W)

        # e[i, j] = LeakyReLU(a^T [h_i || h_j]) = LeakyReLU(a_src.h_i + a_dst.h_j).
        # Scoring the two halves separately and broadcast-adding them avoids
        # building the (N, N, 2*out_features) tensor of all concatenated pairs.
        src_score = torch.matmul(h, self.a[:self.out_features])  # (N, 1)
        dst_score = torch.matmul(h, self.a[self.out_features:])  # (N, 1)
        e = self.leakyrelu(src_score + dst_score.transpose(0, 1))

        # Masked attention: non-edges get a very large negative score so that
        # they vanish after the softmax.
        zero_vec = torch.full_like(e, -9e15)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)  # apply the ELU activation before returning
        else:
            return h_prime  # return without an activation

class GAT(nn.Module):
    """Two-stage graph attention network built from ``GraphAttentionLayer``.

    Stage one runs ``nheads`` attention heads on the input and concatenates
    their outputs; stage two is a single attention layer mapping the
    concatenation to ``nclass`` columns. The forward pass returns
    log-softmax values over those columns.

    Args:
        nfeat: Input feature size per node.
        nhid: Output size of each hidden head.
        nclass: Number of output columns.
        dropout: Dropout probability used on features and inside each layer.
        alpha: LeakyReLU negative slope passed to each attention layer.
        nheads: Number of hidden attention heads.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
        super().__init__()
        self.dropout = dropout

        # Hidden heads live in a plain list; each one is also registered as
        # `attention_<i>` so its parameters belong to this module.
        self.attentions = []
        for head_idx in range(nheads):
            head = GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True)
            self.add_module('attention_{}'.format(head_idx), head)
            self.attentions.append(head)

        # Output layer consuming the concatenated head outputs.
        self.out_att = GraphAttentionLayer(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)

    def forward(self, x, adj):
        """Return per-node log-softmax scores for features ``x`` and adjacency ``adj``."""
        features = F.dropout(x, self.dropout, training=self.training)

        head_outputs = []
        for head in self.attentions:
            head_outputs.append(head(features, adj))
        hidden = torch.cat(head_outputs, dim=1)

        hidden = F.dropout(hidden, self.dropout, training=self.training)
        scores = F.elu(self.out_att(hidden, adj))
        return F.log_softmax(scores, dim=1)

# Example usage:
# model = GAT(nfeat=features.shape[1], nhid=8, nclass=labels.max().item() + 1, dropout=0.6, alpha=0.2, nheads=8)
# optimizer = Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
