In [1]:
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn

# Load data
# Labels come from the CSV; the two embedding arrays come from .npz archives.
df = pd.read_csv(r"/home/ec2-user/SageMaker/Chatglm3_vector/question_paragraph_1比2.csv")
label = df['label'].to_numpy()

# np.load on an .npz returns a lazy NpzFile that keeps the file open; use it as
# a context manager so the handle is released once the array has been read.
with np.load(r"/home/ec2-user/SageMaker/Chatglm3_vector/question2answer-sen1_vector.npz") as load_np1:
    vector1_ = load_np1['sen1']
# NOTE(review): the second archive is also indexed with the key 'sen1' even
# though the file is named "...sen2_vector.npz" — confirm this is the key the
# archive was actually saved under.
with np.load(r"/home/ec2-user/SageMaker/Chatglm3_vector/question2answer-sen2_vector.npz") as load_np2:
    vector2_ = load_np2['sen1']

# Split data
# One call splits both embedding arrays and the labels with the same shuffle,
# holding out 10% of the rows as the test set.
train_vector1, test_vector1, train_vector2, test_vector2, train_y, test_y = train_test_split(
    vector1_, vector2_, label, test_size=0.1, random_state=42)

# Convert to PyTorch tensors
# The two embeddings of each row are concatenated along the last axis.
# squeeze(1) drops axis 1 when it has size 1 — presumably the arrays are stored
# as (N, 1, D); TODO confirm against the files.
train_vector1 = torch.tensor(train_vector1, dtype=torch.float32)
train_vector2 = torch.tensor(train_vector2, dtype=torch.float32)
final_vector_train = torch.cat((train_vector1, train_vector2), dim=-1).squeeze(1)
train_y = torch.tensor(train_y, dtype=torch.long)  # int64 class indices for CrossEntropyLoss

test_vector1 = torch.tensor(test_vector1, dtype=torch.float32)
test_vector2 = torch.tensor(test_vector2, dtype=torch.float32)
final_vector_test = torch.cat((test_vector1, test_vector2), dim=-1).squeeze(1)
test_y = torch.tensor(test_y, dtype=torch.long)

# Create TensorDatasets
trainsets = TensorDataset(final_vector_train, train_y)
testsets = TensorDataset(final_vector_test, test_y)

# Create DataLoaders
# Only the training loader shuffles; the test loader keeps a fixed order.
trainloader = DataLoader(trainsets, batch_size=128, shuffle=True, num_workers=0)
testloader = DataLoader(testsets, batch_size=128, shuffle=False, num_workers=0)



In [2]:
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention with learned Q/K/V projections.

    Args:
        input_dim: Size of the last (feature) dimension of the input; the
            query, key and value projections all map input_dim -> input_dim.

    Input / output:
        ``forward`` accepts a tensor whose last dimension is ``input_dim`` and
        returns a tensor of the same shape.

        * For inputs with 3 or more dimensions ``(..., seq, input_dim)``,
          attention is computed over the second-to-last (sequence) axis.
        * A 2-D input ``(batch, input_dim)`` is treated as ``batch`` independent
          sequences of length 1. Without this, ``Q @ K^T`` would have shape
          ``(batch, batch)`` and every sample would attend to the *other
          samples in the same mini-batch*, making each prediction depend on
          batch composition, batch size and shuffling. With a single position
          the attention weight is exactly 1, so the result for 2-D input equals
          the value projection of ``x``.
    """

    def __init__(self, input_dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(input_dim, input_dim)
        self.key = nn.Linear(input_dim, input_dim)
        self.value = nn.Linear(input_dim, input_dim)
        # Normalise scores over the key axis (last dimension of the score matrix).
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Apply self-attention to ``x`` and return a tensor of the same shape."""
        # Give flat (batch, dim) input an explicit length-1 sequence axis so the
        # batch axis is never mistaken for the sequence axis.
        added_seq_axis = x.dim() == 2
        if added_seq_axis:
            x = x.unsqueeze(1)

        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)

        # Scaled dot-product scores: (..., seq, seq), scaled by sqrt(feature dim).
        attention_scores = torch.matmul(Q, K.transpose(-2, -1)) / (x.size(-1) ** 0.5)
        attention_weights = self.softmax(attention_scores)
        attention_output = torch.matmul(attention_weights, V)

        # Restore the caller's original 2-D layout.
        if added_seq_axis:
            attention_output = attention_output.squeeze(1)

        return attention_output

class AttentionModel(nn.Module):
    """Classifier: three stacked SelfAttention layers followed by a 3-layer MLP head.

    Args:
        input_dim: Feature size expected by the attention layers and by ``fc1``.
        num_classes: Number of output logits produced by ``fc3``.

    ``forward`` returns raw logits (no softmax is applied here).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Attention stack; every layer keeps the feature size at input_dim.
        self.self_attention1 = SelfAttention(input_dim)
        self.self_attention2 = SelfAttention(input_dim)
        self.self_attention3 = SelfAttention(input_dim)

        # MLP head: input_dim -> 512 -> 128 -> num_classes.
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the attention stack and the MLP head; return logits."""
        hidden = x
        for attention in (self.self_attention1, self.self_attention2, self.self_attention3):
            hidden = attention(hidden)

        # ReLU after the two hidden layers only; the final layer stays linear.
        for dense in (self.fc1, self.fc2):
            hidden = self.relu(dense(hidden))

        logits = self.fc3(hidden)
        return logits


In [3]:
import os 
# Training hyper-parameters.
lr = 5e-5
epochs = 100
epoch_save_test = 20  # evaluate on the test set and save a checkpoint every N epochs

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and the shuffling of the training DataLoader, which draws
# from the same global generator) is repeatable when the notebook is re-run
# top to bottom. 42 matches the random_state used for the train/test split.
torch.manual_seed(42)

# The model's input width is the width of the concatenated feature vectors.
model = AttentionModel(input_dim=final_vector_train.size(1), num_classes=2)

# NOTE(review): `weight` is created but never passed to the loss below, so the
# loss is currently unweighted. If class weighting was intended it would have
# to be supplied as nn.CrossEntropyLoss(weight=weight) — confirm before changing.
weight = torch.tensor([1.0, 2.0], dtype=torch.float32)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Directory that receives the periodic checkpoints; created if missing.
model_folder = "/home/ec2-user/SageMaker/Chatglm3_vector/attention_retreival_model"
os.makedirs(model_folder, exist_ok=True)

In [5]:
import torch
import datetime
import os
from tqdm import tqdm

# Train for `epochs` epochs. Every `epoch_save_test` epochs the model is
# evaluated on the test loader and a checkpoint is written whose file name
# records the time, epoch, batch size, learning rate and the current metrics.
for epoch in tqdm(range(epochs), total=epochs, desc="training..."):
    print("epoch:", epoch)
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    try:
        for inputs, labels in trainloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Predicted class = index of the largest logit in each row.
            predicted_train = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted_train == labels).sum().item()

        # Loss is the mean of the per-batch losses; accuracy is per sample.
        avg_train_loss = running_loss / len(trainloader)
        train_accuracy = 100 * correct_train / total_train
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
    except Exception as e:
        # Report which epoch failed, then re-raise so the full traceback is
        # shown and the cell is marked as failed instead of ending quietly.
        print(f"Error occurred in epoch {epoch+1}: {e}")
        raise

    # Leave the model in evaluation mode after every epoch (the next epoch
    # switches back with model.train()).
    model.eval()

    # Evaluation and checkpointing only happen every `epoch_save_test` epochs.
    if (epoch + 1) % epoch_save_test != 0:
        continue

    # Evaluate on the test set without tracking gradients.
    correct = 0
    total = 0
    running_val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in testloader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)  # validation loss for this batch
            running_val_loss += loss.item()

            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = running_val_loss / len(testloader)  # mean of per-batch validation losses
    accuracy = 100 * correct / total
    print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy on test set: {accuracy:.2f}%")

    # Save a checkpoint named after the run settings and the metrics above.
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    model_name = (f"model_{current_time}_epoch{epoch+1}_"
                  f"batchsize{trainloader.batch_size}_lr{optimizer.param_groups[0]['lr']}_"
                  f"train_loss{avg_train_loss:.4f}_train_acc{train_accuracy:.2f}_"
                  f"val_loss{avg_val_loss:.4f}_test_acc{accuracy:.2f}.pth")

    # Make sure the target directory exists before writing.
    os.makedirs(model_folder, exist_ok=True)

    # Only the parameters (state_dict) are stored, not the whole module object.
    model_path = os.path.join(model_folder, model_name)
    torch.save(model.state_dict(), model_path)
    print(f"Model saved as {model_path}")


epoch: 0


KeyboardInterrupt: 