In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import OneHotEncoder
import gensim
import numpy as np
import matplotlib.pyplot as plt


# Names of the five input columns (EventTemplate1 .. EventTemplate5) and of the target column.
EVENT_TEMPLATE_COLUMNS = [f'EventTemplate{index}' for index in range(1, 6)]
NEXT_EVENT_TEMPLATE_COLUMN = 'Next_EventTemplate'

# Load the trained LDA topic model from disk.
# NOTE(review): this is an absolute, machine-specific Windows path; consider
# deriving it from a configurable base directory so the notebook runs elsewhere.
lda_model_path = r'E:\Code for Project2024\lda_model\model'
lda_model = gensim.models.LdaModel.load(lda_model_path)
# Number of topics in the loaded model; later cells use it as the LSTM
# input and output width.
n_topics = lda_model.num_topics

# LSTM network definition
class LSTMNet(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_dim: Size of the vector produced by the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        # Attribute names `lstm` and `fc` are part of the checkpoint key names
        # and must not be renamed.
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, hidden=None):
        """Run the sequence through the LSTM and project the last time step.

        Args:
            x: Input tensor of shape (batch, seq_len, input_dim).
            hidden: Optional ``(h0, c0)`` tuple, each of shape
                (num_layers, batch, hidden_dim). When ``None`` the LSTM
                starts from an all-zero state.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # nn.LSTM zero-initialises the state itself when `hidden` is None,
        # using the input's device and dtype. This avoids allocating zeros on
        # the CPU and copying them over on every call, and it also works when
        # the model is not float32.
        out, _ = self.lstm(x, hidden)
        # Keep only the output of the final time step of each sequence.
        return self.fc(out[:, -1, :])

In [2]:
# Data processing: read the validation dataset CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; consider
# deriving it from a configurable data directory.
csv_file_path = r'E:\Code for Project2024\Data_for_Train\L2\L2 validation_dataset.csv'
df = pd.read_csv(csv_file_path)

# Build the LDA topic-probability vector for each EventTemplate
def get_lda_vector(lda_model, event_templates):
    """Return a dense topic-probability matrix for a sequence of event templates.

    Each template is split on whitespace, converted to a bag-of-words with the
    model's dictionary (``lda_model.id2word``) and passed through the model.

    Args:
        lda_model: Topic model exposing ``id2word.doc2bow``, ``num_topics`` and
            ``lda_model[bow]`` returning ``(topic_id, probability)`` pairs.
        event_templates: Iterable of strings, one per event template.
            Assumes they are already preprocessed - TODO confirm.

    Returns:
        ``numpy.ndarray`` of shape ``(len(event_templates), lda_model.num_topics)``.
        Topics that the model does not return for a document stay at 0, so a
        row does not necessarily sum to 1.
    """
    # Size the output from the model that was passed in, not from a
    # module-level global that may describe a different model.
    num_topics = lda_model.num_topics
    dictionary = lda_model.id2word

    corpus = [dictionary.doc2bow(event.split()) for event in event_templates]
    topic_lists = [lda_model[bow] for bow in corpus]

    # Scatter the sparse (topic, probability) pairs into a dense matrix.
    dense = np.zeros((len(topic_lists), num_topics))
    for row, topic_probs in enumerate(topic_lists):
        for topic, prob in topic_probs:
            dense[row, topic] = prob
    return dense

# Compute the LDA topic distributions for every EventTemplate column and for
# Next_EventTemplate. Flattening is row-major, so the five templates of one
# DataFrame row stay adjacent.
event_lda_vectors = get_lda_vector(lda_model, df[EVENT_TEMPLATE_COLUMNS].values.flatten())
next_event_lda_vectors = get_lda_vector(lda_model, df[NEXT_EVENT_TEMPLATE_COLUMN].values)

# One-hot encode the dominant topic of each Next_EventTemplate.
# The categories are pinned to 0..n_topics-1 so that column j always means
# topic j. Fitting on the observed labels alone would drop topics that never
# occur in this dataset, and the label index would then stop matching the
# index of the model's output.
encoder = OneHotEncoder(categories=[np.arange(n_topics)])
next_event_onehot = encoder.fit_transform(next_event_lda_vectors.argmax(axis=1).reshape(-1, 1))


# Convert to tensors: inputs are (rows, templates per row, n_topics),
# labels are (rows, n_topics).
event_lda_tensors = torch.tensor(event_lda_vectors, dtype=torch.float).view(
    len(df), len(EVENT_TEMPLATE_COLUMNS), n_topics
)
next_event_onehot_tensors = torch.tensor(next_event_onehot.toarray(), dtype=torch.float)

In [4]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("将在"+ str(device) +"上验证模型")

# Build the LSTM model. hidden_dim and num_layers have to match the
# architecture stored in the checkpoint, otherwise load_state_dict fails.
model = LSTMNet(input_dim=n_topics, hidden_dim=5, num_layers=3, output_dim=n_topics).to(device)

# Load the previously trained weights. map_location remaps the stored tensors
# onto the selected device, so a checkpoint saved on a GPU also loads on a
# CPU-only machine.
model.load_state_dict(
    torch.load(r'E:\Code for Project2024\LSTM_Model\LSTM_model.pth', map_location=device)
)


将在cuda上验证模型


<All keys matched successfully>

In [24]:
# Validation phase
# A prediction counts as correct when the true topic is among the TOP_K
# highest-scoring topics of the model output.
TOP_K = 18

total = len(event_lda_tensors)
correct_predictions = 0

# Move the data to the same device as the model. Calling .cuda()
# unconditionally would fail on a CPU-only machine.
event_lda_tensors = event_lda_tensors.to(device)
next_event_onehot_tensors = next_event_onehot_tensors.to(device)

# Switch the model to evaluation mode
model.eval()

with torch.no_grad():
    for i in range(total):
        # Slicing with i:i+1 keeps the batch dimension (batch size 1).
        current_event_lda = event_lda_tensors[i:i+1]
        current_next_event_onehot = next_event_onehot_tensors[i:i+1]
        # Inference
        outputs = model(current_event_lda)

        # Turn the raw outputs into a probability distribution
        probabilities = nn.functional.softmax(outputs, dim=1)
        # Indices of the k largest probabilities. k is clamped to the number
        # of classes because torch.topk raises when k exceeds it.
        k = min(TOP_K, probabilities.size(1))
        _, topk_indices = torch.topk(probabilities, k=k, dim=1)

        # Check whether the true label is among the top-k predictions
        true_label = torch.argmax(current_next_event_onehot, dim=1).item()
        if true_label in topk_indices.tolist()[0]:
            correct_predictions += 1

accuracy = correct_predictions / total
print("模型准确度:", accuracy)

模型准确度: 0.6729278235153066


In [1]:
# Four raw bytes: 0xAA, 0xBB, 0xCC, 0xDD.
hex_data = bytes([0xAA, 0xBB, 0xCC, 0xDD])