In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from tqdm import tqdm

# ==========================================
# 1. 설정 (속도 최적화 적용)
# ==========================================
class Config:
    """Static settings shared by the classifier-comparison experiment."""

    # --- runtime ---
    # Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # --- data ---
    CSV_PATH = "LID-DS-2021_Seq2Seq_Dataset.csv"
    MAX_LEN = 50          # token length sequences are padded/truncated to
    BATCH_SIZE = 1024
    NUM_WORKERS = 0       # number of parallel data-loading workers

    # --- model ---
    EMBED_DIM = 64
    HIDDEN_DIM = 128

    # --- optimisation ---
    EPOCHS = 5
    LR = 0.001

# ==========================================
# 2. 데이터셋
# ==========================================
class ComparisonDataset(Dataset):
    """Map-style dataset yielding ``(token_ids, label)`` pairs read from a CSV.

    The CSV must provide an ``input_sequence`` column (whitespace-separated
    tokens) and a ``label`` column. A vocabulary is built over every token in
    ``input_sequence``; ids 0 and 1 are reserved for ``<PAD>`` and ``<UNK>``.

    Args:
        csv_file: Path to the CSV file.
        max_len: Fixed length every sequence is padded or truncated to.
    """

    PAD_IDX = 0
    UNK_IDX = 1

    def __init__(self, csv_file, max_len=50):
        self.df = pd.read_csv(csv_file)
        self.max_len = max_len

        # Missing sequences become empty strings so that split() is always valid.
        self.df['input_sequence'] = self.df['input_sequence'].fillna("").astype(str)

        # Build the token -> id mapping.
        self.vocab = {"<PAD>": self.PAD_IDX, "<UNK>": self.UNK_IDX}
        self.build_vocab()

        # Cache the two columns as plain lists: indexing a list is far cheaper
        # than building a pandas Series with ``df.iloc[i]`` on every access.
        self._sequences = self.df['input_sequence'].tolist()
        self._labels = self.df['label'].tolist()

    def build_vocab(self):
        """Assign the next free id to every token not yet in the vocabulary."""
        for seq in self.df['input_sequence']:
            for word in seq.split():
                # len(vocab) is always the next unused id, so calling this
                # method again can never hand out an id that is already taken.
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)

    def text_to_idx(self, text):
        """Return the token ids of ``text``; unknown tokens map to ``<UNK>``."""
        return [self.vocab.get(w, self.UNK_IDX) for w in str(text).split()]

    def __len__(self):
        return len(self._sequences)

    def __getitem__(self, index):
        """Return ``(LongTensor[max_len], FloatTensor[])`` for row ``index``."""
        indices = self.text_to_idx(self._sequences[index])[:self.max_len]
        # Right-pad short sequences with <PAD> up to max_len.
        indices += [self.PAD_IDX] * (self.max_len - len(indices))

        return torch.tensor(indices), torch.tensor(self._labels[index], dtype=torch.float)

# ==========================================
# 3. 모델 정의 (3가지 종류)
# ==========================================

# Model 1: Standard LSTM
class LSTMClassifier(nn.Module):
    """Embedding -> single-layer LSTM -> linear -> sigmoid binary classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_dim: Embedding vector size.
        hid_dim: LSTM hidden state size.
    """

    def __init__(self, vocab_size, emb_dim, hid_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
        self.lstm = nn.LSTM(input_size=emb_dim, hidden_size=hid_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hid_dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a probability of shape (batch, 1) for token ids ``x``."""
        token_vectors = self.embedding(x)
        # Only the final hidden state is needed; per-step outputs are dropped.
        _, (final_hidden, _) = self.lstm(token_vectors)
        score = self.fc(final_hidden[-1])
        return self.sigmoid(score)

# Model 2: CNN + LSTM
class CNNLSTMClassifier(nn.Module):
    """Embedding -> Conv1d/ReLU/MaxPool -> LSTM -> linear -> sigmoid classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_dim: Embedding vector size (also the Conv1d input channels).
        hid_dim: Conv1d output channels and LSTM hidden state size.
    """

    def __init__(self, vocab_size, emb_dim, hid_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
        self.conv = nn.Conv1d(emb_dim, hid_dim, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.lstm = nn.LSTM(input_size=hid_dim, hidden_size=hid_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hid_dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a probability of shape (batch, 1) for token ids ``x``."""
        # Conv1d wants channels first: (batch, seq, emb) -> (batch, emb, seq).
        channels_first = self.embedding(x).transpose(1, 2)
        conv_features = self.conv(channels_first)
        pooled = self.pool(self.relu(conv_features))
        # Back to (batch, seq/2, hid) for the batch_first LSTM.
        lstm_input = pooled.transpose(1, 2)
        _, (final_hidden, _) = self.lstm(lstm_input)
        score = self.fc(final_hidden[-1])
        return self.sigmoid(score)

# Model 3: Bi-Directional LSTM
class BiLSTMClassifier(nn.Module):
    """Embedding -> bidirectional LSTM -> linear -> sigmoid binary classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_dim: Embedding vector size.
        hid_dim: Hidden state size of each LSTM direction.
    """

    def __init__(self, vocab_size, emb_dim, hid_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
        self.lstm = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hid_dim,
            batch_first=True,
            bidirectional=True,
        )
        # The two directions are concatenated, hence 2 * hid_dim inputs.
        self.fc = nn.Linear(in_features=2 * hid_dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a probability of shape (batch, 1) for token ids ``x``."""
        token_vectors = self.embedding(x)
        _, (final_hidden, _) = self.lstm(token_vectors)
        # The last two entries of the final hidden state are joined feature-wise.
        second_last, last = final_hidden[-2], final_hidden[-1]
        both_directions = torch.cat((second_last, last), dim=1)
        score = self.fc(both_directions)
        return self.sigmoid(score)

# ==========================================
# 4. 학습 및 평가 함수 (상세 지표 추가)
# ==========================================
def train_and_evaluate(model_name, model, train_loader, test_loader, device):
    """Train ``model`` with BCE loss, then report test metrics.

    Args:
        model_name: Label used in progress bars and printed reports.
        model: Module mapping a batch of token ids to probabilities.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
        device: Device the batches are moved to (the model must already be there).

    Returns:
        The F1 score of the positive class (label 1) on ``test_loader``.
    """
    banner = "=" * 50
    print("\n" + banner)
    print(f" [INFO] Training Model: {model_name}")
    print(banner)

    optimizer = optim.Adam(model.parameters(), lr=Config.LR)
    criterion = nn.BCELoss()

    # --- Training Phase ---
    model.train()
    for epoch in range(Config.EPOCHS):
        total_loss = 0.0
        num_batches = 0
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{Config.EPOCHS}")
        for inputs, labels in loop:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            # reshape(-1) keeps the batch dimension even for a batch of one;
            # a bare squeeze() would yield a 0-d tensor and break BCELoss.
            outputs = model(inputs).reshape(-1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
            loop.set_postfix(loss=loss.item())
        if num_batches:
            print(f"\n[Epoch {epoch+1}] Avg Loss: {total_loss / num_batches:.4f}")

    # --- Evaluation Phase ---
    print(f"\n[INFO] Evaluating {model_name}...")
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Testing"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).reshape(-1)
            preds = (outputs > 0.5).long()
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.long().cpu().tolist())

    # --- Detailed Metrics ---
    print(f"\n >>> {model_name} Detailed Classification Report <<<")

    # 1. Confusion Matrix
    # Pinning labels=[0, 1] guarantees a 2x2 matrix even when one class is
    # absent from the labels or predictions, so the 4-way unpack cannot fail.
    class_ids = [0, 1]
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    tn, fp, fn, tp = cm.ravel()
    print("\n[Confusion Matrix]")
    print(f"TN (정상->정상): {tn} \t FP (정상->공격/오탐): {fp}")
    print(f"FN (공격->정상/미탐): {fn} \t TP (공격->공격/정탐): {tp}")
    print("-" * 40)

    # 2. Classification Report
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=['Normal', 'Attack'],
        digits=4,
        zero_division=0,
    ))

    f1 = f1_score(all_labels, all_preds, zero_division=0)
    return f1

# ==========================================
# 5. 메인 실행
# ==========================================
if __name__ == "__main__":
    # 1. Load the data
    try:
        # Pass the configured length explicitly so Config.MAX_LEN takes effect.
        dataset = ComparisonDataset(Config.CSV_PATH, max_len=Config.MAX_LEN)
    except FileNotFoundError:
        print(f"[ERROR] {Config.CSV_PATH} 파일이 없습니다.")
        # The site-provided exit() is meant for interactive use and would
        # report success; raise SystemExit with a failure status instead.
        raise SystemExit(1) from None

    # Split the dataset (Train 80% / Test 20%)
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Pinned host memory only helps host-to-GPU copies, so skip it on CPU.
    loader_kwargs = {
        "batch_size": Config.BATCH_SIZE,
        "num_workers": Config.NUM_WORKERS,
        "pin_memory": Config.DEVICE.type == "cuda",
    }
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    # One spare embedding row beyond the ids actually assigned by the vocab.
    vocab_size = len(dataset.vocab) + 1
    print(f"[INFO] Vocab Size: {vocab_size}")
    print(f"[INFO] Train Samples: {len(train_dataset)}, Test Samples: {len(test_dataset)}")

    # 2. Initialise the models
    model_lstm = LSTMClassifier(vocab_size, Config.EMBED_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    model_cnn_lstm = CNNLSTMClassifier(vocab_size, Config.EMBED_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    model_bilstm = BiLSTMClassifier(vocab_size, Config.EMBED_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)

    # 3. Train each model and collect its F1 score
    results = {}
    results['LSTM'] = train_and_evaluate("LSTM", model_lstm, train_loader, test_loader, Config.DEVICE)
    results['CNN-LSTM'] = train_and_evaluate("CNN+LSTM", model_cnn_lstm, train_loader, test_loader, Config.DEVICE)
    results['Bi-LSTM'] = train_and_evaluate("Bi-LSTM", model_bilstm, train_loader, test_loader, Config.DEVICE)

    print("\n" + "="*50)
    print(" [Final F1-Score Comparison]")
    print("="*50)
    for name, score in results.items():
        print(f"{name}: {score:.4f}")
    print("="*50)

[INFO] Using device: cuda
[INFO] 전체 샘플 수: 4078789
[INFO] Train Samples: 3263031, Test Samples: 815758
[INFO] Vocab Size: 73

 [INFO] Training Model: LSTM


Epoch 1/5: 100%|██████████| 3187/3187 [04:51<00:00, 10.93it/s, loss=0.396]


[Epoch 1] Avg Loss: 0.4096


Epoch 2/5: 100%|██████████| 3187/3187 [08:49<00:00,  6.02it/s, loss=0.361]


[Epoch 2] Avg Loss: 0.3879


Epoch 3/5: 100%|██████████| 3187/3187 [07:54<00:00,  6.72it/s, loss=0.349]


[Epoch 3] Avg Loss: 0.3840


Epoch 4/5: 100%|██████████| 3187/3187 [09:30<00:00,  5.58it/s, loss=0.366]


[Epoch 4] Avg Loss: 0.3824


Epoch 5/5: 100%|██████████| 3187/3187 [05:27<00:00,  9.73it/s, loss=0.349]


[Epoch 5] Avg Loss: 0.3807

[INFO] Evaluating LSTM...


Testing: 100%|██████████| 797/797 [01:11<00:00, 11.16it/s]



 >>> LSTM Detailed Classification Report <<<

[Confusion Matrix]
TN (정상->정상): 589351 	 FP (정상->공격/오탐): 17372
FN (공격->정상/미탐): 105739 	 TP (공격->공격/정탐): 103296
----------------------------------------
              precision    recall  f1-score   support

      Normal     0.8479    0.9714    0.9054    606723
      Attack     0.8560    0.4942    0.6266    209035

    accuracy                         0.8491    815758
   macro avg     0.8520    0.7328    0.7660    815758
weighted avg     0.8500    0.8491    0.8340    815758


 [INFO] Training Model: CNN+LSTM


Epoch 1/5: 100%|██████████| 3187/3187 [09:22<00:00,  5.67it/s, loss=0.413]


[Epoch 1] Avg Loss: 0.3929


Epoch 2/5: 100%|██████████| 3187/3187 [06:30<00:00,  8.16it/s, loss=0.349]


[Epoch 2] Avg Loss: 0.3814


Epoch 3/5: 100%|██████████| 3187/3187 [05:15<00:00, 10.11it/s, loss=0.369]


[Epoch 3] Avg Loss: 0.3793


Epoch 4/5: 100%|██████████| 3187/3187 [05:18<00:00, 10.00it/s, loss=0.369]


[Epoch 4] Avg Loss: 0.3780


Epoch 5/5: 100%|██████████| 3187/3187 [05:12<00:00, 10.19it/s, loss=0.366]


[Epoch 5] Avg Loss: 0.3770

[INFO] Evaluating CNN+LSTM...


Testing: 100%|██████████| 797/797 [01:10<00:00, 11.27it/s]



 >>> CNN+LSTM Detailed Classification Report <<<

[Confusion Matrix]
TN (정상->정상): 589035 	 FP (정상->공격/오탐): 17688
FN (공격->정상/미탐): 103049 	 TP (공격->공격/정탐): 105986
----------------------------------------
              precision    recall  f1-score   support

      Normal     0.8511    0.9708    0.9070    606723
      Attack     0.8570    0.5070    0.6371    209035

    accuracy                         0.8520    815758
   macro avg     0.8540    0.7389    0.7721    815758
weighted avg     0.8526    0.8520    0.8379    815758


 [INFO] Training Model: Bi-LSTM


Epoch 1/5: 100%|██████████| 3187/3187 [05:25<00:00,  9.81it/s, loss=0.398]


[Epoch 1] Avg Loss: 0.4038


Epoch 2/5: 100%|██████████| 3187/3187 [05:23<00:00,  9.86it/s, loss=0.388]


[Epoch 2] Avg Loss: 0.3865


Epoch 3/5: 100%|██████████| 3187/3187 [05:26<00:00,  9.76it/s, loss=0.381]


[Epoch 3] Avg Loss: 0.3829


Epoch 4/5: 100%|██████████| 3187/3187 [07:50<00:00,  6.78it/s, loss=0.357]


[Epoch 4] Avg Loss: 0.3807


Epoch 5/5: 100%|██████████| 3187/3187 [06:59<00:00,  7.60it/s, loss=0.391]


[Epoch 5] Avg Loss: 0.3795

[INFO] Evaluating Bi-LSTM...


Testing: 100%|██████████| 797/797 [01:17<00:00, 10.27it/s]



 >>> Bi-LSTM Detailed Classification Report <<<

[Confusion Matrix]
TN (정상->정상): 594793 	 FP (정상->공격/오탐): 11930
FN (공격->정상/미탐): 109981 	 TP (공격->공격/정탐): 99054
----------------------------------------
              precision    recall  f1-score   support

      Normal     0.8439    0.9803    0.9070    606723
      Attack     0.8925    0.4739    0.6191    209035

    accuracy                         0.8506    815758
   macro avg     0.8682    0.7271    0.7630    815758
weighted avg     0.8564    0.8506    0.8332    815758


 [Final F1-Score Comparison]
LSTM: 0.6266
CNN-LSTM: 0.6371
Bi-LSTM: 0.6191


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from collections import Counter
from tqdm import tqdm
import random
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report, precision_score, recall_score

# ==========================================
# 1. 설정 및 하이퍼파라미터
# ==========================================
class Config:
    """Static settings for the Seq2Seq pre-training + reward-weighted pipeline."""

    # --- runtime ---
    # Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # --- data ---
    CSV_PATH = "LID-DS-2021_Seq2Seq_Dataset.csv"  # path to the preprocessed dataset
    MAX_LEN = 50          # maximum sequence length
    BATCH_SIZE = 1024
    NUM_WORKERS = 0       # parallel data-loading workers (tune to the CPU core count)

    # --- model ---
    EMBED_DIM = 64
    HIDDEN_DIM = 128

    # --- training pipeline ---
    PRETRAIN_EPOCHS = 5   # supervised-learning epochs
    RL_EPOCHS = 5         # reinforcement-learning epochs
    LR = 0.001

    # --- RL parameters (taken from the referenced paper) ---
    ALPHA = 0.5           # custom-loss weight
    GAMMA = 0.99          # discount factor

# ==========================================
# 2. 데이터셋 및 단어 사전 (공통)
# ==========================================
class Vocabulary:
    """Bidirectional token <-> id mapping with four reserved special tokens.

    Attributes:
        stoi: Token string -> integer id.
        itos: Integer id -> token string.
        freq_threshold: Minimum number of occurrences (within one
            ``build_vocabulary`` call) for a token to be added.
    """

    def __init__(self):
        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.itos = {index: token for token, index in self.stoi.items()}
        self.freq_threshold = 1

    def __len__(self):
        return len(self.stoi)

    def build_vocabulary(self, sentence_list):
        """Add every sufficiently frequent token of ``sentence_list``.

        Non-string entries are skipped. Tokens already present, including the
        reserved special tokens, keep their id, so the method is safe to call
        more than once and ``stoi``/``itos`` stay consistent.
        """
        frequencies = Counter()
        for sentence in sentence_list:
            if isinstance(sentence, str):
                frequencies.update(sentence.split())

        for word, count in frequencies.items():
            if count < self.freq_threshold or word in self.stoi:
                continue
            # len(stoi) is the next unused id because ids are handed out
            # contiguously starting from the four special tokens.
            idx = len(self.stoi)
            self.stoi[word] = idx
            self.itos[idx] = word

    def numericalize(self, text):
        """Return the ids of the whitespace tokens in ``text`` (``<UNK>`` if unseen)."""
        unk = self.stoi["<UNK>"]
        return [self.stoi.get(token, unk) for token in str(text).split()]

class LIDDSDataset(Dataset):
    """Map-style dataset yielding ``(input_ids, target_ids, label)`` triples.

    Reads ``input_sequence``, ``target_keywords`` and ``label`` columns from a
    CSV. Rows whose input sequence is empty are dropped. Both id sequences are
    padded or truncated to ``Config.MAX_LEN``.

    Args:
        csv_file: Path to the CSV file.
        vocab: Object providing ``stoi`` and ``numericalize`` for tokenisation.
    """

    def __init__(self, csv_file, vocab):
        self.df = pd.read_csv(csv_file)
        self.vocab = vocab
        self.max_len = Config.MAX_LEN
        # Normalise to strings, then drop rows without any input text.
        self.df['input_sequence'] = self.df['input_sequence'].fillna("").astype(str)
        self.df['target_keywords'] = self.df['target_keywords'].fillna("").astype(str)
        self.df = self.df[self.df['input_sequence'].str.len() > 0]

        # Cache the columns as plain lists: indexing a list is far cheaper
        # than building a pandas Series with ``df.iloc[i]`` on every access.
        self._inputs = self.df['input_sequence'].tolist()
        self._targets = self.df['target_keywords'].tolist()
        self._labels = self.df['label'].tolist()

    def __len__(self):
        return len(self._inputs)

    def __getitem__(self, index):
        """Return ``(LongTensor[max_len], LongTensor[max_len], FloatTensor[])``."""
        input_indices = self.vocab.numericalize(self._inputs[index])

        # Truncate the keywords *before* wrapping them so that <EOS> is never
        # cut off when a target is longer than max_len - 2 tokens.
        keyword_budget = max(self.max_len - 2, 0)
        keyword_indices = self.vocab.numericalize(self._targets[index])[:keyword_budget]
        target_indices = [self.vocab.stoi["<SOS>"]] + keyword_indices + [self.vocab.stoi["<EOS>"]]

        # Padding
        input_indices = self.pad_seq(input_indices)
        target_indices = self.pad_seq(target_indices)

        return (torch.tensor(input_indices),
                torch.tensor(target_indices),
                torch.tensor(self._labels[index], dtype=torch.float))  # 0 (normal) or 1 (attack)

    def pad_seq(self, seq):
        """Right-pad ``seq`` with ``<PAD>`` or truncate it to ``max_len`` items."""
        if len(seq) < self.max_len:
            return seq + [self.vocab.stoi["<PAD>"]] * (self.max_len - len(seq))
        return seq[:self.max_len]

# ==========================================
# 3. 모델 정의 (Seq2Seq with Attention)
# ==========================================
class Encoder(nn.Module):
    """Embeds source token ids and encodes them with a single-layer LSTM.

    Args:
        input_dim: Number of rows in the embedding table.
        emb_dim: Embedding vector size.
        hid_dim: LSTM hidden state size.
    """

    def __init__(self, input_dim, emb_dim, hid_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=emb_dim)
        self.rnn = nn.LSTM(input_size=emb_dim, hidden_size=hid_dim, batch_first=True)

    def forward(self, src):
        """Return ``(per-step outputs, final hidden state, final cell state)``."""
        step_outputs, final_state = self.rnn(self.embedding(src))
        final_hidden, final_cell = final_state
        return step_outputs, final_hidden, final_cell

class Attention(nn.Module):
    """Additive attention: scores each encoder step against a decoder state.

    Args:
        hid_dim: Size of both the decoder hidden state and the encoder outputs.
    """

    def __init__(self, hid_dim):
        super().__init__()
        self.attn = nn.Linear(in_features=2 * hid_dim, out_features=hid_dim)
        self.v = nn.Linear(in_features=hid_dim, out_features=1, bias=False)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch, src_len) summing to 1 per row."""
        steps = encoder_outputs.shape[1]
        # [1, batch, hid] -> [batch, 1, hid] -> copied along the source axis.
        query = hidden.permute(1, 0, 2)
        query = query.repeat(1, steps, 1)
        paired = torch.cat((query, encoder_outputs), dim=2)
        energy = torch.tanh(self.attn(paired))
        scores = self.v(energy).squeeze(2)
        return torch.softmax(scores, dim=1)

class Decoder(nn.Module):
    """Single-step attention decoder producing scores over the output vocabulary.

    Args:
        output_dim: Size of the output vocabulary.
        emb_dim: Embedding vector size.
        hid_dim: LSTM hidden state size (must match the encoder outputs).
        attention: Callable ``(hidden, encoder_outputs) -> (batch, src_len)`` weights.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, attention):
        super().__init__()
        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(num_embeddings=output_dim, embedding_dim=emb_dim)
        self.rnn = nn.LSTM(input_size=hid_dim + emb_dim, hidden_size=hid_dim, batch_first=True)
        self.fc_out = nn.Linear(in_features=hid_dim * 2 + emb_dim, out_features=output_dim)

    def forward(self, input, hidden, cell, encoder_outputs):
        """Decode one step.

        Returns:
            ``(prediction, hidden, cell)`` where ``prediction`` has shape
            (batch, output_dim) and the states are the updated LSTM states.
        """
        # One token per sample -> sequence of length 1 for the batch_first LSTM.
        token_embedding = self.embedding(input.unsqueeze(1))

        # Attend over the encoder outputs using the last hidden-state entry.
        query = hidden[-1].unsqueeze(0)
        weights = self.attention(query, encoder_outputs).unsqueeze(1)
        context = torch.bmm(weights, encoder_outputs)

        lstm_in = torch.cat((token_embedding, context), dim=2)
        lstm_out, (hidden, cell) = self.rnn(lstm_in, (hidden, cell))

        features = torch.cat((lstm_out, context, token_embedding), dim=2)
        prediction = self.fc_out(features)
        return prediction.squeeze(1), hidden, cell

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with optional teacher forcing.

    Args:
        encoder: Callable returning ``(encoder_outputs, hidden, cell)`` for ``src``.
        decoder: Callable ``(input, hidden, cell, encoder_outputs)`` returning
            ``(step_scores, hidden, cell)``; must expose ``output_dim``.
        device: Device the output buffer is moved to.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Return scores of shape (batch, trg_len, vocab); position 0 stays zero.

        At each step the next decoder input is the ground-truth token with
        probability ``teacher_forcing_ratio``, otherwise the greedy prediction.
        """
        n_samples, n_steps = src.shape[0], trg.shape[1]
        scores = torch.zeros(n_samples, n_steps, self.decoder.output_dim).to(self.device)

        memory, hidden, cell = self.encoder(src)
        # Decoding starts from the first target token of every sample.
        decoder_input = trg[:, 0]

        for step in range(1, n_steps):
            step_scores, hidden, cell = self.decoder(decoder_input, hidden, cell, memory)
            scores[:, step] = step_scores
            greedy_token = step_scores.argmax(1)
            use_teacher = random.random() < teacher_forcing_ratio
            decoder_input = trg[:, step] if use_teacher else greedy_token

        return scores

# ==========================================
# 4. 강화학습 (RL) 로직 & 보상 함수
# ==========================================
def calculate_reward(pred_indices, target_indices, actual_label, vocab):
    """Compute a per-sample reward ``R = 0.3*r1 + 0.5*r2 - 0.2*r3``.

    Implements Equation 19 of the referenced paper:
    - r1: similarity to the pre-trained model, approximated here by the
      fraction of target keywords matched by the generated keywords,
      capped at 1.0.
    - r2: detection reward, 1.0 when the sample is an attack (label 1) and
      ``r1 > 0.5``.
    - r3: false-positive penalty, 1.0 when the sample is normal (label 0)
      and ``r1 > 0.5``.

    Args:
        pred_indices: Integer tensor (batch, seq) of generated token ids.
        target_indices: Integer tensor (batch, seq) of reference token ids.
        actual_label: Tensor (batch,) of labels; 1 = attack, 0 = normal.
        vocab: Unused; kept for interface compatibility.

    Returns:
        Float tensor (batch,) of rewards on the same device as ``pred_indices``.
    """
    # Token ids 0, 1 and 2 are excluded from the keyword comparison.
    special_ids = {0, 1, 2}

    # One bulk transfer to Python lists instead of an .item() call per token,
    # each of which would force a separate device synchronisation.
    pred_rows = pred_indices.tolist()
    trg_rows = target_indices.tolist()
    labels = torch.as_tensor(actual_label).tolist()

    rewards = []
    for pred_seq, trg_seq, label in zip(pred_rows, trg_rows, labels):
        valid_pred = [p for p in pred_seq if p not in special_ids]
        valid_trg = [t for t in trg_seq if t not in special_ids]

        # r1: keyword match rate. Repeated predictions of one keyword could
        # push the raw ratio above 1, so it is capped to stay a rate.
        if valid_trg:
            trg_set = set(valid_trg)
            match_count = sum(1 for p in valid_pred if p in trg_set)
            r1 = min(1.0, match_count / len(valid_trg))
        else:
            r1 = 0.0

        # Simulated testbed: a match rate above 50% counts as "detected".
        is_detected = r1 > 0.5

        r2 = 1.0 if (label == 1 and is_detected) else 0.0   # attack caught
        r3 = 1.0 if (label != 1 and is_detected) else 0.0   # normal flagged

        rewards.append((0.3 * r1) + (0.5 * r2) - (0.2 * r3))

    return torch.tensor(rewards, dtype=torch.float, device=pred_indices.device)

def train_rl_step(model, src, trg, label, optimizer, criterion, vocab):
    """Run one reward-weighted optimisation step on a single batch.

    The model decodes without teacher forcing, a reward is computed from its
    greedy predictions, and the base loss is scaled by
    ``1 - ALPHA * clamp(mean_reward, 0, 1)`` before back-propagation.

    Returns:
        ``(custom_loss, mean_reward)`` as Python floats.
    """
    model.train()
    optimizer.zero_grad()

    # Teacher forcing is disabled so the model generates on its own.
    logits = model(src, trg, teacher_forcing_ratio=0.0)

    # Greedy token ids, shape [batch, trg_len]; rewards have shape [batch].
    greedy_tokens = logits.argmax(dim=2)
    rewards = calculate_reward(greedy_tokens, trg, label, vocab)

    # Base loss over every position except the first one.
    vocab_dim = logits.shape[-1]
    base_loss = criterion(
        logits[:, 1:].reshape(-1, vocab_dim),
        trg[:, 1:].reshape(-1),
    )

    # Custom loss (Algorithm 2): the batch-mean reward, clipped to [0, 1],
    # shrinks the loss as the reward grows.
    # Paper formula: Loss = Base_Loss * (1 - alpha * Reward)
    mean_reward = rewards.mean()
    reward_weight = mean_reward.clamp(0.0, 1.0)
    loss_scale = 1 - Config.ALPHA * reward_weight
    custom_loss = base_loss * loss_scale

    custom_loss.backward()
    optimizer.step()

    return custom_loss.item(), mean_reward.item()

# ==========================================
# 5. 평가 함수 (Evaluation - Detailed)
# ==========================================
def evaluate_model(model, dataloader, vocab):
    """Evaluate keyword-based detection and print detailed metrics.

    For each sample the model decodes without teacher forcing; the sample is
    counted as "detected" (prediction 1) when more than 30% of the reference
    keywords are matched by the generated keywords.

    NOTE(review): detection is derived from the overlap with the reference
    target sequence, so this metric needs the ground-truth keywords.
    NOTE(review): the threshold here (0.3) differs from the 0.5 used in the
    reward computation - confirm this is intentional.

    Args:
        model: Seq2Seq model called as ``model(src, trg, teacher_forcing_ratio=0.0)``.
        dataloader: Iterable of ``(src, trg, label)`` batches.
        vocab: Unused; kept for interface compatibility.

    Returns:
        ``(accuracy, f1)`` computed against the labels.
    """
    model.eval()
    # Token ids 0, 1 and 2 are excluded from the keyword comparison.
    special_ids = {0, 1, 2}
    all_preds = []   # 1 if detected (attack), 0 if normal
    all_labels = []  # ground-truth labels

    print("[INFO] Evaluating Model...")
    with torch.no_grad():
        for src, trg, label in tqdm(dataloader, desc="Eval"):
            src, trg = src.to(Config.DEVICE), trg.to(Config.DEVICE)

            output = model(src, trg, teacher_forcing_ratio=0.0)

            # One bulk transfer per batch instead of an .item() call per
            # token, each of which would force a device synchronisation.
            pred_rows = output.argmax(dim=2).tolist()
            trg_rows = trg.tolist()
            label_values = label.tolist()

            # Rule-matching simulation: decide attack / normal per sample.
            for pred_row, trg_row, truth in zip(pred_rows, trg_rows, label_values):
                p_seq = [x for x in pred_row if x not in special_ids]
                t_seq = [x for x in trg_row if x not in special_ids]

                # How much do the generated keywords overlap the reference set?
                if t_seq:
                    t_set = set(t_seq)
                    match_ratio = sum(1 for p in p_seq if p in t_set) / len(t_seq)
                else:
                    match_ratio = 0

                all_preds.append(1 if match_ratio > 0.3 else 0)
                all_labels.append(int(truth))

    # --- Detailed metrics ---
    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds, zero_division=0)
    rec = recall_score(all_labels, all_preds, zero_division=0)
    f1 = f1_score(all_labels, all_preds, zero_division=0)

    # Pinning labels=[0, 1] guarantees a 2x2 matrix even when one class is
    # missing, so the 4-way unpack below cannot fail.
    tn, fp, fn, tp = confusion_matrix(all_labels, all_preds, labels=[0, 1]).ravel()
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0

    print("========================================")
    print(f"Accuracy : {acc:.4f} ({acc*100:.2f}%)")
    print("----------------------------------------")
    print(f"FPR      : {fpr:.4f} ({fpr*100:.2f}%)  # 오탐률")
    print(f"FNR      : {fnr:.4f} ({fnr*100:.2f}%)  # 미탐률")
    print("----------------------------------------")
    print(f"Recall   : {rec:.4f} ({rec*100:.2f}%)  # 검출률")
    print(f"Precision: {prec:.4f} ({prec*100:.2f}%)")
    print(f"F1-score : {f1:.4f}")
    print("========================================")

    return acc, f1

# ==========================================
# 6. 메인 실행 루프
# ==========================================
if __name__ == "__main__":
    print("[INFO] 1. Data Loading...")
    try:
        df = pd.read_csv(Config.CSV_PATH)
    except FileNotFoundError:
        print("[ERROR] CSV 파일이 없습니다. 전처리 코드를 먼저 실행하세요.")
        # The site-provided exit() is meant for interactive use and would
        # report success; raise SystemExit with a failure status instead.
        raise SystemExit(1) from None

    vocab = Vocabulary()
    # Build the vocabulary from both the inputs and the targets.
    all_text = df['input_sequence'].fillna("").tolist() + df['target_keywords'].fillna("").tolist()
    vocab.build_vocabulary(all_text)
    print(f"Vocab Size: {len(vocab)}")
    # The dataset re-reads the CSV itself, so release this copy first.
    del df, all_text

    dataset = LIDDSDataset(Config.CSV_PATH, vocab)
    # Split the data (Train/Test)
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

    # Pinned host memory only helps host-to-GPU copies, so skip it on CPU.
    loader_kwargs = {
        "batch_size": Config.BATCH_SIZE,
        "num_workers": Config.NUM_WORKERS,
        "pin_memory": Config.DEVICE.type == "cuda",
    }
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    print("[INFO] 2. Model Initialization...")
    enc = Encoder(len(vocab), Config.EMBED_DIM, Config.HIDDEN_DIM)
    attn = Attention(Config.HIDDEN_DIM)
    dec = Decoder(len(vocab), Config.EMBED_DIM, Config.HIDDEN_DIM, attn)
    model = Seq2Seq(enc, dec, Config.DEVICE).to(Config.DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=Config.LR)
    # Padding positions do not contribute to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])

    # ---------------------------------------------------------
    # Phase 1: Pre-training (Supervised Learning)
    # ---------------------------------------------------------
    print("\n[INFO] 3. Starting Pre-training (Supervised)...")
    for epoch in range(Config.PRETRAIN_EPOCHS):
        model.train()
        epoch_loss = 0
        for src, trg, _ in tqdm(train_loader, desc=f"Pre-train Epoch {epoch+1}"):
            src, trg = src.to(Config.DEVICE), trg.to(Config.DEVICE)
            optimizer.zero_grad()
            output = model(src, trg, teacher_forcing_ratio=0.5)

            # Position 0 is skipped on both sides; the model never fills it.
            output_dim = output.shape[-1]
            flat_output = output[:, 1:].reshape(-1, output_dim)
            flat_target = trg[:, 1:].reshape(-1)

            loss = criterion(flat_output, flat_target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Pre-train Loss: {epoch_loss/len(train_loader):.4f}")

    # ---------------------------------------------------------
    # Phase 2: RL Training (reward-weighted loss)
    # ---------------------------------------------------------
    print("\n[INFO] 4. Starting RL Training (Reward Based)...")
    for epoch in range(Config.RL_EPOCHS):
        total_loss = 0
        total_reward = 0
        for src, trg, label in tqdm(train_loader, desc=f"RL Epoch {epoch+1}"):
            src, trg, label = src.to(Config.DEVICE), trg.to(Config.DEVICE), label.to(Config.DEVICE)
            loss, reward = train_rl_step(model, src, trg, label, optimizer, criterion, vocab)
            total_loss += loss
            total_reward += reward

        print(f"RL Epoch {epoch+1} - Loss: {total_loss/len(train_loader):.4f}, Avg Reward: {total_reward/len(train_loader):.4f}")

    # ---------------------------------------------------------
    # Phase 3: Final Evaluation
    # ---------------------------------------------------------
    print("\n[INFO] 5. Final Evaluation...")
    acc, f1 = evaluate_model(model, test_loader, vocab)

    # Save the model weights
    torch.save(model.state_dict(), "rl_hids_model_final.pt")
    print("\n[INFO] Model saved to rl_hids_model_final.pt")


[INFO] 1. Data Loading...
Vocab Size: 75
Vocab Size: 75
[INFO] 2. Model Initialization...

[INFO] 3. Starting Pre-training (Supervised)...
[INFO] 2. Model Initialization...

[INFO] 3. Starting Pre-training (Supervised)...


Pre-train Epoch 1: 100%|██████████| 3187/3187 [12:52<00:00,  4.13it/s]
Pre-train Epoch 1: 100%|██████████| 3187/3187 [12:52<00:00,  4.13it/s]


Pre-train Loss: 0.5761


Pre-train Epoch 2: 100%|██████████| 3187/3187 [16:04<00:00,  3.30it/s]
Pre-train Epoch 2: 100%|██████████| 3187/3187 [16:04<00:00,  3.30it/s]


Pre-train Loss: 0.3019


Pre-train Epoch 3: 100%|██████████| 3187/3187 [15:29<00:00,  3.43it/s]
Pre-train Epoch 3: 100%|██████████| 3187/3187 [15:29<00:00,  3.43it/s]


Pre-train Loss: 0.2501


Pre-train Epoch 4:  11%|█▏        | 360/3187 [01:27<11:19,  4.16it/s]