In [None]:
import pandas as pd
from tqdm import tqdm
import torch
import random
import csv
import pandas as pd
import os
import statistics
import torch.nn as nn
import torch.nn.functional as F
from collections import defaultdict
import pickle

# Load the pickled per-sample lookup table used for augmentation (this file does
# not hold scaler statistics): dataAug() indexes `values` by integer sample index
# and reads each entry's 'label' and 'feature' fields.
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file.
with open('feat_dict.pkl', 'rb') as file:
    values = pickle.load(file)

def dataAug(index_list, feature_table=None):
    """Pick, for every sample index, the feature of a nearby sample with the same label.

    For each index a single random offset in [1, 10] is drawn. The sample at
    ``index + offset`` is tried first and ``index - offset`` second; a candidate
    is accepted only if it exists in the table and carries the same label as the
    original sample. If neither candidate qualifies, the original sample's own
    feature is used, so the returned view always shares the original's label and
    the lookup never fails on a missing neighbour.

    Args:
        index_list: iterable of objects exposing ``.item()`` (e.g. the rows of an
            (N, 1) tensor) whose value is the sample index.
        feature_table: optional mapping ``index -> {'label': ..., 'feature': ...}``.
            Defaults to the module-level ``values`` table.

    Returns:
        list with one ``'feature'`` entry per element of ``index_list``, in order.

    Raises:
        KeyError: if an original index itself is missing from the table.
    """
    table = values if feature_table is None else feature_table

    aug_features = []
    for index in index_list:
        ori_start_index = int(index.item())
        ori_label = table[ori_start_index]['label']

        # Exactly one draw per sample, as before, so the RNG stream is unchanged.
        random_number = random.randint(1, 10)

        # Default to the sample itself; overwritten by the first valid neighbour.
        chosen_index = ori_start_index
        for candidate in (ori_start_index + random_number,
                          ori_start_index - random_number):
            if candidate in table and table[candidate]['label'] == ori_label:
                chosen_index = candidate
                break

        aug_features.append(table[chosen_index]['feature'])
    return aug_features

In [None]:
# model

class Encoder(nn.Module):
    """Fully connected backbone: a stack of Linear + ReLU pairs.

    Args:
        input_dim: width of the input vectors.
        hidden_dims: output width of each successive Linear layer; the last
            entry is the width of the embedding returned by ``forward``.
        feat_dim: accepted for signature compatibility; not used by this module.
    """

    def __init__(self, input_dim, hidden_dims, feat_dim):
        super().__init__()

        # Consecutive (in, out) widths: input_dim -> hidden_dims[0] -> hidden_dims[1] -> ...
        widths = [input_dim, *hidden_dims]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            # Every Linear, including the last one, is followed by a ReLU.
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(inplace=True)]
        self.encoder = nn.Sequential(*stack)

    def forward(self, x):
        """Return the embedding produced by the Linear/ReLU stack for ``x``."""
        return self.encoder(x)
    

class Proj(nn.Module):
    """Two-layer projection head whose output rows are L2-normalised.

    Args:
        input_dim: width of the incoming embedding (also the hidden width).
        hidden_dims: accepted for signature compatibility; not used by this module.
        feat_dim: width of the projected feature.
    """

    def __init__(self, input_dim, hidden_dims, feat_dim):
        super().__init__()
        hidden_layer = nn.Linear(input_dim, input_dim)
        output_layer = nn.Linear(input_dim, feat_dim)
        self.head = nn.Sequential(hidden_layer, nn.ReLU(inplace=True), output_layer)

    def forward(self, x):
        """Project ``x`` and normalise each row along dim 1."""
        projected = self.head(x)
        return F.normalize(projected, dim=1)
    
class ClassificationHead(nn.Module):
    """Single linear layer mapping an embedding to per-class logits.

    Args:
        input_dim: width of the incoming embedding.
        num_classes: number of output logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.fc = nn.Linear(in_features=input_dim, out_features=num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) logits for ``x``."""
        logits = self.fc(x)
        return logits

In [None]:
import torch.optim as optim
from losses import SupConLoss
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
import os
from sklearn.preprocessing import label_binarize
from tqdm import tqdm
import joblib
from sklearn.metrics import average_precision_score
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the mean and standard deviation from file.
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file.
with open('scaler_params.pkl', 'rb') as file:
    mean_cpu, std_cpu = pickle.load(file)

# Pin float32: if the pickled statistics are float64 (e.g. NumPy arrays), the
# float32 augmented batch normalised with them in train() would be promoted to
# float64 and then rejected by the float32 Linear layers.
# NOTE(review): `device` is not defined in any visible cell; it must already
# exist when this cell runs.
mean = torch.tensor(mean_cpu, dtype=torch.float32).to(device)
std = torch.tensor(std_cpu, dtype=torch.float32).to(device)

# Per-epoch histories appended to by train().
performances = []  # test-set mAP, rounded to 4 decimals
cls_losses = []    # mean classification loss over the training batches
con_losses = []    # mean contrastive loss over the training batches
acc_list = []      # training accuracy

def train(data_loader, time, host_list, criterion, optimizer, device, epochs, save_path, Lambda = 0):
    """Train encoder, projector and classifier jointly; evaluate on the test set each epoch.

    Every batch is paired with an augmented view obtained from ``dataAug``. Both
    views go through the encoder and projector for the contrastive loss, while
    only the original view is classified. The optimised loss is
    ``classification_loss + Lambda * contrastive_loss``.

    Reads these module-level names: ``encoder``, ``projector``,
    ``classification_model``, ``criterion2``, ``test_dataloader``, ``mean``,
    ``std`` and ``dataAug``. Appends one value per epoch to ``cls_losses``,
    ``con_losses``, ``acc_list`` and ``performances``.

    Args:
        data_loader: iterable of ``(batch_features, labels)``; the last column of
            ``batch_features`` is passed to ``dataAug`` as the sample index and
            the remaining columns are the model input.
        time: unused.
        host_list: unused.
        criterion: contrastive loss, called as
            ``criterion(features=..., device=..., labels=...)``.
        optimizer: optimizer stepped once per batch.
        device: device that batches and augmented features are moved to.
        epochs: number of passes over ``data_loader``.
        save_path: unused (no checkpoint is written).
        Lambda: weight of the contrastive loss in the total loss.

    Returns:
        None. Metrics are printed and stored in the module-level history lists.
    """
    # Imported locally: the mAP computation needs numpy and no visible cell imports it.
    import numpy as np

    encoder.train()
    projector.train()
    classification_model.train()

    for epoch in range(epochs):
        total_loss1 = 0
        total_loss2 = 0
        correct = 0
        total_samples = 0

        progress_bar = tqdm(data_loader, desc=f"Epoch {epoch+1}/{epochs}")

        for batch_features, labels in progress_bar:
            # Read the index column before the device transfer so that the
            # per-sample .item() calls in dataAug do not each force a device sync.
            aug_index = batch_features[:, -1:]

            batch_features = batch_features.to(device)
            labels = labels.to(device)
            features1 = batch_features[:, :-1]

            # data augmentation
            # NOTE(review): only the augmented view is standardised here;
            # presumably features1 was standardised upstream - confirm.
            aug_features = dataAug(aug_index)
            aug_features = torch.tensor(aug_features, dtype=torch.float32)
            aug_features = aug_features.to(device)
            aug_features = (aug_features - mean) / std

            bsz = labels.shape[0]
            # Original rows first, augmented rows second: one encoder pass for both views.
            aug_input = torch.cat([features1, aug_features], dim=0)

            optimizer.zero_grad()
            embs = encoder(aug_input)
            embs_to_predict = embs[:bsz, :]  # classify the original view only
            contrastive_feat = projector(embs)
            predictions = classification_model(embs_to_predict)

            # Stack the two views as (bsz, 2, feat_dim) for the contrastive loss.
            view_a, view_b = torch.split(contrastive_feat, [bsz, bsz], dim=0)
            features = torch.cat([view_a.unsqueeze(1), view_b.unsqueeze(1)], dim=1)

            loss1 = criterion(features = features, device = device , labels = labels)
            loss2 = criterion2(predictions , labels)
            loss = loss2 + Lambda*loss1
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(predictions, 1)
            correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
            total_loss1 += loss1.item()
            total_loss2 += loss2.item()
            progress_bar.set_description(f"Epoch {epoch+1}/{epochs}")

        # Average over the loader that was actually iterated, not a global one.
        average_loss1 = total_loss1 / len(data_loader)
        average_loss2 = total_loss2 / len(data_loader)
        accuracy = correct / total_samples

        print(f"Epoch [{epoch+1}/{epochs}] CLS_loss: {average_loss2:.4f} Con_loss: {average_loss1:.4f} Accuracy: {accuracy:.4f}")
        cls_losses.append(average_loss2)
        con_losses.append(average_loss1)
        acc_list.append(accuracy)

        # evaluation for test dataset
        # Both modules used for inference are switched to eval mode.
        encoder.eval()
        classification_model.eval()
        all_labels = []
        all_preds = []
        all_probs = []

        with torch.no_grad():
            for test_data, test_labels in test_dataloader:
                test_data = test_data.to(device)

                # Drop the trailing index column, as in training.
                test_features = test_data[:, :-1]

                encoder_output = encoder(test_features)
                predictions = classification_model(encoder_output)

                _, predicted = torch.max(predictions, 1)
                all_labels.extend(test_labels.cpu().numpy())
                all_preds.extend(predicted.cpu().numpy())

                probs = torch.softmax(predictions, dim=1)
                all_probs.append(probs.cpu().numpy())

        all_probs = np.concatenate(all_probs)
        # Class count comes from the collected probabilities rather than from a
        # variable left over from the last loop iteration.
        num_classes = all_probs.shape[1]
        all_labels_one_hot = np.eye(num_classes)[all_labels]
        mAP = average_precision_score(all_labels_one_hot, all_probs, average="macro")

        print(f"Mean Average Precision (mAP): {mAP:.4f}")

        accuracy = accuracy_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds, average='macro')
        recall = recall_score(all_labels, all_preds, average='macro')
        f1_macro = f1_score(all_labels, all_preds, average='macro')

        print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_macro:.4f}")

        # Back to training mode for the next epoch (and for the caller afterwards).
        encoder.train()
        classification_model.train()

        performances.append(round(mAP, 4))

    print(performances)


In [None]:
import pandas as pd
import torch
import random
import csv
import pandas as pd
import os
import statistics
import torch.nn as nn
import torch.nn.functional as F

# Backbone: 10 input features; the embedding width is the last hidden width (16).
encoder = Encoder(
    input_dim=10,
    hidden_dims=[10, 16, 32, 16],
    feat_dim=4,
)
encoder = encoder.to(device)

# Projection head for the contrastive branch: 16-d embedding -> 4-d feature.
projector = Proj(
    input_dim=16,
    hidden_dims=[10, 16],
    feat_dim=4,
)
projector = projector.to(device)

# Classifier on top of the 16-d embedding, 7 output classes.
classification_model = ClassificationHead(input_dim=16, num_classes=7).to(device)

# start training
# `time` and `host_list` are only forwarded to train(), which does not use them.
time = 1
host_list = ['223.5.5.5', '8.8.8.8']

# Contrastive loss from the project-local `losses` module.
criterion = SupConLoss()
criterion.to(device)

def adjust_learning_rate(optimizer, epoch, initial_lr=0.01, step_size=0.01, max_lr=0.5):
    """Set every parameter group's learning rate to a linearly increasing, capped value.

    The rate is ``initial_lr + epoch * step_size``, limited to at most ``max_lr``.

    Args:
        optimizer: object exposing ``param_groups`` (a sequence of dicts).
        epoch: epoch number used as the multiplier of ``step_size``.
        initial_lr: learning rate at epoch 0.
        step_size: increase applied per epoch.
        max_lr: upper bound for the learning rate.

    Returns:
        None; the groups are modified in place.
    """
    capped_lr = min(initial_lr + (epoch * step_size), max_lr)
    for group in optimizer.param_groups:
        group['lr'] = capped_lr

# One AdamW parameter group per sub-network, all sharing the same learning rate.
optimizer = torch.optim.AdamW(
    [
        {'params': module.parameters()}
        for module in (encoder, classification_model, projector)
    ],
    lr=0.001,
)

# Run the full training / evaluation loop.
train(
    data_loader=train_dataloader,
    time=time,
    host_list=host_list,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=100,
    save_path='checkpoints_new/',
    Lambda=0.25,
)