In [88]:
import numpy as np
import torch
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
device = torch.device('cuda:3')
# Generate synthetic data
n_samples = 1000
n_features = 20
centers = 2

# Generate blobs for clean data
data, _ = make_blobs(n_samples=n_samples, n_features=n_features, centers=centers, cluster_std=1.5, random_state=42)
# Add random noise to create outlier data
outliers = np.random.uniform(low=-10, high=10, size=(int(n_samples*0.1), n_features))

# Normalize the data
scaler = StandardScaler()
data = scaler.fit_transform(data)
outliers = scaler.transform(outliers)

# Convert to torch tensors
data_tensor = torch.load('ori.pth')
outliers_tensor = torch.load('trigger.pth')
data_tensor = data_tensor.to(device)
outliers_tensor = outliers_tensor.to(device)
entire_data = torch.concat([data_tensor,outliers_tensor])

In [89]:
import torch
from torch import nn

class DeepSVDDModel(nn.Module):
    def __init__(self, input_dim, rep_dim):
        super(DeepSVDDModel, self).__init__()
        # Define your neural network architecture here
        # Example: Simple feedforward network
        self.rep_dim = rep_dim
        self.net = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, rep_dim),
        )

    def forward(self, x):
        return self.net(x)

# Initialize the model
input_dim = 500  # Feature dimension
rep_dim = 64  # Desired representation dimension in the feature space
model = DeepSVDDModel(input_dim, rep_dim).to(device)

In [90]:
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

# Hyperparameters
learning_rate = 1e-4
batch_size = 4096
epochs = 500

# Prepare DataLoader
# clean_data_tensor = torch.Tensor(a)  # Convert clean data `a` to a torch Tensor
clean_data_tensor = entire_data  # Convert clean data `a` to a torch Tensor
dataset = TensorDataset(clean_data_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Center of the hypersphere, initialize with a forward pass (optional)
with torch.no_grad():
    sphere_center = model(clean_data_tensor).mean(dim=0)

# Loss function
def loss_fn(output):
    return torch.mean(torch.sum((output - sphere_center) ** 2, dim=1))

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
model.train()
for epoch in range(epochs):
    for data in dataloader:
        inputs = data[0]
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


Epoch 1, Loss: 3.691316305776127e-05
Epoch 2, Loss: 3.1643245165469125e-05
Epoch 3, Loss: 2.3243692339747213e-05
Epoch 4, Loss: 2.1881649445276707e-05
Epoch 5, Loss: 1.789537418517284e-05
Epoch 6, Loss: 1.642541064938996e-05
Epoch 7, Loss: 1.3852514712198172e-05
Epoch 8, Loss: 1.2595494808920193e-05
Epoch 9, Loss: 1.094650633604033e-05
Epoch 10, Loss: 1.0499689778953325e-05
Epoch 11, Loss: 8.893872291082516e-06
Epoch 12, Loss: 7.804786946508102e-06
Epoch 13, Loss: 7.15370924808667e-06
Epoch 14, Loss: 6.334862064250046e-06
Epoch 15, Loss: 5.6052249419735745e-06
Epoch 16, Loss: 5.02858074469259e-06
Epoch 17, Loss: 4.8178394536080305e-06
Epoch 18, Loss: 4.135093149670865e-06
Epoch 19, Loss: 3.815585387201281e-06
Epoch 20, Loss: 3.316168886158266e-06
Epoch 21, Loss: 2.985482524309191e-06
Epoch 22, Loss: 2.6024315502581885e-06
Epoch 23, Loss: 2.440124262648169e-06
Epoch 24, Loss: 2.1979060420562746e-06
Epoch 25, Loss: 1.932934992510127e-06
Epoch 26, Loss: 1.7436647112845094e-06
Epoch 27, Lo

In [91]:
from sklearn.metrics import roc_auc_score

model.eval()
with torch.no_grad():
    clean_outputs = model(data_tensor)
    outlier_outputs = model(outliers_tensor)
    clean_scores = torch.sum((clean_outputs - sphere_center) ** 2, dim=1).cpu().numpy()
    outlier_scores = torch.sum((outlier_outputs - sphere_center) ** 2, dim=1).cpu().numpy()

# Labels: 0 for clean, 1 for outlier
labels = [0] * len(clean_scores) + [1] * len(outlier_scores)
scores = np.concatenate([clean_scores, outlier_scores])
print(scores)
# Calculate AUC-ROC
auc_roc = roc_auc_score(labels, scores)
print(f"AUC-ROC: {auc_roc}")

[1.9221398e-08 1.8826688e-08 2.0444103e-08 ... 2.5725488e-08 1.8885927e-08
 1.8652152e-08]
AUC-ROC: 0.4262495564892624
