In [None]:
!pip install dgl

Collecting dgl
  Downloading dgl-2.1.0-cp310-cp310-manylinux1_x86_64.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=2->torchdata>=0.5.0->dgl)
  Using cached nvidia_cublas_cu12-12.1.3.1

In [None]:
import h5py
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Seed the global RNGs first: DataLoader shuffling, weight initialisation and
# NumPy sampling further down all draw from them, so without a seed every
# Restart & Run All produces different numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the dataset
file_path = '/content/SMAP.h5'
FILL_VALUE = -9999  # marker this file uses for missing soil-moisture values
with h5py.File(file_path, 'r') as file:
    print(list(file.keys()))  # Display the keys to find the correct dataset
    # Copy the soil moisture data into memory while the file is still open
    soil_moisture_raw = np.array(file['Soil_Moisture_Retrieval_Data']['soil_moisture'])

# Keep only usable measurements: drop the fill marker and any NaN/inf entries.
# A single non-finite value would otherwise propagate through the scaler into
# the training loss.
is_valid = (soil_moisture_raw != FILL_VALUE) & np.isfinite(soil_moisture_raw)
soil_moisture = soil_moisture_raw[is_valid]  # boolean indexing also flattens to 1-D
if soil_moisture.size == 0:
    raise ValueError(f'No valid soil moisture values found in {file_path}')

# Normalize the data (zero mean, unit variance)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(soil_moisture.reshape(-1, 1)).flatten()

# Convert the data to PyTorch tensors
data_tensor = torch.tensor(data_scaled, dtype=torch.float32)

# Create a dataset and dataloader
# unsqueeze(1) turns shape (N,) into (N, 1): one feature column per sample,
# matching the model's input_dim of 1.
dataset = TensorDataset(data_tensor.unsqueeze(1))
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

# Define the Deep SVDD model
class DeepSVDD(nn.Module):
    """Two-layer fully connected encoder used for Deep SVDD-style scoring.

    The network maps each input vector to a ``latent_dim``-dimensional
    embedding through one hidden layer of 64 ReLU units.

    Args:
        input_dim: Number of features per input sample.
        latent_dim: Size of the embedding returned by ``forward``.
        bias: Whether the linear layers carry bias terms. Defaults to
            ``False``: with bias terms the network can ignore its input and
            emit a constant vector, which lets a "pull every embedding to one
            point" objective collapse to a trivial solution. Pass ``True`` to
            get the layers with bias terms.
    """

    def __init__(self, input_dim, latent_dim, bias=False):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64, bias=bias)
        self.fc2 = nn.Linear(64, latent_dim, bias=bias)

    def forward(self, x):
        """Return the embedding of ``x`` (last dimension must be ``input_dim``)."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Hyperparameters of the network
input_dim = 1  # univariate data: a single value per sample
latent_dim = 10  # size of the embedding the network produces

# Build the network together with its optimiser and loss function
model = DeepSVDD(input_dim=input_dim, latent_dim=latent_dim)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.MSELoss()  # mean squared error

# Training the Deep SVDD model
def train(model, dataloader, epochs, optimizer=None, criterion=None):
    """Train ``model`` to map every sample close to the origin of the latent space.

    For each batch the loss is ``criterion(outputs, zeros)``, i.e. the
    embeddings are pulled towards the all-zero vector.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Iterable yielding 1-tuples ``(data,)`` of input batches.
        epochs: Number of full passes over ``dataloader``.
        optimizer: Optimiser bound to ``model``'s parameters. When omitted, a
            fresh ``Adam`` with ``lr=0.001`` is created for this call, so the
            function does not depend on a module-level variable existing.
        criterion: Loss taking ``(outputs, target)``. Defaults to ``nn.MSELoss()``.

    Returns:
        A list with the mean batch loss of each epoch, in order.

    Raises:
        ValueError: If ``dataloader`` yields no batches during an epoch.
    """
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=0.001)
    if criterion is None:
        criterion = nn.MSELoss()

    epoch_losses = []
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0  # counted here so loaders without __len__ also work
        for data, in dataloader:
            outputs = model(data)
            loss = criterion(outputs, torch.zeros_like(outputs))  # Zero center anomaly detection
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Previously this surfaced as an opaque ZeroDivisionError.
            raise ValueError('dataloader yielded no batches; nothing to train on')
        mean_loss = total_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {mean_loss}')
    return epoch_losses

# Fit the network with 20 passes over the soil-moisture batches
train(model=model, dataloader=dataloader, epochs=20)

def calculate_accuracy(model, dataloader, target_accuracy, true_labels=None):
    """Label samples as anomalous/normal and report an accuracy figure.

    Every sample is embedded with ``model``; its anomaly score is the L2 norm
    of the embedding, and samples whose score exceeds the mean score are
    predicted anomalous (label 1), the rest normal (label 0).

    Args:
        model: Network producing one embedding row per input sample.
        dataloader: Iterable yielding 1-tuples ``(data,)`` of input batches.
        target_accuracy: Fraction in ``[0, 1]``. Only used when
            ``true_labels`` is ``None`` (see below).
        true_labels: Optional ground-truth 0/1 labels, one per sample, in the
            same order in which ``dataloader`` yields the samples (so the
            loader must not shuffle). When given, the real accuracy of the
            predictions against these labels is computed.

    Returns:
        The accuracy as a Python float.

    Raises:
        ValueError: If ``dataloader`` yields no batches, if ``true_labels``
            does not have one entry per sample, or if ``target_accuracy`` is
            outside ``[0, 1]`` in the simulated mode.

    WARNING: without ``true_labels`` the result is *simulated*, not measured.
    The reference labels are manufactured by copying the predictions and
    flipping a ``1 - target_accuracy`` share of them, so the reported value
    equals ``target_accuracy`` by construction (up to rounding) and says
    nothing about how well the model detects anomalies.
    """
    model.eval()
    with torch.no_grad():
        batch_outputs = [model(data) for data, in dataloader]
    if not batch_outputs:
        raise ValueError('dataloader yielded no batches; nothing to evaluate')

    # Flatten list of outputs
    all_outputs = torch.cat(batch_outputs)
    # Calculate the distance from zero (anomaly score)
    distances = torch.norm(all_outputs, dim=1)
    # Define a threshold based on the mean of distances
    threshold = distances.mean()
    predicted_labels = (distances > threshold).float()

    if true_labels is not None:
        # Real evaluation against caller-supplied ground truth.
        reference = torch.as_tensor(true_labels, dtype=predicted_labels.dtype).reshape(-1)
        if reference.shape != predicted_labels.shape:
            raise ValueError(
                f'true_labels has {reference.numel()} entries but '
                f'{predicted_labels.numel()} samples were scored'
            )
        accuracy = (predicted_labels == reference).float().mean().item()
        print(f'Accuracy: {accuracy:.2f}')
        return accuracy

    if not 0 <= target_accuracy <= 1:
        raise ValueError('target_accuracy must lie in [0, 1]')

    # Simulated mode: fabricate reference labels that disagree with the
    # predictions on a (1 - target_accuracy) share of randomly chosen samples.
    simulated_labels = predicted_labels.clone()
    num_changes = int((1 - target_accuracy) * len(simulated_labels))
    change_indices = np.random.choice(len(simulated_labels), num_changes, replace=False)
    simulated_labels[change_indices] = 1 - simulated_labels[change_indices]

    # Calculate simulated accuracy
    correct_predictions = (predicted_labels == simulated_labels).float().sum()
    calculated_accuracy = (correct_predictions / len(simulated_labels)).item()

    print(f'Simulated Accuracy: {calculated_accuracy:.2f}')
    return calculated_accuracy

# Print the simulated accuracy for a target of 0.61
calculate_accuracy(model=model, dataloader=dataloader, target_accuracy=0.61)


['Metadata', 'Soil_Moisture_Retrieval_Data', 'Soil_Moisture_Retrieval_Data_Polar']
Epoch 1/20, Loss: 0.0032750394534084635
Epoch 2/20, Loss: 4.612201274173471e-05
Epoch 3/20, Loss: 1.954677136653579e-05
Epoch 4/20, Loss: 1.2104177945295658e-05
Epoch 5/20, Loss: 8.51247287287219e-06
Epoch 6/20, Loss: 6.397032866991492e-06
Epoch 7/20, Loss: 4.978934482347649e-06
Epoch 8/20, Loss: 3.943230106062347e-06
Epoch 9/20, Loss: 3.17821577193932e-06
Epoch 10/20, Loss: 2.674527740794868e-06
Epoch 11/20, Loss: 2.2454083178407887e-06
Epoch 12/20, Loss: 1.9054970835834716e-06
Epoch 13/20, Loss: 1.6418276796080882e-06
Epoch 14/20, Loss: 1.6090226379796449e-06
Epoch 15/20, Loss: 1.3399079106346026e-06
Epoch 16/20, Loss: 1.2936393400212779e-06
Epoch 17/20, Loss: 1.2387174039144727e-06
Epoch 18/20, Loss: 9.873927212704835e-07
Epoch 19/20, Loss: 1.0933697499223258e-06
Epoch 20/20, Loss: 8.497293337154249e-07
Simulated Accuracy: 0.61
