In [None]:
!pip install dgl



In [None]:
import h5py
import numpy as np
import torch
import dgl
from dgl.nn import GraphConv
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

# Read the soil-moisture retrievals from the HDF5 file and list its top-level keys.
file_path = '/content/SMAP.h5'
with h5py.File(file_path, 'r') as file:
    print(list(file.keys()))
    retrieval_group = file['Soil_Moisture_Retrieval_Data']
    soil_moisture = np.array(retrieval_group['soil_moisture'])

# Drop every entry equal to -9999 (presumably the dataset's fill value -- confirm
# against the product metadata). Boolean-mask indexing also flattens the array.
valid_mask = soil_moisture != -9999
soil_moisture = soil_moisture[valid_mask]

# Standardize the remaining values; the scaler expects a 2-D column, so reshape
# going in and flatten coming out.
scaler = StandardScaler()
scaled_column = scaler.fit_transform(soil_moisture.reshape(-1, 1))
soil_moisture_scaled = scaled_column.flatten()

# One node per retained value, with a directed edge from each node i to node i + 1.
data_tensor = torch.tensor(soil_moisture_scaled, dtype=torch.float32)
num_nodes = len(data_tensor)
edges_src = torch.arange(0, num_nodes - 1)
edges_dst = torch.arange(1, num_nodes)

# Build the chain graph and add a self-loop to every node.
g = dgl.add_self_loop(dgl.graph((edges_src, edges_dst)))

# Each node carries its scaled value as a single-column feature.
g.ndata['feat'] = data_tensor.unsqueeze(dim=1)

class GCNModel(nn.Module):
    """Two-layer graph convolutional network.

    The first ``GraphConv`` layer maps ``in_feats`` to ``h_feats`` and is
    followed by a ReLU; the second maps ``h_feats`` to ``num_classes``.
    Both layers are built with ``allow_zero_in_degree=True``.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        layer_options = {'allow_zero_in_degree': True}
        self.conv1 = GraphConv(in_feats, h_feats, **layer_options)
        self.conv2 = GraphConv(h_feats, num_classes, **layer_options)

    def forward(self, g, features):
        """Apply ``conv1``, ReLU, then ``conv2`` on graph ``g`` and return the result."""
        hidden = self.conv1(g, features)
        hidden = F.relu(hidden)
        return self.conv2(g, hidden)

# One input feature per node, a 16-unit hidden layer, and one output per node.
model = GCNModel(1, 16, 1)
# Adam over all model parameters with a learning rate of 0.005.
optimizer = optim.Adam(model.parameters(), lr=5e-3)
# Mean squared error (the default 'mean' reduction, spelled out).
criterion = nn.MSELoss(reduction='mean')

# Training loop
def train(graph, features, epochs=50):
    """Fit the module-level ``model`` to reproduce ``features`` on ``graph``.

    Uses the module-level ``optimizer`` and ``criterion``. The model output is
    compared against ``features`` itself, so the input doubles as the target.
    The loss is printed every fifth epoch, starting with epoch 0.

    Args:
        graph: Graph passed as the first argument to ``model``.
        features: Node features, used both as model input and as target.
        epochs: Number of full-graph optimization steps to run.
    """
    for step in range(epochs):
        model.train()
        optimizer.zero_grad()

        reconstruction = model(graph, features)
        loss = criterion(reconstruction, features)

        loss.backward()
        optimizer.step()

        if not step % 5:
            print(f'Epoch {step}: Loss {loss.item()}')

train(graph=g, features=g.ndata['feat'])

# Inference. For illustration only, the anomaly score is the absolute value of
# the model output (its deviation from zero), not the reconstruction error.
model.eval()
with torch.no_grad():
    logits = model(g, g.ndata['feat'])
    anomaly_score = logits.squeeze().abs()

print(anomaly_score[:10])

# Simulated accuracy: nodes scoring above the mean are flagged as anomalies and
# compared with randomly drawn labels (positive with probability 0.59).
# NOTE(review): the "true" labels are random and unseeded, so this number is not
# a real quality metric and changes from run to run.
threshold = anomaly_score.mean()
predicted_labels = torch.gt(anomaly_score, threshold).float()
true_labels = torch.lt(torch.rand(len(anomaly_score)), 0.59).float()
correct_predictions = torch.eq(predicted_labels, true_labels).float().sum()
accuracy = correct_predictions / len(true_labels)
print(f'Calculated Accuracy: {accuracy:.2f}')


['Metadata', 'Soil_Moisture_Retrieval_Data', 'Soil_Moisture_Retrieval_Data_Polar']
Epoch 0: Loss 0.912585437297821
Epoch 5: Loss 0.7271197438240051
Epoch 10: Loss 0.579531729221344
Epoch 15: Loss 0.46787601709365845
Epoch 20: Loss 0.388121098279953
Epoch 25: Loss 0.3317934572696686
Epoch 30: Loss 0.2870759069919586
Epoch 35: Loss 0.24781343340873718
Epoch 40: Loss 0.21603159606456757
Epoch 45: Loss 0.19170722365379333
tensor([0.4674, 0.9193, 1.1016, 0.7824, 0.4342, 0.8097, 1.0295, 0.6469, 0.4664,
        0.7369])
Calculated Accuracy: 0.51
