In [3]:
import pandas as pd
import numpy as np

# Load the PEMS-BAY table and keep a plain NumPy copy of its values.
df = pd.read_hdf('pems_bay/pems_bay.h5')
data = df.to_numpy()
data.shape

# The column labels are used as sensor ids: map each label to its column position.
vals = df.columns.values
sensor_id_to_ind = dict(zip(vals, range(len(vals))))
adj_df = pd.read_csv('pems_bay/distances_bay.csv')

# Walk the CSV rows (first two fields: endpoint ids, third field: weight) and keep
# only the rows whose two endpoints both appear among the table's columns.
edges, weights = [], []
for row in adj_df.values:
    src, dst = row[0], row[1]
    if src in sensor_id_to_ind and dst in sensor_id_to_ind:
        edges.append((sensor_id_to_ind[src], sensor_id_to_ind[dst]))
        weights.append(row[2])

# Freeze the collected lists into arrays: one (source, target) index pair per kept row.
edges = np.array(edges)
weights = np.array(weights)
edges.shape

import pandas as pd
import numpy as np
import torch
from torch import Tensor
from torch_geometric.typing import Adj, OptTensor
from torch_scatter import scatter_add
from torch.nn import Linear, Parameter
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import matplotlib.pyplot as plt

def get_symmetrically_normalized_adjacency(edge_index, edge_weight, n_nodes):
    r"""
    Symmetrically normalize the weights of a graph given as an edge list.

    Every edge weight w_ij is rescaled to d_i^{-1/2} * w_ij * d_j^{-1/2}, i.e. the
    non-zero entries of
    \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2},
    where the degree of a node is the sum of the weights of all edges whose
    second endpoint (``edge_index[1]``) is that node.

    Args:
        edge_index: integer tensor whose rows 0 and 1 hold the two endpoints of each edge.
        edge_weight: 1-D tensor with one weight per edge. It is not modified.
        n_nodes: number of nodes, i.e. the length of the degree vector.

    Returns:
        A tuple ``(edge_index, normalized_weight)``: the unchanged ``edge_index``
        and a new tensor with the rescaled weight of every edge.
    """
    row, col = edge_index[0], edge_index[1]

    # Weighted degree per node, accumulated with torch's native scatter-add.
    deg = torch.zeros(n_nodes, dtype=edge_weight.dtype, device=edge_weight.device)
    deg = deg.scatter_add(0, col, edge_weight)

    deg_inv_sqrt = deg.pow_(-0.5)
    # A node with zero degree yields 0 ** -0.5 == inf; treat its scaling factor as 0.
    deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float("inf"), 0)
    DAD = deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    return edge_index, DAD


class FeaturePropagation(torch.nn.Module):
    """Fill in unknown node features by repeatedly diffusing features over a graph.

    Each iteration multiplies the current feature matrix by the symmetrically
    normalized adjacency matrix and then writes the known entries back, so only
    the unknown entries change from one iteration to the next.
    """

    def __init__(self, num_iterations: int):
        """Store how many diffusion steps ``propagate`` performs."""
        super().__init__()
        self.num_iterations = num_iterations

    def propagate(self, x: Tensor, edge_index: Tensor, mask: Tensor, edge_weight: Tensor) -> Tensor:
        """Run ``num_iterations`` diffusion steps and return the resulting features.

        Args:
            x: 2-D feature matrix with one row per node (``x.shape[0]`` is the node count).
            edge_index: integer tensor whose rows 0 and 1 hold the endpoints of each
                edge. It is indexed and used as sparse COO indices, so it has to be
                a dense tensor.
            mask: boolean tensor indexing ``x``; ``True`` marks known entries that are
                restored after every step. ``None`` means no entry is pinned and the
                features are diffused freely.
            edge_weight: one weight per edge, or ``None`` for an unweighted graph.

        Returns:
            The propagated feature matrix. ``x`` itself is never modified; when
            ``mask`` is ``None`` and ``num_iterations`` is 0 the returned tensor is ``x``.
        """
        if mask is None:
            out = x
        else:
            # Unknown entries start at 0. Their initial value does not matter for the
            # value reached at convergence.
            out = torch.zeros_like(x)
            out[mask] = x[mask]

        n_nodes = x.shape[0]
        adj = self.get_propagation_matrix(edge_index, edge_weight, n_nodes)
        for _ in range(self.num_iterations):
            # Diffuse current features
            out = torch.sparse.mm(adj, out)
            # Reset original known features. Without a mask there is nothing to reset;
            # indexing with None would overwrite the whole matrix with x and undo the
            # diffusion step.
            if mask is not None:
                out[mask] = x[mask]

        return out

    def get_propagation_matrix(self, edge_index, edge_weight, n_nodes):
        """Build the sparse ``n_nodes x n_nodes`` normalized adjacency matrix.

        The result lives on ``edge_index.device``.
        """
        # Initialize all edge weights to ones if the graph is unweighted
        if edge_weight is None:
            edge_weight = torch.ones(edge_index.shape[1], device=edge_index.device)
        edge_index, edge_weight = get_symmetrically_normalized_adjacency(edge_index, edge_weight, n_nodes=n_nodes)
        adj = torch.sparse_coo_tensor(
            edge_index, edge_weight, size=(n_nodes, n_nodes), device=edge_index.device
        )

        return adj




def feature_propagation(edge_index, edge_weight, X, feature_mask, num_iterations):
    """Functional shortcut around ``FeaturePropagation``.

    Creates a ``FeaturePropagation`` configured with ``num_iterations`` and returns
    the result of its ``propagate`` call, with ``X`` passed as the features and
    ``feature_mask`` as the mask.
    """
    return FeaturePropagation(num_iterations=num_iterations).propagate(
        x=X,
        edge_index=edge_index,
        mask=feature_mask,
        edge_weight=edge_weight,
    )


import torch


def create_mask(shape, k, seed=0):
    """Create a reproducible boolean mask in which ``k`` percent of the entries are ``False``.

    Args:
        shape: sequence whose first two items give the number of rows and columns.
        k: percentage (0-100) of entries to set to ``False``; the count is
            ``int(rows * cols * k / 100)``.
        seed: seed of the private random generator. The default reproduces the mask
            obtained by seeding NumPy's global generator with 0, but NumPy's global
            random state is left untouched.

    Returns:
        A ``torch.bool`` tensor of shape ``(shape[0], shape[1])``.

    Raises:
        ValueError: if ``k`` is outside ``[0, 100]``.
    """
    if not 0 <= k <= 100:
        raise ValueError(f"k must be a percentage between 0 and 100, got {k}")

    n_rows, n_cols = shape[0], shape[1]
    total = n_rows * n_cols
    num_zeros = int(total * k / 100)

    # A local generator keeps the function free of global side effects.
    rng = np.random.RandomState(seed)
    indices = rng.choice(total, num_zeros, replace=False)

    mask = np.ones((n_rows, n_cols), dtype=bool)
    # `.flat` always writes into `mask` itself (no reliance on ravel() returning a view).
    mask.flat[indices] = False
    return torch.from_numpy(mask)


def get_product_graph(mat, T, spatial_edge_index=None):
    """Build a space-time graph made of ``T`` stacked copies of a spatial graph.

    Node ``n`` at time step ``t`` gets index ``t * N + n`` with ``N = mat.shape[0]``.
    The edge list contains the spatial edges repeated inside every time step, plus
    an edge in both directions between each node and its own copy in the next step.

    Args:
        mat: 2-D tensor with one row per node and one column per time step.
        T: number of time steps (leading columns of ``mat``) to include.
        spatial_edge_index: ``2 x E`` integer tensor with the spatial edges. When
            omitted, the notebook-level ``edge_index`` variable is used so existing
            two-argument calls keep working.

    Returns:
        ``(edge_index_T, edge_weight_T, mat_T)``: the stacked edge list, an all-ones
        weight per edge, and the first ``T`` columns of ``mat`` flattened time step
        by time step into a column vector, in the same order as the node indices.
    """
    if spatial_edge_index is None:
        spatial_edge_index = edge_index

    N = mat.shape[0]
    nodes = torch.arange(N, device=spatial_edge_index.device)

    # Spatial edges: one copy per time step, shifted into that step's index range.
    blocks = [spatial_edge_index + t * N for t in range(T)]

    # Temporal edges: connect every node to itself in the next step, both directions.
    for t in range(T - 1):
        earlier = nodes + t * N
        later = nodes + (t + 1) * N
        blocks.append(torch.stack([earlier, later], dim=0))
        blocks.append(torch.stack([later, earlier], dim=0))

    edge_index_T = torch.cat(blocks, dim=1)
    edge_weight_T = torch.ones(edge_index_T.shape[1])
    # Transpose before flattening so consecutive entries belong to the same time step.
    mat_T = mat[:, :T].clone().T.flatten().unsqueeze(dim=1)

    return edge_index_T, edge_weight_T, mat_T


# Convert the NumPy arrays to tensors. `mat` is the transposed data matrix, so each
# of its rows corresponds to one column of the loaded table.
mat = torch.tensor(data).float().T
edge_index = torch.tensor(edges).long().T
edge_weight = torch.tensor(weights).float()

# Use the reciprocal of each CSV weight. A weight of 0 turns into +inf, which is
# replaced by (largest finite reciprocal + 1).
edge_weight = 1 / edge_weight
is_inf = edge_weight == float("inf")
edge_weight[is_inf] = torch.max(edge_weight[~is_inf]) + 1

# Product graph over every column of `mat`.
n_steps = mat.shape[1]
edge_index_T, edge_weight_T, mat_T = get_product_graph(mat, n_steps)

# Build the mask from the chosen percentage and start X as an all-zero
# tensor shaped like `mat`.
missing_rate = 25
mask = create_mask(mat.shape, missing_rate)
X = torch.zeros_like(mat)



(8358, 2)

(8358, 2)

  from .autonotebook import tqdm as notebook_tqdm
  return torch._C._cuda_getDeviceCount() > 0


In [43]:
# Copy the entries selected by `mask` from `mat` into `X`; all other entries of `X`
# keep their current values.
X[mask] = mat[mask].clone()

# Reconstruct the remaining entries with 40 propagation iterations.
X_reconstructed = feature_propagation(edge_index, edge_weight, X, mask, 40)

# Mean squared error, measured only on the entries where `mask` is False.
held_out = ~mask
mse = torch.nn.functional.mse_loss(X_reconstructed[held_out], mat[held_out])
print(mse)


tensor(2758.4329)


In [6]:
!ls

AnnotatedTransformer.ipynb  LICENSE	    pems_bay		  run.sh
compare_FP.py		    lightning_logs  __pycache__		  shenzhen.npy
data			    losloop.npy     pyproject.toml	  shenzhen.png
expt_cmp.ipynb		    losloop.png     README.md		  tasks
expt.ipynb		    main.py	    requirements-dev.txt  utils
huge_cmp.py		    models	    requirements.txt
imputers		    my_main.py	    run.py
